Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions src/lib/real-estate/__tests__/parseRealEstateListing.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import { describe, it, expect } from 'vitest';
import { parseRealEstateListing } from '../parseRealEstateListing';

describe('parseRealEstateListing', () => {
  // Baseline result: every field null. Each case spreads this and overrides
  // only the fields its fixture is expected to yield.
  const nothingFound = {
    price: null,
    area: null,
    rooms: null,
    floor: null,
    totalFloors: null,
    location: null,
  };

  // Labelled list items covering every supported field.
  it('parses full listing with all fields', () => {
    const markup = '<ul><li>Cena: 250.000 €</li><li>Površina: 65 m²</li><li>Sobe: 3</li><li>Sprat: 4/7</li><li>Lokacija: Novi Sad</li></ul>';

    expect(parseRealEstateListing(markup)).toEqual({
      ...nothingFound,
      price: 250000,
      area: 65,
      rooms: 3,
      floor: 4,
      totalFloors: 7,
      location: 'Novi Sad',
    });
  });

  // "garsonjera" is expected to count as one room and "Prizemlje" as floor 0.
  it('parses garsonjera with prizemlje', () => {
    const markup = '<div>Cena: 120.000 €</div><div>garsonjera</div><div>Prizemlje</div><div>50m2</div>';

    expect(parseRealEstateListing(markup)).toEqual({
      ...nothingFound,
      price: 120000,
      area: 50,
      rooms: 1,
      floor: 0,
    });
  });

  // Unlabelled values: space-separated thousands, "N sobe", "N. od M".
  it('parses with alternative price, area, rooms, floor/totalFloors', () => {
    const markup = '<span>85 000 €</span><span>45 m²</span><span>2 sobe</span><span>1. od 5</span>';

    expect(parseRealEstateListing(markup)).toEqual({
      ...nothingFound,
      price: 85000,
      area: 45,
      rooms: 2,
      floor: 1,
      totalFloors: 5,
    });
  });

  // Text with no recognizable field must produce the all-null result.
  it('returns all nulls when no data found', () => {
    const markup = '<p>Kontaktirajte nas za cenu</p>';

    expect(parseRealEstateListing(markup)).toEqual({ ...nothingFound });
  });
});
86 changes: 86 additions & 0 deletions src/lib/real-estate/parseRealEstateListing.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/**
 * Structured data extracted from a real-estate listing.
 * A field is `null` when the corresponding value could not be found.
 */
export interface RealEstateListing {
  /** Price taken from the amount preceding "€"; decimals are dropped. */
  price: number | null;
  /** Area taken from the number preceding "m2"/"m²", rounded to an integer. */
  area: number | null;
  /** Room count, rounded; "garsonjera"/"studio" counts as 1. */
  rooms: number | null;
  /** Floor number; 0 when the listing says "Prizemlje". */
  floor: number | null;
  /** Total number of floors, from "N/M" or "N od M" notation. */
  totalFloors: number | null;
  /** Text following the "Lokacija:" label. */
  location: string | null;
}

// Matches any HTML tag; used both to flatten the markup and to find field boundaries.
const HTML_TAG = /<[^>]+>/g;

// Price preceded by "Cena:", start of text, or whitespace. Dots or spaces are
// accepted as thousand separators; an optional decimal part is matched but ignored.
const PRICE_BOUNDED = /(?:Cena:|^|\s)(\d{1,3}(?:[ .]\d{3})*|\d+)(?:[.,]\d{1,2})? ?€/i;
// Fallback: any number directly followed by "€", regardless of what precedes it.
const PRICE_ANYWHERE = /(\d{1,3}(?:[ .]\d{3})*|\d+) ?€/i;

/** Extracts the price (e.g. '250.000 €', '85 000 €') as an integer, or null. */
function parsePrice(text: string): number | null {
  const digits = (PRICE_BOUNDED.exec(text) ?? PRICE_ANYWHERE.exec(text))?.[1];
  if (digits === undefined) return null;
  return parseInt(digits.replace(/[ .]/g, ''), 10);
}

/** Extracts the area (e.g. '65 m²', '50m2'), rounded to an integer, or null. */
function parseArea(text: string): number | null {
  const raw = /(\d+(?:[.,]\d+)?) ?m(?:2|²)/i.exec(text)?.[1];
  if (raw === undefined) return null;
  return Math.round(parseFloat(raw.replace(',', '.')));
}

/** Extracts the room count from 'Sobe: 3' or '2 sobe'; studio wording yields 1. */
function parseRooms(text: string): number | null {
  // The labelled form takes precedence over the "<number> sobe" form.
  const labelled = /sobe?:\s*(\d+(?:\.\d+)?)/i.exec(text);
  const counted = /(\d+(?:\.\d+)?)\s*sobe?/i.exec(text);
  const raw = (labelled ?? counted)?.[1];
  const rooms = raw === undefined ? null : Math.round(parseFloat(raw));

  // A missing or zero count is overridden when the listing describes a studio.
  if ((rooms === null || rooms === 0) && /garsonjera|studio/i.test(text)) {
    return 1;
  }
  return rooms;
}

/** Extracts floor and total floors from '4/7', '1. od 5', 'Prizemlje' or 'Sprat: 4'. */
function parseFloors(text: string): { floor: number | null; totalFloors: number | null } {
  const pair = /(\d+)\.?\s*(?:od|\/)\s*(\d+)/i.exec(text);
  const floorRaw = pair?.[1];
  const totalRaw = pair?.[2];
  if (floorRaw !== undefined && totalRaw !== undefined) {
    return { floor: parseInt(floorRaw, 10), totalFloors: parseInt(totalRaw, 10) };
  }

  // 'Prizemlje' (ground floor) wins over a bare 'Sprat: <number>'.
  if (/prizemlje/i.test(text)) {
    return { floor: 0, totalFloors: null };
  }
  const single = /Sprat:\s*(\d+)/i.exec(text)?.[1];
  return { floor: single === undefined ? null : parseInt(single, 10), totalFloors: null };
}

/**
 * Extracts the text following 'Lokacija:'.
 *
 * Works on the markup rather than the flattened text: every tag is turned into
 * a line break so the value ends at the next element boundary instead of
 * running on into whatever field follows it. Consequently a value that itself
 * contains inline markup is cut at the first nested tag.
 */
function parseLocation(html: string): string | null {
  const segmented = html.replace(HTML_TAG, '\n');
  const raw = /Lokacija:\s*([^\n\r]+)/i.exec(segmented)?.[1];
  if (raw === undefined) return null;
  const cleaned = raw.replace(/\s+/g, ' ').trim();
  return cleaned === '' ? null : cleaned;
}

/**
 * Parses a real-estate listing (HTML fragment or plain text) into structured fields.
 *
 * Tags are stripped and whitespace collapsed before the numeric fields are
 * searched with regular expressions; HTML entities are not decoded.
 *
 * @param html - Listing markup or text.
 * @returns The extracted fields; each one is `null` when it was not found.
 */
export function parseRealEstateListing(html: string): RealEstateListing {
  const text = html.replace(HTML_TAG, ' ').replace(/\s+/g, ' ').trim();
  const { floor, totalFloors } = parseFloors(text);

  return {
    price: parsePrice(text),
    area: parseArea(text),
    rooms: parseRooms(text),
    floor,
    totalFloors,
    location: parseLocation(html),
  };
}