Skip to content

Commit 7c5d87b

Browse files
author
Sentience Dev
committed
Merge pull request #5 from SentienceAPI/phase3_dsl
Phase 3: DSL query completed
2 parents 4b7d55c + 4daead6 commit 7c5d87b

File tree

2 files changed

+519
-8
lines changed

2 files changed

+519
-8
lines changed

src/query.ts

Lines changed: 269 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,28 @@
55
import { Snapshot, Element, QuerySelector, QuerySelectorObject } from './types';
66

77
export function parseSelector(selector: string): QuerySelectorObject {
8-
const query: QuerySelectorObject = {};
8+
const query: QuerySelectorObject & {
9+
role_exclude?: string;
10+
text_contains?: string;
11+
text_prefix?: string;
12+
text_suffix?: string;
13+
visible?: boolean;
14+
tag?: string;
15+
importance?: number;
16+
importance_min?: number;
17+
importance_max?: number;
18+
z_index_min?: number;
19+
z_index_max?: number;
20+
in_viewport?: boolean;
21+
is_occluded?: boolean;
22+
[key: string]: any; // For bbox.* and attr.*, css.*
23+
} = {};
924

10-
// Match patterns like: key=value, key~'value', key!="value"
11-
// This regex matches: key, operator (=, ~, !=), and value (quoted or unquoted)
12-
const pattern = /(\w+)([=~!]+)((?:'[^']+'|"[^"]+"|[^\s]+))/g;
25+
// Match patterns like: key=value, key~'value', key!="value", key>123, key^='prefix', key$='suffix'
26+
// Updated regex to support: =, !=, ~, ^=, $=, >, >=, <, <=
27+
// Supports dot notation: attr.id, css.color
28+
// Note: Multi-character operators (^=, $=, >=, <=, !=) are listed before the single-character class so the alternation tries them first
29+
const pattern = /([\w.]+)(\^=|\$=|>=|<=|!=|[=~<>])((?:'[^']+'|"[^"]+"|[^\s]+))/g;
1330
let match;
1431

1532
while ((match = pattern.exec(selector)) !== null) {
@@ -20,31 +37,144 @@ export function parseSelector(selector: string): QuerySelectorObject {
2037
// Remove quotes from value
2138
value = value.replace(/^["']|["']$/g, '');
2239

40+
// Handle numeric comparisons
41+
let isNumeric = false;
42+
let numericValue = 0;
43+
const parsedNum = parseFloat(value);
44+
if (!isNaN(parsedNum) && isFinite(parsedNum)) {
45+
isNumeric = true;
46+
numericValue = parsedNum;
47+
}
48+
2349
if (op === '!=') {
2450
if (key === 'role') {
25-
(query as any).role_exclude = value;
51+
query.role_exclude = value;
2652
} else if (key === 'clickable') {
2753
query.clickable = false;
54+
} else if (key === 'visible') {
55+
query.visible = false;
2856
}
2957
} else if (op === '~') {
58+
// Substring match (case-insensitive)
59+
if (key === 'text' || key === 'name') {
60+
query.text_contains = value;
61+
}
62+
} else if (op === '^=') {
63+
// Prefix match
64+
if (key === 'text' || key === 'name') {
65+
query.text_prefix = value;
66+
}
67+
} else if (op === '$=') {
68+
// Suffix match
3069
if (key === 'text' || key === 'name') {
31-
(query as any).text_contains = value;
70+
query.text_suffix = value;
71+
}
72+
} else if (op === '>') {
73+
// Greater than
74+
if (isNumeric) {
75+
if (key === 'importance') {
76+
query.importance_min = numericValue + 0.0001; // Exclusive
77+
} else if (key.startsWith('bbox.')) {
78+
query[`${key}_min`] = numericValue + 0.0001;
79+
} else if (key === 'z_index') {
80+
query.z_index_min = numericValue + 0.0001;
81+
}
82+
} else if (key.startsWith('attr.') || key.startsWith('css.')) {
83+
query[`${key}_gt`] = value;
84+
}
85+
} else if (op === '>=') {
86+
// Greater than or equal
87+
if (isNumeric) {
88+
if (key === 'importance') {
89+
query.importance_min = numericValue;
90+
} else if (key.startsWith('bbox.')) {
91+
query[`${key}_min`] = numericValue;
92+
} else if (key === 'z_index') {
93+
query.z_index_min = numericValue;
94+
}
95+
} else if (key.startsWith('attr.') || key.startsWith('css.')) {
96+
query[`${key}_gte`] = value;
97+
}
98+
} else if (op === '<') {
99+
// Less than
100+
if (isNumeric) {
101+
if (key === 'importance') {
102+
query.importance_max = numericValue - 0.0001; // Exclusive
103+
} else if (key.startsWith('bbox.')) {
104+
query[`${key}_max`] = numericValue - 0.0001;
105+
} else if (key === 'z_index') {
106+
query.z_index_max = numericValue - 0.0001;
107+
}
108+
} else if (key.startsWith('attr.') || key.startsWith('css.')) {
109+
query[`${key}_lt`] = value;
110+
}
111+
} else if (op === '<=') {
112+
// Less than or equal
113+
if (isNumeric) {
114+
if (key === 'importance') {
115+
query.importance_max = numericValue;
116+
} else if (key.startsWith('bbox.')) {
117+
query[`${key}_max`] = numericValue;
118+
} else if (key === 'z_index') {
119+
query.z_index_max = numericValue;
120+
}
121+
} else if (key.startsWith('attr.') || key.startsWith('css.')) {
122+
query[`${key}_lte`] = value;
32123
}
33124
} else if (op === '=') {
125+
// Exact match
34126
if (key === 'role') {
35127
query.role = value;
36128
} else if (key === 'clickable') {
37129
query.clickable = value.toLowerCase() === 'true';
130+
} else if (key === 'visible') {
131+
query.visible = value.toLowerCase() === 'true';
132+
} else if (key === 'tag') {
133+
query.tag = value;
38134
} else if (key === 'name' || key === 'text') {
39135
query.text = value;
136+
} else if (key === 'importance' && isNumeric) {
137+
query.importance = numericValue;
138+
} else if (key.startsWith('attr.')) {
139+
// Dot notation for attributes: attr.id="submit-btn"
140+
const attrKey = key.substring(5); // Remove "attr." prefix
141+
if (!query.attr) {
142+
query.attr = {};
143+
}
144+
(query.attr as any)[attrKey] = value;
145+
} else if (key.startsWith('css.')) {
146+
// Dot notation for CSS: css.color="red"
147+
const cssKey = key.substring(4); // Remove "css." prefix
148+
if (!query.css) {
149+
query.css = {};
150+
}
151+
(query.css as any)[cssKey] = value;
40152
}
41153
}
42154
}
43155

44156
return query;
45157
}
46158

47-
function matchElement(element: Element, query: QuerySelectorObject & { role_exclude?: string; text_contains?: string }): boolean {
159+
function matchElement(
160+
element: Element,
161+
query: QuerySelectorObject & {
162+
role_exclude?: string;
163+
text_contains?: string;
164+
text_prefix?: string;
165+
text_suffix?: string;
166+
visible?: boolean;
167+
tag?: string;
168+
importance?: number;
169+
importance_min?: number;
170+
importance_max?: number;
171+
z_index_min?: number;
172+
z_index_max?: number;
173+
in_viewport?: boolean;
174+
is_occluded?: boolean;
175+
[key: string]: any; // For bbox.* and attr.*, css.*
176+
}
177+
): boolean {
48178
// Role exact match
49179
if (query.role !== undefined) {
50180
if (element.role !== query.role) {
@@ -66,6 +196,20 @@ function matchElement(element: Element, query: QuerySelectorObject & { role_excl
66196
}
67197
}
68198

199+
// Visible (using in_viewport and !is_occluded)
200+
if (query.visible !== undefined) {
201+
const isVisible = element.in_viewport && !element.is_occluded;
202+
if (isVisible !== query.visible) {
203+
return false;
204+
}
205+
}
206+
207+
// Tag (not yet in Element model, but prepare for future)
208+
if (query.tag !== undefined) {
209+
// For now, this check is a no-op (it never rejects an element) since tag is not in Element model
210+
// This is a placeholder for future implementation
211+
}
212+
69213
// Text exact match
70214
if (query.text !== undefined) {
71215
if (!element.text || element.text !== query.text) {
@@ -83,12 +227,129 @@ function matchElement(element: Element, query: QuerySelectorObject & { role_excl
83227
}
84228
}
85229

230+
// Text prefix match
231+
if (query.text_prefix !== undefined) {
232+
if (!element.text) {
233+
return false;
234+
}
235+
if (!element.text.toLowerCase().startsWith(query.text_prefix.toLowerCase())) {
236+
return false;
237+
}
238+
}
239+
240+
// Text suffix match
241+
if (query.text_suffix !== undefined) {
242+
if (!element.text) {
243+
return false;
244+
}
245+
if (!element.text.toLowerCase().endsWith(query.text_suffix.toLowerCase())) {
246+
return false;
247+
}
248+
}
249+
250+
// Importance filtering
251+
if (query.importance !== undefined) {
252+
if (element.importance !== query.importance) {
253+
return false;
254+
}
255+
}
256+
if (query.importance_min !== undefined) {
257+
if (element.importance < query.importance_min) {
258+
return false;
259+
}
260+
}
261+
if (query.importance_max !== undefined) {
262+
if (element.importance > query.importance_max) {
263+
return false;
264+
}
265+
}
266+
267+
// BBox filtering (spatial)
268+
if (query['bbox.x_min'] !== undefined) {
269+
if (element.bbox.x < query['bbox.x_min']) {
270+
return false;
271+
}
272+
}
273+
if (query['bbox.x_max'] !== undefined) {
274+
if (element.bbox.x > query['bbox.x_max']) {
275+
return false;
276+
}
277+
}
278+
if (query['bbox.y_min'] !== undefined) {
279+
if (element.bbox.y < query['bbox.y_min']) {
280+
return false;
281+
}
282+
}
283+
if (query['bbox.y_max'] !== undefined) {
284+
if (element.bbox.y > query['bbox.y_max']) {
285+
return false;
286+
}
287+
}
288+
if (query['bbox.width_min'] !== undefined) {
289+
if (element.bbox.width < query['bbox.width_min']) {
290+
return false;
291+
}
292+
}
293+
if (query['bbox.width_max'] !== undefined) {
294+
if (element.bbox.width > query['bbox.width_max']) {
295+
return false;
296+
}
297+
}
298+
if (query['bbox.height_min'] !== undefined) {
299+
if (element.bbox.height < query['bbox.height_min']) {
300+
return false;
301+
}
302+
}
303+
if (query['bbox.height_max'] !== undefined) {
304+
if (element.bbox.height > query['bbox.height_max']) {
305+
return false;
306+
}
307+
}
308+
309+
// Z-index filtering
310+
if (query.z_index_min !== undefined) {
311+
if (element.z_index < query.z_index_min) {
312+
return false;
313+
}
314+
}
315+
if (query.z_index_max !== undefined) {
316+
if (element.z_index > query.z_index_max) {
317+
return false;
318+
}
319+
}
320+
321+
// In viewport filtering
322+
if (query.in_viewport !== undefined) {
323+
if (element.in_viewport !== query.in_viewport) {
324+
return false;
325+
}
326+
}
327+
328+
// Occlusion filtering
329+
if (query.is_occluded !== undefined) {
330+
if (element.is_occluded !== query.is_occluded) {
331+
return false;
332+
}
333+
}
334+
335+
// Attribute filtering (dot notation: attr.id="submit-btn")
336+
if (query.attr !== undefined) {
337+
// This requires DOM access, which is not available in the Element model
338+
// This is a placeholder for future implementation when we add DOM access
339+
}
340+
341+
// CSS property filtering (dot notation: css.color="red")
342+
if (query.css !== undefined) {
343+
// This requires DOM access, which is not available in the Element model
344+
// This is a placeholder for future implementation when we add DOM access
345+
}
346+
86347
return true;
87348
}
88349

89350
export function query(snapshot: Snapshot, selector: QuerySelector): Element[] {
90351
// Parse selector if string
91-
const queryObj = typeof selector === 'string' ? parseSelector(selector) : selector;
352+
const queryObj = typeof selector === 'string' ? parseSelector(selector) : (selector as any);
92353

93354
// Filter elements
94355
const matches = snapshot.elements.filter((el) => matchElement(el, queryObj));

0 commit comments

Comments
 (0)