Skip to content

Commit e8aeb28

Browse files
author
Sentience Dev
committed
Merge pull request #71 from SentienceAPI/text_coord
get text coordinates
2 parents 86a84c6 + 675e26c commit e8aeb28

File tree

5 files changed

+420
-0
lines changed

5 files changed

+420
-0
lines changed

README.md

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,87 @@ const dataUrl = await screenshot(browser, { format: 'jpeg', quality: 85 });
523523

524524
</details>
525525

526+
<details>
527+
<summary><h3>🔎 Text Search - Find Elements by Visible Text</h3></summary>
528+
529+
**`findTextRect(page, options)`** - Find text on page and get exact pixel coordinates
530+
531+
Find buttons, links, or any UI elements by their visible text without needing element IDs or CSS selectors. Returns exact pixel coordinates for each match.
532+
533+
**Example:**
534+
```typescript
535+
import { SentienceBrowser, findTextRect, clickRect } from 'sentienceapi';
536+
537+
const browser = await SentienceBrowser.create();
538+
await browser.getPage().goto('https://example.com');
539+
540+
// Find "Sign In" button (simple string syntax)
541+
const result = await findTextRect(browser.getPage(), "Sign In");
542+
if (result.status === "success" && result.results) {
543+
const firstMatch = result.results[0];
544+
console.log(`Found at: (${firstMatch.rect.x}, ${firstMatch.rect.y})`);
545+
console.log(`In viewport: ${firstMatch.in_viewport}`);
546+
547+
// Click on the found text
548+
if (firstMatch.in_viewport) {
549+
await clickRect(browser, {
550+
x: firstMatch.rect.x,
551+
y: firstMatch.rect.y,
552+
w: firstMatch.rect.width,
553+
h: firstMatch.rect.height
554+
});
555+
}
556+
}
557+
```
558+
559+
**Advanced Options:**
560+
```typescript
561+
// Case-sensitive search
562+
const caseSensitiveResult = await findTextRect(browser.getPage(), {
563+
text: "LOGIN",
564+
caseSensitive: true
565+
});
566+
567+
// Whole word only (won't match "login" as part of "loginButton")
568+
const wholeWordResult = await findTextRect(browser.getPage(), {
569+
text: "log",
570+
wholeWord: true
571+
});
572+
573+
// Find multiple matches
574+
const result = await findTextRect(browser.getPage(), {
575+
text: "Buy",
576+
maxResults: 10
577+
});
578+
for (const match of result.results || []) {
579+
if (match.in_viewport) {
580+
console.log(`Found '${match.text}' at (${match.rect.x}, ${match.rect.y})`);
581+
console.log(`Context: ...${match.context.before}[${match.text}]${match.context.after}...`);
582+
}
583+
}
584+
```
585+
586+
**Returns:** `Promise<TextRectSearchResult>` with:
587+
- **`status`**: "success" or "error"
588+
- **`results`**: Array of `TextMatch` objects with:
589+
- `text` - The matched text
590+
- `rect` - Absolute coordinates (with scroll offset)
591+
- `viewport_rect` - Viewport-relative coordinates
592+
- `context` - Surrounding text (before/after)
593+
- `in_viewport` - Whether visible in current viewport
594+
595+
**Use Cases:**
596+
- Find buttons/links by visible text without CSS selectors
597+
- Get exact pixel coordinates for click automation
598+
- Verify text visibility and position on page
599+
- Search dynamic content that changes frequently
600+
601+
**Note:** Does not consume API credits (runs locally in browser)
602+
603+
**See example:** `examples/find-text-demo.ts`
604+
605+
</details>
606+
526607
---
527608

528609
## 📋 Reference

examples/find-text-demo.ts

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
/**
2+
* Text Search Demo - Using findTextRect() to locate elements by visible text
3+
*
4+
* This example demonstrates how to:
5+
* 1. Find text on a webpage and get exact pixel coordinates
6+
* 2. Use case-sensitive and whole-word matching options
7+
* 3. Click on found text using clickRect()
8+
* 4. Handle multiple matches and filter by viewport visibility
9+
*/
10+
11+
import { SentienceBrowser, findTextRect, clickRect } from '../src';
12+
13+
/**
 * Runs the text search demo against google.com.
 *
 * Walks through five examples: a plain string search, finding and clicking a
 * visible match, case-sensitive vs. case-insensitive counts, whole-word vs.
 * partial counts, and printing the viewport information returned with a search.
 *
 * The browser is closed in a `finally` block so that a failing step (for
 * example a navigation timeout) does not leave the browser running.
 */
async function main() {
  const browser = new SentienceBrowser();
  await browser.start();

  try {
    const page = browser.getPage();

    // Navigate to a search page
    await page.goto('https://www.google.com');
    await page.waitForLoadState('networkidle');

    console.log('\n' + '='.repeat(60));
    console.log('Text Search Demo');
    console.log('='.repeat(60) + '\n');

    // Example 1: Simple text search (plain string argument)
    console.log('Example 1: Finding "Google Search" button');
    console.log('-'.repeat(60));
    let result = await findTextRect(page, 'Google Search');

    if (result.status === 'success' && result.results) {
      console.log(`✓ Found ${result.matches} match(es) for '${result.query}'`);
      // Print at most the first three matches to keep the output short.
      for (let i = 0; i < Math.min(3, result.results.length); i++) {
        const match = result.results[i];
        console.log(`\nMatch ${i + 1}:`);
        console.log(`  Text: '${match.text}'`);
        console.log(`  Position: (${match.rect.x.toFixed(1)}, ${match.rect.y.toFixed(1)})`);
        console.log(`  Size: ${match.rect.width.toFixed(1)}x${match.rect.height.toFixed(1)} pixels`);
        console.log(`  In viewport: ${match.in_viewport}`);
        console.log(
          `  Context: ...${match.context.before}[${match.text}]${match.context.after}...`
        );
      }
    } else {
      console.log(`✗ Search failed: ${result.error}`);
    }

    // Example 2: Find and click the search box (options-object syntax)
    console.log('\n\nExample 2: Finding and clicking the search box');
    console.log('-'.repeat(60));
    result = await findTextRect(page, {
      text: 'Search',
      maxResults: 5
    });

    if (result.status === 'success' && result.results) {
      // Act on the first match that is inside the current viewport, then stop.
      for (const match of result.results) {
        if (match.in_viewport) {
          console.log(`✓ Found visible match: '${match.text}'`);
          console.log(`  Clicking at (${match.rect.x.toFixed(1)}, ${match.rect.y.toFixed(1)})`);

          // Hand the match's bounding rectangle to clickRect.
          // NOTE(review): the README describes `rect` as absolute (scroll offset
          // included); the page is unscrolled here, but confirm which coordinate
          // space clickRect expects before reusing this on a scrolled page.
          const clickResult = await clickRect(browser, {
            x: match.rect.x,
            y: match.rect.y,
            w: match.rect.width,
            h: match.rect.height
          });

          if (clickResult.success) {
            console.log(`  ✓ Click successful!`);
          }
          break;
        }
      }
    }

    // Example 3: Case-sensitive search
    console.log('\n\nExample 3: Case-sensitive search for "GOOGLE"');
    console.log('-'.repeat(60));
    const resultInsensitive = await findTextRect(page, {
      text: 'GOOGLE',
      caseSensitive: false
    });
    const resultSensitive = await findTextRect(page, {
      text: 'GOOGLE',
      caseSensitive: true
    });

    console.log(`Case-insensitive search: ${resultInsensitive.matches ?? 0} matches`);
    console.log(`Case-sensitive search: ${resultSensitive.matches ?? 0} matches`);

    // Example 4: Whole word search
    console.log('\n\nExample 4: Whole word search');
    console.log('-'.repeat(60));
    const resultPartial = await findTextRect(page, {
      text: 'Search',
      wholeWord: false
    });
    const resultWhole = await findTextRect(page, {
      text: 'Search',
      wholeWord: true
    });

    console.log(`Partial word match: ${resultPartial.matches ?? 0} matches`);
    console.log(`Whole word only: ${resultWhole.matches ?? 0} matches`);

    // Example 5: Get viewport information
    console.log('\n\nExample 5: Viewport and scroll information');
    console.log('-'.repeat(60));
    result = await findTextRect(page, 'Google');
    if (result.status === 'success' && result.viewport) {
      console.log(`Viewport size: ${result.viewport.width}x${result.viewport.height}`);
      // Scroll fields are only printed when the result actually carries them.
      if ('scroll_x' in result.viewport && 'scroll_y' in result.viewport) {
        console.log(`Scroll position: (${result.viewport.scroll_x}, ${result.viewport.scroll_y})`);
      }
    }

    console.log('\n' + '='.repeat(60));
    console.log('Demo complete!');
    console.log('='.repeat(60) + '\n');
  } finally {
    // Always release the browser, even if one of the examples threw.
    await browser.close();
  }
}

main().catch(console.error);

src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ export { ScriptGenerator, generate } from './generator';
1414
export { read, ReadOptions, ReadResult } from './read';
1515
export { screenshot, ScreenshotOptions } from './screenshot';
1616
export { showOverlay, clearOverlay } from './overlay';
17+
export { findTextRect } from './textSearch';
1718
export * from './types';
1819
export { saveStorageState } from './utils';
1920

src/textSearch.ts

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
/** Number of matches requested when the caller does not specify `maxResults`. */
const DEFAULT_MAX_RESULTS = 10;

/** Upper bound on `maxResults`, to prevent performance issues on large pages. */
const MAX_RESULTS_LIMIT = 100;

/**
 * Normalise a caller-supplied `maxResults` to an integer in
 * [1, MAX_RESULTS_LIMIT]; `NaN` or a non-number falls back to the default.
 */
function normalizeMaxResults(requested: number): number {
  if (typeof requested !== "number" || Number.isNaN(requested)) {
    return DEFAULT_MAX_RESULTS;
  }
  return Math.max(1, Math.min(Math.floor(requested), MAX_RESULTS_LIMIT));
}

/**
 * Find occurrences of text on the page and get their pixel coordinates.
 *
 * The search itself is delegated to `window.sentience.findTextRect` inside the
 * page (via `page.evaluate`); this wrapper validates and normalises the options
 * and reports failures through the returned object rather than by throwing:
 * - empty / whitespace-only / non-string `text` yields `{ status: "error", ... }`
 * - a page where `window.sentience.findTextRect` is not available yields
 *   `{ status: "error", ... }` instead of an opaque in-page TypeError
 *
 * @param page - Playwright Page instance
 * @param options - Either the text to search for, or an options object.
 *   Defaults: `caseSensitive = false`, `wholeWord = false`, `maxResults = 10`.
 *   `maxResults` is clamped to an integer between 1 and 100.
 * @returns The search result produced in the page, or an error result
 *
 * @example
 * // Find "Sign In" button
 * const result = await findTextRect(page, { text: "Sign In" });
 * if (result.status === "success" && result.results) {
 *   const firstMatch = result.results[0];
 *   console.log(`Found at: (${firstMatch.rect.x}, ${firstMatch.rect.y})`);
 *   console.log(`Size: ${firstMatch.rect.width}x${firstMatch.rect.height}`);
 *   console.log(`In viewport: ${firstMatch.in_viewport}`);
 * }
 *
 * @example
 * // Case-sensitive search
 * const result = await findTextRect(page, {
 *   text: "LOGIN",
 *   caseSensitive: true
 * });
 *
 * @example
 * // Whole word only
 * const result = await findTextRect(page, {
 *   text: "log",
 *   wholeWord: true  // Won't match "login"
 * });
 *
 * @example
 * // Find all matches and click the centre of the first visible one
 * const result = await findTextRect(page, {
 *   text: "Buy Now",
 *   maxResults: 5
 * });
 * if (result.status === "success" && result.results) {
 *   for (const match of result.results) {
 *     if (match.in_viewport) {
 *       // page.mouse.click takes viewport-relative coordinates.
 *       // NOTE(review): `rect` is documented as absolute (scroll offset
 *       // included), so on a scrolled page `viewport_rect` is presumably the
 *       // right source - confirm against ./types.
 *       await page.mouse.click(
 *         match.rect.x + match.rect.width / 2,
 *         match.rect.y + match.rect.height / 2
 *       );
 *       break;
 *     }
 *   }
 * }
 */
export async function findTextRect(
  page: Page,
  options: FindTextRectOptions | string
): Promise<TextRectSearchResult> {
  // Support simple string input for convenience
  const opts: FindTextRectOptions =
    typeof options === "string" ? { text: options } : options;

  const {
    text,
    caseSensitive = false,
    wholeWord = false,
    maxResults = DEFAULT_MAX_RESULTS,
  } = opts;

  // The typeof check protects untyped (plain JS) callers from a crash on .trim().
  if (typeof text !== "string" || text.trim().length === 0) {
    return {
      status: "error",
      error: "Text parameter is required and cannot be empty",
    };
  }

  const limitedMaxResults = normalizeMaxResults(maxResults);

  // Call the extension's findTextRect method inside the page context.
  const result = await page.evaluate(
    (evalOptions) => {
      const api = (
        window as unknown as {
          sentience?: { findTextRect?: (o: typeof evalOptions) => unknown };
        }
      ).sentience;

      // Report a missing API in the same shape as other failures instead of
      // letting "cannot read properties of undefined" escape from the page.
      if (!api || typeof api.findTextRect !== "function") {
        return {
          status: "error",
          error:
            "window.sentience.findTextRect is not available on this page",
        };
      }
      return api.findTextRect(evalOptions);
    },
    {
      text,
      caseSensitive,
      wholeWord,
      maxResults: limitedMaxResults,
    }
  );

  // The in-page API is untyped; this is the single trust boundary.
  return result as TextRectSearchResult;
}

0 commit comments

Comments
 (0)