Skip to content

Commit ab72cb7

Browse files
author
Sentience Dev
committed
Merge pull request #7 from SentienceAPI/injected_func
updated release pipeline with new functions for read
2 parents 570cd57 + 6bdd17e commit ab72cb7

File tree

9 files changed

+445
-18
lines changed

9 files changed

+445
-18
lines changed

.github/workflows/sync-extension.yml

Lines changed: 72 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ jobs:
2525
uses: actions/checkout@v4
2626
with:
2727
token: ${{ secrets.GITHUB_TOKEN }}
28+
fetch-depth: 0 # Fetch all history for proper branching
2829

2930
- name: Set up Node.js
3031
uses: actions/setup-node@v4
@@ -62,31 +63,79 @@ jobs:
6263
mkdir -p extension-temp
6364
cd extension-temp
6465
65-
# Download each file from release
66-
curl -L -H "Authorization: token ${{ secrets.SENTIENCE_CHROME_TOKEN }}" \
66+
# First, try to download the zip archive if available
67+
ZIP_URL=$(curl -s -H "Authorization: token ${{ secrets.SENTIENCE_CHROME_TOKEN }}" \
6768
"https://api.github.com/repos/$REPO/releases/tags/$TAG" | \
68-
jq -r '.assets[] | select(.name | endswith(".js") or endswith(".wasm") or endswith(".json") or endswith(".d.ts")) | .browser_download_url' | \
69-
while read url; do
70-
filename=$(basename "$url")
71-
curl -L -H "Authorization: token ${{ secrets.SENTIENCE_CHROME_TOKEN }}" "$url" -o "$filename"
72-
done
69+
jq -r '.assets[] | select(.name == "extension-package.zip") | .browser_download_url')
70+
71+
if [ -n "$ZIP_URL" ] && [ "$ZIP_URL" != "null" ]; then
72+
echo "📦 Downloading extension-package.zip..."
73+
curl -L -H "Authorization: token ${{ secrets.SENTIENCE_CHROME_TOKEN }}" "$ZIP_URL" -o extension-package.zip
74+
unzip -q extension-package.zip -d .
75+
# Files should now be in extension-temp/extension-package/ or extension-temp/
76+
if [ -d "extension-package" ]; then
77+
mv extension-package/* . 2>/dev/null || true
78+
rmdir extension-package 2>/dev/null || true
79+
fi
80+
else
81+
echo "📁 Downloading individual files from release..."
82+
# Download each file from release
83+
curl -s -H "Authorization: token ${{ secrets.SENTIENCE_CHROME_TOKEN }}" \
84+
"https://api.github.com/repos/$REPO/releases/tags/$TAG" | \
85+
jq -r '.assets[] | select(.name | endswith(".js") or endswith(".wasm") or endswith(".json") or endswith(".d.ts")) | .browser_download_url' | \
86+
while read url; do
87+
if [ -n "$url" ] && [ "$url" != "null" ]; then
88+
filename=$(basename "$url")
89+
echo " Downloading $filename..."
90+
curl -L -H "Authorization: token ${{ secrets.SENTIENCE_CHROME_TOKEN }}" "$url" -o "$filename"
91+
fi
92+
done
93+
fi
94+
95+
# Verify files were downloaded
96+
echo "📋 Downloaded files:"
97+
ls -la
7398
7499
- name: Copy extension files
75100
if: steps.release.outputs.skip != 'true'
76101
run: |
77102
# Create extension directory structure
78103
mkdir -p src/extension/pkg
79104
80-
# Copy extension files
81-
cp extension-temp/manifest.json src/extension/ 2>/dev/null || echo "manifest.json not found in release"
82-
cp extension-temp/content.js src/extension/ 2>/dev/null || echo "content.js not found in release"
83-
cp extension-temp/background.js src/extension/ 2>/dev/null || echo "background.js not found in release"
84-
cp extension-temp/injected_api.js src/extension/ 2>/dev/null || echo "injected_api.js not found in release"
105+
# Copy extension files (expected at the root of extension-temp)
106+
cp extension-temp/manifest.json src/extension/ 2>/dev/null || echo "⚠️ manifest.json not found in release"
107+
cp extension-temp/content.js src/extension/ 2>/dev/null || echo "⚠️ content.js not found in release"
108+
cp extension-temp/background.js src/extension/ 2>/dev/null || echo "⚠️ background.js not found in release"
109+
cp extension-temp/injected_api.js src/extension/ 2>/dev/null || echo "⚠️ injected_api.js not found in release"
85110
86-
# Copy WASM files
87-
cp extension-temp/pkg/sentience_core.js src/extension/pkg/ 2>/dev/null || echo "sentience_core.js not found"
88-
cp extension-temp/pkg/sentience_core_bg.wasm src/extension/pkg/ 2>/dev/null || echo "sentience_core_bg.wasm not found"
89-
cp extension-temp/pkg/*.d.ts src/extension/pkg/ 2>/dev/null || echo "Type definitions not found"
111+
# Copy WASM files (check both root and pkg subdirectory)
112+
if [ -f "extension-temp/pkg/sentience_core.js" ]; then
113+
cp extension-temp/pkg/sentience_core.js src/extension/pkg/
114+
elif [ -f "extension-temp/sentience_core.js" ]; then
115+
cp extension-temp/sentience_core.js src/extension/pkg/
116+
else
117+
echo "⚠️ sentience_core.js not found"
118+
fi
119+
120+
if [ -f "extension-temp/pkg/sentience_core_bg.wasm" ]; then
121+
cp extension-temp/pkg/sentience_core_bg.wasm src/extension/pkg/
122+
elif [ -f "extension-temp/sentience_core_bg.wasm" ]; then
123+
cp extension-temp/sentience_core_bg.wasm src/extension/pkg/
124+
else
125+
echo "⚠️ sentience_core_bg.wasm not found"
126+
fi
127+
128+
# Copy TypeScript definitions
129+
if [ -d "extension-temp/pkg" ]; then
130+
cp extension-temp/pkg/*.d.ts src/extension/pkg/ 2>/dev/null || echo "⚠️ Type definitions not found"
131+
elif [ -d "extension-temp" ]; then
132+
cp extension-temp/*.d.ts src/extension/pkg/ 2>/dev/null || echo "⚠️ Type definitions not found"
133+
fi
134+
135+
# Verify copied files
136+
echo "📋 Copied files:"
137+
ls -la src/extension/
138+
ls -la src/extension/pkg/ 2>/dev/null || echo "⚠️ pkg directory not created"
90139
91140
- name: Check for changes
92141
if: steps.release.outputs.skip != 'true'
@@ -107,7 +156,9 @@ jobs:
107156
if: steps.release.outputs.skip != 'true' && steps.changes.outputs.changed == 'true'
108157
uses: peter-evans/create-pull-request@v5
109158
with:
110-
token: ${{ secrets.GITHUB_TOKEN }}
159+
# Use PR_TOKEN (a PAT) when that secret is set; otherwise fall back to the built-in GITHUB_TOKEN
160+
# To use PAT: create secret named PR_TOKEN with a Personal Access Token that has 'repo' scope
161+
token: ${{ secrets.PR_TOKEN || secrets.GITHUB_TOKEN }}
111162
commit-message: "chore: sync extension files from sentience-chrome ${{ steps.release.outputs.tag }}"
112163
title: "Sync Extension: ${{ steps.release.outputs.tag }}"
113164
body: |
@@ -117,7 +168,10 @@ jobs:
117168
- Extension manifest and scripts
118169
- WASM binary and bindings
119170
120-
**Source:** [sentience-chrome release ${{ steps.release.outputs.tag }}](${{ secrets.SENTIENCE_CHROME_REPO }}/releases/tag/${{ steps.release.outputs.tag }})
171+
**Source:** [sentience-chrome release ${{ steps.release.outputs.tag }}](https://github.com/${{ secrets.SENTIENCE_CHROME_REPO }}/releases/tag/${{ steps.release.outputs.tag }})
121172
branch: sync-extension-${{ steps.release.outputs.tag }}
122173
delete-branch: true
174+
labels: |
175+
automated
176+
extension-sync
123177

README.md

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,14 @@ npm run build
7878
- `snapshot(browser, options)` - Capture page state
7979
- TypeScript types for type safety
8080

81+
### Content Reading & Screenshots
82+
- `read(browser, options)` - Read page content as text or markdown
83+
- Enhanced markdown conversion using `turndown` (better than extension's lightweight conversion)
84+
- Supports `enhance_markdown` option (enabled by default) to use improved conversion; pass `false` to keep the extension's own output
85+
- `screenshot(browser, options)` - Capture standalone screenshot
86+
- Returns base64-encoded data URL
87+
- Supports PNG and JPEG formats with quality control
88+
8189
### Day 4: Query Engine
8290
- `query(snapshot, selector)` - Find elements matching selector
8391
- `find(snapshot, selector)` - Find single best match
@@ -105,6 +113,50 @@ See `examples/` directory:
105113
- `query-demo.ts` - Query engine
106114
- `wait-and-click.ts` - Wait and actions
107115

116+
### Content Reading Example
117+
118+
```typescript
119+
import { SentienceBrowser, read } from './src';
120+
121+
const browser = new SentienceBrowser();
122+
await browser.start();
123+
124+
await browser.getPage().goto('https://example.com');
125+
await browser.getPage().waitForLoadState('networkidle');
126+
127+
// Read as enhanced markdown (better quality)
128+
const result = await read(browser, {
129+
format: 'markdown',
130+
enhance_markdown: true
131+
});
132+
console.log(result.content); // High-quality markdown
133+
134+
await browser.close();
135+
```
136+
137+
### Screenshot Example
138+
139+
```typescript
140+
import { SentienceBrowser, screenshot } from './src';
141+
import { writeFileSync } from 'fs';
142+
143+
const browser = new SentienceBrowser();
144+
await browser.start();
145+
146+
await browser.getPage().goto('https://example.com');
147+
await browser.getPage().waitForLoadState('networkidle');
148+
149+
// Capture PNG screenshot
150+
const dataUrl = await screenshot(browser, { format: 'png' });
151+
152+
// Save to file
153+
const base64Data = dataUrl.split(',')[1];
154+
const imageData = Buffer.from(base64Data, 'base64');
155+
writeFileSync('screenshot.png', imageData);
156+
157+
await browser.close();
158+
```
159+
108160
## Testing
109161

110162
```bash

package-lock.json

Lines changed: 28 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,13 @@
1818
},
1919
"dependencies": {
2020
"playwright": "^1.40.0",
21+
"turndown": "^7.2.2",
2122
"zod": "^3.22.0"
2223
},
2324
"devDependencies": {
2425
"@types/jest": "^29.5.14",
2526
"@types/node": "^20.0.0",
27+
"@types/turndown": "^5.0.3",
2628
"jest": "^29.0.0",
2729
"ts-jest": "^29.0.0",
2830
"ts-node": "^10.9.0",

src/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,7 @@ export { expect, Expectation } from './expect';
1111
export { Inspector, inspect } from './inspector';
1212
export { Recorder, Trace, TraceStep, record } from './recorder';
1313
export { ScriptGenerator, generate } from './generator';
14+
export { read, ReadOptions, ReadResult } from './read';
15+
export { screenshot, ScreenshotOptions } from './screenshot';
1416
export * from './types';
1517

src/read.ts

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/**
2+
* Read page content - enhanced markdown conversion
3+
*/
4+
5+
import { SentienceBrowser } from './browser';
6+
import TurndownService from 'turndown';
7+
8+
/**
 * Options accepted by `read`.
 */
export interface ReadOptions {
  /** Output format to request; `read` falls back to `'text'` when omitted. */
  format?: 'text' | 'markdown';
  /**
   * Whether `read` should re-convert the page with turndown when the format is
   * `'markdown'`. Treated as enabled unless explicitly set to `false`.
   */
  enhance_markdown?: boolean;
}
12+
13+
/**
 * Result returned by `read`.
 */
export interface ReadResult {
  /** Outcome reported for the read operation. */
  status: 'success' | 'error';
  /** URL associated with the content (presumably the page URL; set by the extension). */
  url: string;
  /** Format of `content`. */
  format: 'text' | 'markdown';
  /** The page content in the given format. */
  content: string;
  /** Content length; `read` sets this to `content.length` when it replaces the content. */
  length: number;
  /** Error description, e.g. set by `read` when markdown enhancement fails. */
  error?: string;
}
21+
22+
/**
 * Read the current page's content as plain text or markdown.
 *
 * The content is first obtained from the in-page `window.sentience.read` API.
 * When markdown is requested and enhancement is enabled, the page's full HTML
 * is converted again on the Node side with `turndown`, and that output
 * replaces the extension-provided `content` and `length`.
 *
 * @param browser - SentienceBrowser instance whose current page is read
 * @param options - Read options; `format` defaults to `'text'` and
 *   `enhance_markdown` defaults to `true`
 * @returns The extension's ReadResult. On successful enhancement `content` and
 *   `length` hold the turndown output; if enhancement throws, the extension's
 *   content is kept and `error` describes the failure.
 * @throws If `window.sentience.read` is not available in the page, or if the
 *   page evaluation itself fails
 */
export async function read(
  browser: SentienceBrowser,
  options: ReadOptions = {}
): Promise<ReadResult> {
  const page = browser.getPage();
  const format = options.format ?? 'text';
  const enhanceMarkdown = options.enhance_markdown !== false; // Default to true

  // Get basic content from extension
  const result: ReadResult = await page.evaluate(
    (opts) => {
      // `sentience` is injected into the page by the extension and is not part
      // of the DOM typings, hence the narrowly scoped `any`.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const api = (window as any).sentience;
      if (typeof api?.read !== 'function') {
        // Fail with an actionable message instead of a bare TypeError
        throw new Error(
          'window.sentience.read is not available - is the Sentience extension loaded?'
        );
      }
      return api.read(opts);
    },
    { format }
  );

  // Enhance markdown if requested and format is markdown
  if (format === 'markdown' && enhanceMarkdown && result.status === 'success') {
    try {
      // Get full HTML from page
      const htmlContent = await page.evaluate(
        () => document.documentElement.outerHTML
      );

      // Use turndown for better conversion
      const turndownService = new TurndownService({
        headingStyle: 'atx', // Use # for headings
        bulletListMarker: '-', // Use - for lists
        codeBlockStyle: 'fenced', // Use ``` for code blocks
      });

      // Strikethrough rule. A filter function is used because the deprecated
      // `strike` tag is not part of the typed tag-name list, which would
      // otherwise force an `any` cast.
      const strikeTags = ['DEL', 'S', 'STRIKE'];
      turndownService.addRule('strikethrough', {
        filter: (node) => strikeTags.includes(node.nodeName),
        replacement: (content: string) => `~~${content}~~`,
      });

      // Strip unwanted tags
      turndownService.remove(['script', 'style', 'nav', 'footer', 'header', 'noscript']);

      const enhancedMarkdown = turndownService.turndown(htmlContent);
      result.content = enhancedMarkdown;
      result.length = enhancedMarkdown.length;
    } catch (e) {
      // If enhancement fails, keep the extension's content and record why
      result.error = `Markdown enhancement failed: ${e}`;
    }
  }

  return result;
}
80+

0 commit comments

Comments
 (0)