diff --git a/auth.js b/auth.js index 81f4ec8..516e36b 100644 --- a/auth.js +++ b/auth.js @@ -99,23 +99,3 @@ export function persistToken(token) { localStorage.setItem(TOKEN_KEY, token) return token } - -/** - * Extract the agent IRI from a TPEN JWT. - * - * Mirrors tpen3-interfaces/components/iiif-tools/index.js:getAgentIRIFromToken. - * The agent IRI lives in a custom claim whose key ends with `/agent` - * (typically `http://store.rerum.io/agent`). - * @param {string} token - * @returns {string|null} - */ -export function getAgentIRIFromToken(token) { - try { - const decoded = decodeJwt(token) - if (!decoded || typeof decoded !== 'object') return null - const key = Object.keys(decoded).find(k => k.endsWith('/agent')) || 'http://store.rerum.io/agent' - return decoded[key] ?? null - } catch { - return null - } -} diff --git a/iiif-ids.js b/iiif-ids.js index b104a9a..48a1801 100644 --- a/iiif-ids.js +++ b/iiif-ids.js @@ -30,3 +30,35 @@ export function trailingId(value) { const parts = String(iri).split('/').filter(Boolean) return parts.pop() ?? null } + +/** + * Pull a Media Fragments `xywh=…` selector value out of any of the target + * shapes that flow through this app: + * + * - W3C `SpecificResource` object with `selector.value` (or `selector[0].value` + * when the selector is wrapped in an array). + * - Bare string target like `"#xywh=10,20,300,40"` — historical + * annotations stored this way still show up in hydrated pages. + * - Already-bare selector like `"xywh=10,20,300,40"` — the shape prompts emit + * in condensed fallback payloads. + * + * Returns the full `"xywh=…"` form (suitable for a `FragmentSelector.value`) + * or `null` if no selector is present. Strips the non-standard `pixel:` + * prefix that Annotorious produces. + * @param {any} target a target value: string, `SpecificResource`, or nullish. 
+ * @returns {string|null} + */ +export function parseXywh(target) { + if (typeof target === 'string') { + if (!target.includes('xywh=')) return null + return target.slice(target.indexOf('xywh=')).replace(/^xywh=pixel:/, 'xywh=') + } + if (target && typeof target === 'object') { + const sel = target.selector + const value = Array.isArray(sel) ? sel[0]?.value : sel?.value + if (typeof value === 'string' && value.includes('xywh=')) { + return value.slice(value.indexOf('xywh=')).replace(/^xywh=pixel:/, 'xywh=') + } + } + return null +} diff --git a/main.js b/main.js index 3376704..39ae391 100644 --- a/main.js +++ b/main.js @@ -110,14 +110,29 @@ export class PromptsApp { return } + // Hydrated items carry more than `{id, type}`; unhydrated vault refs + // carry only those two keys. + const isHydratedItem = it => it && typeof it === 'object' && + (it.target !== undefined || it.body !== undefined || it.motivation !== undefined) + + const isPageHydrated = p => + Array.isArray(p?.items) && + (p.items.length === 0 || isHydratedItem(p.items[0])) + + const isProjectHydrated = p => + Array.isArray(p?.layers) && + (p.layers.length === 0 || Array.isArray(p.layers[0]?.pages)) + // Upgrade a stub project (no layers) when we have a token. - if (!project.layers && this.token) { + if (this.token && !isProjectHydrated(project)) { + console.warn('[tpen-prompts] refetching project — parent sent unhydrated payload') try { project = await fetchProject(projectID, this.token) } catch (err) { console.warn('fetchProject failed', err) } } const pageID = page ? (trailingId(page) ?? '') : '' - // Upgrade a stub page (no items) when we have a token and a pageID. - if (page && !Array.isArray(page.items) && this.token && pageID) { + // Upgrade a stub page (unhydrated items) when we have a token and a pageID. 
+ if (this.token && pageID && !isPageHydrated(page)) { + console.warn('[tpen-prompts] refetching page — parent sent unhydrated payload') try { page = await fetchPageResolved(projectID, pageID, this.token) ?? page } catch (err) { console.warn('fetchPageResolved failed', err) } } diff --git a/prompt-generator.js b/prompt-generator.js index fd3695c..72aec8d 100644 --- a/prompt-generator.js +++ b/prompt-generator.js @@ -14,6 +14,7 @@ import { detectColumnsTemplate } from './templates/detect-columns/index.js' import { detectLinesTemplate } from './templates/detect-lines/index.js' import { detectColumnsAndLinesTemplate } from './templates/detect-columns-and-lines/index.js' import { detectAndTranscribeTemplate } from './templates/detect-and-transcribe/index.js' +import { detectOrderAndTranscribeTemplate } from './templates/detect-order-and-transcribe/index.js' /** * @typedef {object} PromptTemplate @@ -42,6 +43,7 @@ register(detectColumnsTemplate) register(detectLinesTemplate) register(detectColumnsAndLinesTemplate) register(detectAndTranscribeTemplate) +register(detectOrderAndTranscribeTemplate) /** * Fetch every registered template's markdown body once and cache it. 
Must be diff --git a/styles.css b/styles.css index dce914f..ac3cc8f 100644 --- a/styles.css +++ b/styles.css @@ -115,7 +115,8 @@ button:hover:not(:disabled) { filter: brightness(1.05); } button:focus-visible, input:focus-visible, select:focus-visible, -textarea:focus-visible { +textarea:focus-visible, +summary:focus-visible { outline: 2px solid var(--accent); outline-offset: 2px; } @@ -163,3 +164,25 @@ textarea:focus-visible { .tool-header { text-align: center; } + +.fallback { + margin-top: 1rem; + border: 1px solid var(--border); + border-radius: 4px; + padding: 0.5rem 0.75rem; + background: var(--surface); +} +.fallback summary { + cursor: pointer; + font-size: 0.9rem; + color: var(--muted); + padding: 0.25rem 0; +} +.fallback summary:hover, +.fallback summary:focus-visible { color: var(--text); } +.fallback .hint { + margin: 0.5rem 0; + font-size: 0.85rem; + color: var(--muted); +} +.fallback textarea { margin-top: 0.25rem; } diff --git a/templates/detect-and-transcribe/PROMPT.md b/templates/detect-and-transcribe/PROMPT.md index a0c04fc..b098faa 100644 --- a/templates/detect-and-transcribe/PROMPT.md +++ b/templates/detect-and-transcribe/PROMPT.md @@ -1,46 +1,49 @@ # Task: detect and transcribe every text line on a TPEN3 page end-to-end -You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services. +You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services (direct) or emitted as a fallback JSON payload for the user to paste. ## Context -- Project: {{projectID}} -- Page: {{pageID}} - Canvas: {{canvasId}} - Canvas Dimensions: {{canvasWidth}} × {{canvasHeight}} - Image: {{imageUrl}} -- Manifest: {{manifestUri}} -- User Agent URI: {{userAgentURI}} - Page endpoint: {{pageEndpoint}} ## Preconditions -1. Required context present: `projectID`, `pageID`, `canvasId`, `{{token}}`. 
If any is missing, stop and report. -2. Vision capability: you must be able to load the page image as raw bytes, measure pixel coordinates, and crop/inspect per-line regions. -3. Authorization: `{{token}}` must be usable for PUT against the page endpoint (and optionally PATCH for per-line text updates). -4. HTTP PUT capability with `Content-Type: application/json`; optional PATCH with `Content-Type: text/plain`. +All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: -If any precondition fails, stop and return a concise failure report. +1. Ability to fetch the image bytes (or a derivative) and identify line bounds and text from them. Precise pixel measurement is preferred when available; visual estimation and on-sight transcription from the fetched image are acceptable otherwise. +2. Either HTTP PUT capability with `Content-Type: application/json`, or the ability to emit the payload as a fallback JSON code block in your report. If HTTP PUT is not available, skip straight to the Fallback section — do not retry. + +Use only tools already available in your environment. Do not install packages, libraries, or system utilities. ## Steps -1. Resolve canvas dimensions. Use `{{canvasWidth}}`/`{{canvasHeight}}` when numeric. Otherwise GET `{{canvasId}}` and read `width`/`height`. If that fails, GET `{{manifestUri}}` and find the matching canvas in `items` by id. -2. Fetch the page image. Detect every text line in reading order. -3. For each line, measure a bounding box and convert to integer canvas coordinates. Clamp to the canvas and round. +1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. 
For the page-overview pass, fetch a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. If you have precise pixel tooling and want tighter bounds or a clearer crop for transcription, request a region server-side as `{base}/x,y,w,h/max/0/default.jpg` and add the crop's `x,y` origin back before applying the canvas conversion below. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. +2. Detect text lines across the whole page in reading order. This task does not create TPEN columns. +3. For every line, measure a bounding box in image-pixel space and convert to integer canvas coordinates using: + - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` + - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` + - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` + - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` + Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). 4. Run handwriting text recognition on each line's crop. Apply the recognition rules below. -5. Build one Annotation per line with the recognized text as the `TextualBody` value and `xywh=x,y,w,h` (integer canvas coordinates). -6. PUT the full set of line annotations to the page endpoint in a single request. -7. Optionally PATCH a specific line's text afterward if a recognition result needs a later revision (see TPEN API). -8. Report counts (lines saved) and notable ambiguities (e.g., illegible lines transcribed as empty or flagged). +5. If HTTP PUT is available, build the full payload under **TPEN API** — one Annotation per line with the recognized text and `xywh=x,y,w,h` selector — and send the request once. On any non-2xx response, do not retry — fall back. +6. If HTTP PUT is unavailable (or step 5 fell back), emit the condensed payload under **Fallback** as the final code block. +7. 
Report counts (lines saved/in payload, non-empty text, uncertain) and which path was used (direct PUT or fallback). +8. Report notable ambiguities (e.g., illegible lines transcribed as empty or flagged). ## Rules ### Detection (IMAGE_ANALYSIS) -- Preserve reading order. Prefer high recall for likely text lines over aggressive pruning. -- Keep line boxes tight but do not clip ascenders/descenders. -- Flag ambiguous regions in the report rather than silently dropping them. - Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. +- Preserve reading order across the whole page. +- Prefer tight bounds when you can measure them; best-effort bounds are acceptable. When uncertain whether a tall run is one line or several, prefer splitting over merging. +- Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line. +- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected downstream. +- Zero lines detected is an unprocessable outcome. Stop and report — do not PUT, do not emit a fallback payload. An empty `items` array would erase every existing annotation on the page. ### Recognition (HANDWRITING_TEXT_RECOGNITION) @@ -53,7 +56,7 @@ If any precondition fails, stop and return a concise failure report. ## TPEN API -Save every detected line with its transcription in a single PUT: +Save every detected line with its transcription in a single PUT. The `items` array must contain one annotation per detected line; replace `x,y,w,h` with the integer canvas coordinates computed in step 3, and the empty `value` string with the recognized text (leave it as an empty string for fully illegible lines). 
``` PUT {{pageEndpoint}} @@ -63,8 +66,6 @@ Content-Type: application/json { "items": [ { - "type": "Annotation", - "@context": "http://www.w3.org/ns/anno.jsonld", "body": [{ "type": "TextualBody", "value": "", "format": "text/plain" }], "target": { "source": "{{canvasId}}", @@ -81,39 +82,33 @@ Content-Type: application/json } ``` -Optional per-line text revision after the PUT: +## Fallback -``` -PATCH {{pageEndpoint}}/line//text -Authorization: Bearer {{token}} -Content-Type: text/plain +When the direct PUT is impossible or returns non-2xx, emit the condensed payload below as the final code block of your report. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON (no comments, no placeholders — substitute the real coordinates and recognized text). - ``` - -Error handling (both calls): - -```javascript -if (!response.ok) { - throw new Error(`TPEN API ${response.status}: ${await response.text()}`) +{ + "items": [ + { "text": "", "target": "xywh=x,y,w,h" } + ] } ``` +One item per detected line, in reading order. `target` is the bare selector value (no `#`, no `pixel:` prefix). `text` is an empty string for fully illegible lines — do not drop the item. + ## Completion -On success, report: +Direct PUT path, report: -- operation: `PUT page` (plus any follow-up `PATCH line text`) -- target: `{{pageEndpoint}}` +- operation: `PUT page` +- target: {{pageEndpoint}} - counts: lines saved, lines with non-empty text, lines flagged uncertain - notable ambiguities worth a human review -On failure, report: - -- the failing stage (image fetch, detection, recognition, PUT, or PATCH) -- HTTP status and error body -- recommended next step +Fallback path, report: -## Fallback - -If vision / write capability is missing, do not fabricate geometry or transcriptions. Report what is missing and stop. 
+- path: `fallback` +- counts: lines in payload, lines with non-empty text, lines flagged uncertain +- HTTP status and error body if a PUT was attempted first +- notable ambiguities worth a human review +- final code block: the condensed `{ "items": [...] }` JSON for the user to paste diff --git a/templates/detect-and-transcribe/index.js b/templates/detect-and-transcribe/index.js index 32eb56a..55c4aaf 100644 --- a/templates/detect-and-transcribe/index.js +++ b/templates/detect-and-transcribe/index.js @@ -12,7 +12,7 @@ import { buildTemplateContext } from '../inject-context.js' /** @type {import('../../prompt-generator.js').PromptTemplate} */ export const detectAndTranscribeTemplate = { id: 'detect-and-transcribe', - label: 'Auto Main Content Detection + Auto Transcription', + label: 'Line Detection + Transcription', templateUrl: new URL('./PROMPT.md', import.meta.url), buildContext: buildTemplateContext } diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index f69a896..f69f3e6 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -1,16 +1,12 @@ # Task: detect columns AND lines on a TPEN3 page and save both to the page -You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services. +You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services (direct) or emitted as a fallback JSON payload for the user to paste. ## Context -- Project: {{projectID}} -- Page: {{pageID}} - Canvas: {{canvasId}} - Canvas Dimensions: {{canvasWidth}} × {{canvasHeight}} - Image: {{imageUrl}} -- Manifest: {{manifestUri}} -- User Agent URI: {{userAgentURI}} - Page endpoint: {{pageEndpoint}} ## Existing columns on this page @@ -19,37 +15,43 @@ You are assisting with TPEN manuscript transcription. 
Perform the task end-to-en ## Preconditions -1. Required context present: `projectID`, `pageID`, `canvasId`, `{{token}}`. If any is missing, stop and report. -2. Vision capability: you must be able to load the page image as raw bytes and measure pixel coordinates on it. -3. Authorization: `{{token}}` must be usable for both POST (column) and PUT (page) against the page endpoints. -4. HTTP POST and PUT capability with `Content-Type: application/json`. +All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: -If any precondition fails, stop and return a concise failure report. +1. Ability to fetch the image bytes (or a derivative) and identify line and column bounds from them. Precise pixel measurement is preferred when available; visual estimation from the fetched image is acceptable otherwise. +2. Either HTTP PUT and POST capability with `Content-Type: application/json`, or the ability to emit the lines-only payload as a fallback JSON code block in your report. Column creation has no fallback — if POST is unavailable, column grouping is dropped. If PUT is unavailable, skip straight to the Fallback section — do not retry. -## Steps +Use only tools already available in your environment. Do not install packages, libraries, or system utilities. -1. Resolve canvas dimensions. Use `{{canvasWidth}}`/`{{canvasHeight}}` when numeric. Otherwise GET `{{canvasId}}` and read `width`/`height`. If that fails, GET `{{manifestUri}}` and find the matching canvas in `items` by id. -2. Fetch the page image. Detect column regions in reading order first, then detect the lines inside each column (reading order preserved within each column). -3. For every line, measure a bounding box and convert to integer canvas coordinates. Clamp to the canvas and round. -4. Mint a stable local id for each line (for example, `line-1`, `line-2`, …) so you can reference them in column `annotations` arrays before the PUT assigns real ids. 
After the PUT, use the server-assigned ids when creating columns. -5. PUT every detected line to the page endpoint (see TPEN API below). Capture the server-assigned annotation ids from the response. -6. For each column, POST `{ label, annotations }` where `annotations` is the server-assigned line ids that belong to that column. Labels must be unique and must not clash with anything in "Existing columns on this page". -7. Report counts: lines saved, columns created, and any failures. +## Steps -Execution order is strict: lines are PUT first, then columns are POSTed against the now-persisted line ids. If the Project read fails and column state cannot be verified, you may proceed to save lines and skip column association — flag it clearly in the report. +1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, fetch a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. If you have precise pixel tooling and want tighter bounds on a region, request it server-side as `{base}/x,y,w,h/max/0/default.jpg` and add the crop's `x,y` origin back before applying the canvas conversion below. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. +2. Detect main text column regions in reading order first, then detect the lines inside each column (reading order preserved within each column). If the page visibly has a single text block, create one column containing every detected line — do not subdivide. Track each line's column index (an integer, 0-based) as you detect it. Then flatten into a single global reading-order sequence across columns (column-major: every line in the first column, then the second, etc., adjusted for script tradition). +3. 
For every line, measure a bounding box in image-pixel space and convert to integer canvas coordinates using: + - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` + - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` + - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` + - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` + Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). +4. If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once in the global reading-order sequence from step 2. If the PUT returns non-2xx, stop and fall back — lines are not persisted yet. If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The PUT response's `items` array is guaranteed to be in the same order as the submitted items, so use each line's column index from step 2 to slice the returned ids. Labels must be unique and must not clash with anything in "Existing columns on this page". If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. +5. If HTTP PUT is unavailable (or the PUT in step 4 failed), emit the condensed payload under **Fallback** as the final code block. Column creation is out of scope for the fallback path. +6. Report counts (lines saved/in payload, columns created/in payload) and which path was used. ## Rules -- Preserve reading order across columns and within each column. -- Prefer high recall: include borderline columns/lines and flag them, rather than silently dropping them. -- Keep line boxes tight enough for line-level recognition but generous enough not to clip ascenders/descenders. - Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. - Column labels are page-scoped and must be unique. 
Do not duplicate an existing column label. - Each line annotation belongs to at most one column. +- Preserve reading order across columns and within each column. +- Line geometry is the primary accuracy target. Column grouping is secondary — for a single-column page, one column containing every line is correct. +- Prefer tight bounds when you can measure them; best-effort bounds are acceptable. When uncertain whether a tall run is one line or several, prefer splitting over merging. +- Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line. +- Do not POST a column with an empty `annotations` array — the server rejects it. Skip any detected column that ends up with zero assigned lines. +- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected downstream. +- Zero lines detected is an unprocessable outcome. Stop and report — do not PUT, do not POST a column, do not emit a fallback payload. An empty `items` array would erase every existing annotation on the page. ## TPEN API -Save all lines via a single PUT: +Save all lines via a single PUT. The `items` array must contain one annotation per detected line; replace `x,y,w,h` with the integer canvas coordinates computed in step 3. 
``` PUT {{pageEndpoint}} @@ -59,8 +61,6 @@ Content-Type: application/json { "items": [ { - "type": "Annotation", - "@context": "http://www.w3.org/ns/anno.jsonld", "body": [], "target": { "source": "{{canvasId}}", @@ -77,41 +77,46 @@ Content-Type: application/json } ``` -Then POST each column: +Then POST each column (reuse the same Bearer token as the PUT above): ``` POST {{pageEndpoint}}/column -Authorization: Bearer {{token}} +Authorization: Bearer <token> Content-Type: application/json { "label": "Column A", - "annotations": ["", ""] + "annotations": ["<line-id-1>", "<line-id-2>"] } ``` -Error handling (both calls): +## Fallback + +When the direct PUT is unavailable or returns non-2xx, emit the condensed payload below as the final code block of your report, in the global reading-order sequence from step 2. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON. Column creation is out of scope for this fallback. -```javascript -if (!response.ok) { - throw new Error(`TPEN API ${response.status}: ${await response.text()}`) +``` +{ + "items": [ + { "target": "xywh=x,y,w,h" } + ] } ``` +One item per detected line, in the global reading-order sequence. `target` is the bare selector value (no `#`, no `pixel:` prefix). `body` is omitted because no text is produced by this task. 
+ ## Completion -On success, report: +Direct path, report: - operations: `PUT page`, `POST column` (×N) -- target: `{{pageEndpoint}}` and `{{pageEndpoint}}/column` +- target: {{pageEndpoint}} (page) and {{pageEndpoint}}/column - counts: lines saved, columns created +- whether lines were saved even if a column POST failed (partial success is acceptable — describe what persists, and list the labels of any columns already created before the failure so a follow-up pass can avoid duplicating them) -On failure, report: - -- the failing stage (image fetch, detection, PUT, or a specific POST) -- HTTP status and error body -- whether lines were saved even if column creation failed (partial success is acceptable — describe what persists) - -## Fallback +Fallback path, report: -If vision / write capability is missing, do not fabricate geometry or send partial payloads. Report what is missing and stop. +- path: `fallback` +- counts: lines in payload +- HTTP status and error body if a request was attempted first +- final code block: the condensed `{ "items": [...] }` JSON for the user to paste 
diff --git a/templates/detect-columns-and-lines/index.js b/templates/detect-columns-and-lines/index.js index decb8e9..bf2c1c5 100644 --- a/templates/detect-columns-and-lines/index.js +++ b/templates/detect-columns-and-lines/index.js @@ -12,10 +12,10 @@ import { buildTemplateContext, formatExistingColumns } from '../inject-context.j /** @type {import('../../prompt-generator.js').PromptTemplate} */ export const detectColumnsAndLinesTemplate = { id: 'detect-columns-and-lines', - label: 'Detect Main Text Columns and Individual Lines', + label: 'Line Detection + Column Grouping', templateUrl: new URL('./PROMPT.md', import.meta.url), buildContext: (ctx) => ({ ...buildTemplateContext(ctx), - existingColumns: formatExistingColumns(ctx.project, ctx.pageID, ctx.page) + existingColumns: formatExistingColumns(ctx.project, ctx.page) }) } diff --git a/templates/detect-columns/PROMPT.md b/templates/detect-columns/PROMPT.md index 652c689..36562bb 100644 --- a/templates/detect-columns/PROMPT.md +++ b/templates/detect-columns/PROMPT.md @@ -1,97 +1,127 @@ -# Task: detect column regions on a TPEN3 page and assign existing lines to them +# Task: order existing lines on a TPEN3 page into reading order and group them into columns -You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services. +You are assisting with TPEN manuscript transcription. This task rebuilds the column layout on a page that already has line annotations. It has no fallback: on any precondition failure, image-analysis failure, or non-2xx response from a TPEN API call, stop and return a failure report. 
## Context -- Project: {{projectID}} -- Page: {{pageID}} - Canvas: {{canvasId}} - Canvas Dimensions: {{canvasWidth}} × {{canvasHeight}} - Image: {{imageUrl}} -- Manifest: {{manifestUri}} -- User Agent URI: {{userAgentURI}} - Page endpoint: {{pageEndpoint}} -## Existing columns on this page +## Existing lines -{{existingColumns}} +Each entry is `<annotation-uri> | xywh=<x,y,w,h> | <body form>` in canvas coordinates, printed in the page's current order. Use the full annotation URI verbatim when assigning lines to columns and when echoing lines in the page PUT. Compare the current order against the reading-order sequence you compute in step 5 to decide whether the PUT in step 8 is necessary. -## Existing lines +The body form is one of: -Each entry is `: ` in canvas coordinates. Use these ids verbatim when assigning lines to columns. +- `body=[]` — echo as `[]`. +- `text="<text>"` — echo as `[{ "type": "TextualBody", "value": "<text>", "format": "text/plain" }]`. +- `body=<json>` — echo the JSON verbatim. {{existingLines}} ## Preconditions -1. Required context present: `projectID`, `pageID`, `canvasId`, `{{token}}`, and a non-empty existing-lines list above. If any is missing, stop and report. -2. Vision capability: you must be able to load the page image as raw bytes and measure pixel coordinates on it. -3. Authorization: `{{token}}` must be usable for POST against the page's column endpoint. -4. HTTP POST capability with `Content-Type: application/json`. +All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions, existing-line list) are provided above. This task operates on an existing line set: `lineCount` = `{{lineCount}}`. If `lineCount` is `0`, stop immediately and return a failure report — this task cannot create lines. + +You must have: -If any precondition fails, stop and return a concise failure report. +1. Programmatic pixel access to the full-resolution image — a numeric pixel buffer you can iterate over. 
A prose description of the image, or any measurement taken from a rendered or previewed image, does not qualify; previews are downsampled and visually estimated bounds will be wrong. **If you cannot obtain pixel data with the capabilities already available to you, stop now and return a failure report naming the missing capability.** +2. HTTP DELETE, POST, and PUT capability with `Content-Type: application/json` (DELETE carries no body). **If any verb is unavailable, stop now and return a failure report naming the missing capability.** + +Use only tools already available in your environment. Do not install packages, libraries, or system utilities. ## Steps -1. Resolve canvas dimensions. Use `{{canvasWidth}}`/`{{canvasHeight}}` when numeric. Otherwise GET `{{canvasId}}` and read `width`/`height`. If that fails, GET `{{manifestUri}}` and find the matching canvas in `items` by id. -2. Analyze the page image and detect column regions in reading order. -3. For each detected column, determine which of the existing line ids (from the list above) fall within its bounds using each line's `xywh`. A line is assigned to exactly one column. -4. Choose a unique label per column (e.g., `Column A`, `Column B`). The label must not clash with any label listed under "Existing columns on this page". -5. POST one column at a time via the column endpoint, with `{ label, annotations }` where `annotations` is the array of line ids assigned to that column. -6. Report the count of created columns and any per-column failures. +1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, prefer a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. 
When you need to inspect a specific region at full fidelity, request it server-side as `{base}/x,y,w,h/max/0/default.jpg` rather than downloading the whole page and cropping locally. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. If you measured coordinates inside a region crop, add the crop's `x,y` origin back before applying the canvas conversion below. +2. Detect main text column regions in reading order in image-pixel space. If the page visibly has a single text block, create one column containing every existing line id — do not subdivide. +3. Convert every detected column region to integer canvas coordinates using: + - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` + - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` + - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` + - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` + Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). +4. For each detected column, determine which of the existing line ids (from the list above) belong to it. Assign a line to the column whose canvas-space region contains the center point of the line's `xywh`. If a line's center falls outside every detected column, assign it to the nearest column by Euclidean distance from the center point to the column's region (distance `0` when the point is inside). Each line belongs to exactly one column. +5. Build a global reading-order sequence of all existing line ids: columns in reading order; within each column, lines sorted top-to-bottom by the `xywh` y-center. +6. DELETE every existing column on the page (see TPEN API below). On any non-2xx, stop and report. Do not POST or PUT after a DELETE failure. +7. For each detected column, POST `{ label, annotations }` where `annotations` is the contiguous slice of the reading-order id sequence from step 5 that belongs to that column. 
Choose a unique label per column (e.g., `Column A`, `Column B`) that does not clash with any other label chosen in this run. On any non-2xx, stop and report — columns POSTed before the failure remain persisted. +8. Compare the step-5 sequence against the "Existing lines" order index-by-index. If they are identical, skip the PUT. Otherwise, PUT the page with `items` in the step-5 order. Each entry re-uses the existing annotation URI verbatim as its `id`, its `body` reconstructed from the entry's body form, and its `target` rebuilt from the entry's `xywh` selector. The server remaps column references when URIs change, but echoing `body` and `target` verbatim avoids minting unnecessary RERUM versions. On any non-2xx, stop and report. +9. Report: columns deleted, columns created, whether the page order was updated, and per-column line counts. ## Rules - Preserve reading order. Columns proceed as the page is read (left→right for Latin-script layouts; adjust for script tradition). -- Prefer high recall: include borderline regions as columns when they contain text, rather than silently dropping them. +- Prefer high recall: include borderline regions as columns when they contain text rather than silently dropping them. - Keep column boundaries tight enough that each line clearly belongs to one column, but generous enough to avoid clipping existing line selectors. -- Flag ambiguous regions (e.g., marginalia that may be a column) in the report rather than silently including or excluding them. -- Column labels are page-scoped and must be unique. Do not duplicate an existing column label. -- Annotations cannot be assigned to more than one column. If a line clearly sits in an existing column, do not reassign it. +- Column labels must be unique within this run. The DELETE in step 6 clears every existing column, so no pre-existing label can collide. +- Each existing line belongs to exactly one column. 
+- Do not POST a column with an empty `annotations` array — the server rejects it. If a detected column would end up with zero assigned lines, merge its assignments into the nearest populated column instead. +- Echo each line's existing `body` and `target` unchanged in the PUT. Changing either mints a new RERUM version of the line; the server remaps columns to the new URIs, but echoing verbatim avoids the needless version. ## TPEN API -Create one column: +First, delete all existing columns on the page. Expect `204 No Content` on success (including when the page had no columns): ``` -POST {{pageEndpoint}}/column +DELETE {{pageEndpoint}}/clear-columns Authorization: Bearer {{token}} +``` + +Then POST each new column — one request per column: + +``` +POST {{pageEndpoint}}/column +Authorization: Bearer {{token}} Content-Type: application/json { "label": "Column A", - "annotations": ["<line-id>", "<line-id>", "<line-id>"] + "annotations": ["<line-uri>", "<line-uri>", "<line-uri>"] } ``` -Each `<line-id>` is the trailing id segment of a line annotation listed above. +Each `<line-uri>` is the full id of an existing line listed above, used verbatim. -Error handling: +Finally, if step 8 determined the reading order changed, PUT the page to rewrite its canonical line order. Each `items` entry carries the existing annotation URI verbatim as `id`, its `body` reconstructed from the entry's body form in "Existing lines", and its `target` rebuilt from the entry's `xywh` selector: -```javascript -if (!response.ok) { - throw new Error(`TPEN API ${response.status}: ${await response.text()}`) -} ``` +PUT {{pageEndpoint}} +Authorization: Bearer {{token}} +Content-Type: application/json -Column verification (best-effort): if you need to re-read current columns mid-task, GET `{{projectEndpoint}}` with `Authorization: Bearer {{token}}` and locate the page inside `project.layers[*].pages[*]` — inspect `page.columns`. Do not block column creation on a failed Project read; continue with the POSTs and flag verification as unavailable.
- -## Completion +{ + "items": [ + { + "id": "", + "body": , + "target": { + "source": "{{canvasId}}", + "type": "SpecificResource", + "selector": { + "type": "FragmentSelector", + "conformsTo": "http://www.w3.org/TR/media-frags/", + "value": "xywh=x,y,w,h" + } + } + } + ] +} +``` -On success, report: +## Failure -- operation: `POST column` -- target: `{{pageEndpoint}}/column` -- count: number of columns created -- per-column line counts +There is no fallback. If image analysis cannot be performed or any TPEN API call returns non-2xx, stop and report: -On failure, report: +- the failing stage (precondition, image analysis, DELETE clear-columns, POST column, or PUT page) +- HTTP status and error body when applicable +- which operations persisted before the failure (e.g., `DELETE succeeded, POST Column A succeeded, POST Column B failed`) so the resulting page state is clear -- the failing stage (image fetch, detection, POST) -- HTTP status and error body for any failed POST -- recommended next step (e.g., choose a different label, reassign lines) +## Completion -## Fallback +On success, report: -If required resources are unreachable or you lack vision / POST capability, do not fabricate column geometry and do not send partial POSTs that misassign lines. Report what is missing and stop. +- operations: `DELETE clear-columns`, `POST column` (×N), optionally `PUT page` +- targets: `{{pageEndpoint}}/clear-columns`, `{{pageEndpoint}}/column`, `{{pageEndpoint}}` (page) +- counts: columns deleted, columns created, per-column line counts +- whether the page order was updated diff --git a/templates/detect-columns/index.js b/templates/detect-columns/index.js index d7c8b16..0eb33b6 100644 --- a/templates/detect-columns/index.js +++ b/templates/detect-columns/index.js @@ -1,22 +1,21 @@ /** - * @file Template: "Detect columns → POST column". - * - * Targets workflow #2 from the absorbed cubap `_tools/COMMON_TASKS.md`: - * Column Detection. 
+ * @file Template: "Detect columns → clear-columns DELETE, then POST column per + * detected column, then PUT page to reorder lines (if needed)". Operates on + * an existing line set; no fallback — fails and reports if image analysis or + * any of the three HTTP verbs are unavailable. * * @author thehabes */ -import { buildTemplateContext, formatExistingColumns, formatExistingLines } from '../inject-context.js' +import { buildTemplateContext, formatExistingLines } from '../inject-context.js' /** @type {import('../../prompt-generator.js').PromptTemplate} */ export const detectColumnsTemplate = { id: 'detect-columns', - label: 'Detect Main Text Columns', + label: 'Group Existing Lines Into Columns', templateUrl: new URL('./PROMPT.md', import.meta.url), buildContext: (ctx) => ({ ...buildTemplateContext(ctx), - existingColumns: formatExistingColumns(ctx.project, ctx.pageID, ctx.page), existingLines: formatExistingLines(ctx.page) }) } diff --git a/templates/detect-lines/PROMPT.md b/templates/detect-lines/PROMPT.md index a5ba85c..f21e559 100644 --- a/templates/detect-lines/PROMPT.md +++ b/templates/detect-lines/PROMPT.md @@ -1,47 +1,49 @@ # Task: detect every text line on a TPEN3 page and save them to the page -You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services. +You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services (direct) or emitted as a fallback JSON payload for the user to paste. ## Context -- Project: {{projectID}} -- Page: {{pageID}} - Canvas: {{canvasId}} - Canvas Dimensions: {{canvasWidth}} × {{canvasHeight}} - Image: {{imageUrl}} -- Manifest: {{manifestUri}} -- User Agent URI: {{userAgentURI}} - Page endpoint: {{pageEndpoint}} ## Preconditions -1. Required context present: `projectID`, `pageID`, `canvasId`, `{{token}}`. If any is missing, stop and report. -2. 
Vision capability: you must be able to load the page image as raw bytes and measure pixel coordinates on it. -3. Authorization: `{{token}}` must be usable for PUT against the page endpoint. -4. HTTP PUT capability with `Content-Type: application/json`. +All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: -If any precondition fails, stop and return a concise failure report. +1. Ability to fetch the image bytes (or a derivative) and identify line bounds from them. Precise pixel measurement is preferred when available; visual estimation from the fetched image is acceptable otherwise. +2. Either HTTP PUT capability with `Content-Type: application/json`, or the ability to emit the payload as a fallback JSON code block in your report. If HTTP PUT is not available, skip straight to the Fallback section — do not retry. + +Use only tools already available in your environment. Do not install packages, libraries, or system utilities. ## Steps -1. Resolve canvas dimensions. Use `{{canvasWidth}}`/`{{canvasHeight}}` when numeric. Otherwise GET `{{canvasId}}` and read `width`/`height`. If that fails, GET `{{manifestUri}}` and find the matching canvas in `items` by id. -2. Fetch the page image and detect every text line in reading order (top→bottom within a column, columns left→right unless the script tradition dictates otherwise). -3. For each detected line, measure a bounding box on the image and convert it to canvas coordinates. Clamp to the canvas: `x ≥ 0`, `y ≥ 0`, `x + w ≤ canvasWidth`, `y + h ≤ canvasHeight`. Round to integers after clamping. -4. Build one Annotation per line using the shape below, with `body` as an empty array (no text yet) and `value` as `xywh=x,y,w,h` in integer canvas coordinates. -5. PUT the full set of line annotations to the page endpoint. -6. Report count and any failure cause. +1. Resolve `img_w`, `img_h`. 
If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, fetch a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. If you have precise pixel tooling and want tighter bounds on a region, request it server-side as `{base}/x,y,w,h/max/0/default.jpg` and add the crop's `x,y` origin back before applying the canvas conversion below. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. +2. Detect text lines across the whole page in reading order. This task does not create TPEN columns. +3. For every line, measure a bounding box in image-pixel space and convert to integer canvas coordinates using: + - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` + - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` + - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` + - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` + Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). +4. If HTTP PUT is available, build the full payload under **TPEN API** and send the request once. On any non-2xx response, do not retry — fall back. +5. If HTTP PUT is unavailable (or step 4 fell back), emit the condensed payload under **Fallback** as the final code block. +6. Report count and which path was used (direct PUT or fallback). ## Rules -- Preserve reading order across the whole page. -- Prefer high recall: a marginal or faint line that might carry text should be included and flagged, not silently dropped. -- Keep each line box tight enough for line-level recognition — do not merge adjacent lines — but generous enough not to clip ascenders/descenders. -- Flag ambiguous regions in the report rather than silently merging or dropping. 
- Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. +- Preserve reading order across the whole page. +- Prefer tight bounds when you can measure them; best-effort bounds are acceptable. When uncertain whether a tall run is one line or several, prefer splitting over merging. +- Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line. +- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected downstream. +- Zero lines detected is an unprocessable outcome. Stop and report — do not PUT, do not emit a fallback payload. An empty `items` array would erase every existing annotation on the page. ## TPEN API -Save all detected lines via a single PUT: +Save all detected lines via a single PUT. The `items` array must contain one annotation per detected line; replace `x,y,w,h` with the integer canvas coordinates computed in step 3. ``` PUT {{pageEndpoint}} @@ -51,8 +53,6 @@ Content-Type: application/json { "items": [ { - "type": "Annotation", - "@context": "http://www.w3.org/ns/anno.jsonld", "body": [], "target": { "source": "{{canvasId}}", @@ -69,28 +69,31 @@ Content-Type: application/json } ``` -Error handling: +## Fallback -```javascript -if (!response.ok) { - throw new Error(`TPEN API ${response.status}: ${await response.text()}`) +When the direct PUT is impossible or returns non-2xx, emit the condensed payload below as the final code block of your report. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON (no comments, no placeholders — substitute the real coordinates). + +``` +{ + "items": [ + { "target": "xywh=x,y,w,h" } + ] } ``` +One item per detected line, in reading order. `target` is the bare selector value (no `#`, no `pixel:` prefix). 
`body` is omitted because no text is produced by this task. + ## Completion -On success, report: +Direct PUT path, report: - operation: `PUT page` -- target: `{{pageEndpoint}}` +- target: {{pageEndpoint}} - count: number of line annotations saved -On failure, report: - -- the failing stage (image fetch, detection, PUT) -- HTTP status and error body -- recommended next step - -## Fallback +Fallback path, report: -If required resources are unreachable or you lack vision / PUT capability, do not fabricate line geometry. Report what is missing and stop. +- path: `fallback` +- count: number of line annotations in the payload +- HTTP status and error body if a PUT was attempted first +- final code block: the condensed `{ "items": [...] }` JSON for the user to paste diff --git a/templates/detect-lines/index.js b/templates/detect-lines/index.js index d70d494..20b09cc 100644 --- a/templates/detect-lines/index.js +++ b/templates/detect-lines/index.js @@ -12,7 +12,7 @@ import { buildTemplateContext } from '../inject-context.js' /** @type {import('../../prompt-generator.js').PromptTemplate} */ export const detectLinesTemplate = { id: 'detect-lines', - label: 'Detect Individual Lines (No Column Grouping)', + label: 'Line Detection', templateUrl: new URL('./PROMPT.md', import.meta.url), buildContext: buildTemplateContext } diff --git a/templates/detect-order-and-transcribe/PROMPT.md b/templates/detect-order-and-transcribe/PROMPT.md new file mode 100644 index 0000000..f7d5645 --- /dev/null +++ b/templates/detect-order-and-transcribe/PROMPT.md @@ -0,0 +1,114 @@ +# Task: detect, order, and transcribe every text line on a TPEN3 page end-to-end + +You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services (direct) or emitted as a fallback JSON payload for the user to paste. 
+ +## Context + +- Canvas: {{canvasId}} +- Canvas Dimensions: {{canvasWidth}} × {{canvasHeight}} +- Image: {{imageUrl}} +- Page endpoint: {{pageEndpoint}} + +## Preconditions + +All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: + +1. Ability to fetch the image bytes (or a derivative) and identify line bounds and text from them. Precise pixel measurement is preferred when available; visual estimation and on-sight transcription from the fetched image are acceptable otherwise. +2. Either HTTP PUT capability with `Content-Type: application/json`, or the ability to emit the payload as a fallback JSON code block in your report. If HTTP PUT is not available, skip straight to the Fallback section — do not retry. + +Use only tools already available in your environment. Do not install packages, libraries, or system utilities. + +## Steps + +1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, fetch a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. If you have precise pixel tooling and want tighter bounds or a clearer crop for transcription, request a region server-side as `{base}/x,y,w,h/max/0/default.jpg` and add the crop's `x,y` origin back before applying the canvas conversion below. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. +2. Identify the page's layout. If the page has multiple text blocks side-by-side, determine their reading order (left→right for Latin-script layouts; adjust for script tradition). Within each block, detect lines top-to-bottom. 
Then flatten into a single global reading-order sequence across blocks (block-major: every line in the first block, then the second, etc.). Single-block pages collapse to one top-to-bottom sequence. This task does not create TPEN columns. +3. For every line, measure a bounding box in image-pixel space and convert to integer canvas coordinates using: + - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` + - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` + - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` + - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` + Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). +4. Run handwriting text recognition on each line's crop. Apply the recognition rules below. +5. If HTTP PUT is available, build the full payload under **TPEN API** in the global reading-order sequence from step 2 and send the request once. On any non-2xx response, do not retry — fall back. +6. If HTTP PUT is unavailable (or step 5 fell back), emit the condensed payload under **Fallback** as the final code block. +7. Report counts (lines saved/in payload, non-empty text, uncertain) and which path was used (direct PUT or fallback). +8. Report notable ambiguities (e.g., illegible lines transcribed as empty or flagged) and the detected block count so reviewers know whether a multi-block layout was recognised. + +## Rules + +### Detection (IMAGE_ANALYSIS) + +- Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. +- The PUT `items` order is the page's canonical reading order; do not interleave lines from different blocks. +- Prefer tight bounds when you can measure them; best-effort bounds are acceptable. When uncertain whether a tall run is one line or several, prefer splitting over merging. +- Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line. 
+- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected downstream. +- Zero lines detected is an unprocessable outcome. Stop and report — do not PUT, do not emit a fallback payload. An empty `items` array would erase every existing annotation on the page. + +### Recognition (HANDWRITING_TEXT_RECOGNITION) + +- Prioritize diplomatic transcription over normalization. +- Preserve orthography and punctuation as observed. +- Use explicit uncertainty markers for unclear glyphs (for example `[a?]`). Do not force certainty. +- Do not invent expansions. If an abbreviation mark is present, transcribe the mark; do not silently expand. +- Keep line segmentation stable even when text is partially uncertain. +- If a crop is fully illegible, save the annotation with an empty text body and flag the line in the report by its position in the reading-order sequence (newly detected lines have no id yet) — do not fabricate text. + +## TPEN API + +Save every detected line with its transcription in a single PUT. The `items` array must contain one annotation per detected line, in the reading-order sequence from step 2; replace `x,y,w,h` with the integer canvas coordinates computed in step 3, and `<text>` with the recognized text (empty string for fully illegible lines). + +``` +PUT {{pageEndpoint}} +Authorization: Bearer {{token}} +Content-Type: application/json + +{ + "items": [ + { + "body": [{ "type": "TextualBody", "value": "<text>", "format": "text/plain" }], + "target": { + "source": "{{canvasId}}", + "type": "SpecificResource", + "selector": { + "type": "FragmentSelector", + "conformsTo": "http://www.w3.org/TR/media-frags/", + "value": "xywh=x,y,w,h" + } + }, + "motivation": "transcribing" + } + ] +} +``` + +## Fallback + +When the direct PUT is impossible or returns non-2xx, emit the condensed payload below as the final code block of your report, in the reading-order sequence from step 2.
The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON (no comments, no placeholders — substitute the real coordinates and recognized text). + +``` +{ + "items": [ + { "text": "", "target": "xywh=x,y,w,h" } + ] +} +``` + +One item per detected line. `target` is the bare selector value (no `#`, no `pixel:` prefix). `text` is an empty string for fully illegible lines — do not drop the item. Item order is the page's canonical reading order; do not interleave lines from different blocks. + +## Completion + +Direct PUT path, report: + +- operation: `PUT page` +- target: {{pageEndpoint}} +- counts: lines saved, lines with non-empty text, lines flagged uncertain, text blocks detected +- notable ambiguities worth a human review + +Fallback path, report: + +- path: `fallback` +- counts: lines in payload, lines with non-empty text, lines flagged uncertain, text blocks detected +- HTTP status and error body if a PUT was attempted first +- notable ambiguities worth a human review +- final code block: the condensed `{ "items": [...] }` JSON for the user to paste diff --git a/templates/detect-order-and-transcribe/index.js b/templates/detect-order-and-transcribe/index.js new file mode 100644 index 0000000..255c5cb --- /dev/null +++ b/templates/detect-order-and-transcribe/index.js @@ -0,0 +1,19 @@ +/** + * @file Template: "Detect lines + order + transcribe → PUT page". + * + * Combines the multi-block reading-order detection from + * detect-columns-and-lines with the handwriting recognition from + * detect-and-transcribe, without creating column annotations. 
+ * + * @author thehabes + */ + +import { buildTemplateContext } from '../inject-context.js' + +/** @type {import('../../prompt-generator.js').PromptTemplate} */ +export const detectOrderAndTranscribeTemplate = { + id: 'detect-order-and-transcribe', + label: 'Line Detection + Ordering + Transcription', + templateUrl: new URL('./PROMPT.md', import.meta.url), + buildContext: buildTemplateContext +} diff --git a/templates/inject-context.js b/templates/inject-context.js index c90e96e..b21ab71 100644 --- a/templates/inject-context.js +++ b/templates/inject-context.js @@ -10,8 +10,7 @@ * @author thehabes */ -import { getAgentIRIFromToken } from '../auth.js' -import { getIRI, trailingId } from '../iiif-ids.js' +import { getIRI, parseXywh, trailingId } from '../iiif-ids.js' /** * Pull the first image body URL off a IIIF canvas, or null if none is present. @@ -42,57 +41,76 @@ function canvasDimensions(canvas) { * @returns {Record} */ export function buildTemplateContext(ctx) { - const { canvas, project, projectID, pageID, projectEndpoint, pageEndpoint, token } = ctx + const { canvas, page, pageEndpoint, token } = ctx const canvasId = getIRI(canvas) ?? '(unknown canvas id)' const imageUrl = extractImageUrl(canvas) ?? '(no image body found on canvas)' const { width, height } = canvasDimensions(canvas) const canvasWidth = width != null ? String(width) : '(unknown)' const canvasHeight = height != null ? String(height) : '(unknown)' - const dims = (width && height) ? `${width} × ${height}` : 'unknown (use the IIIF Image API info.json)' - const projectManifest = Array.isArray(project?.manifest) ? project.manifest[0] : project?.manifest - const manifestUri = getIRI(canvas?.partOf) ?? getIRI(projectManifest) ?? '(unknown manifest URI)' - const userAgentURI = getAgentIRIFromToken(token) ?? '(unable to resolve agent IRI from token)' + const lineCount = Array.isArray(page?.items) ? page.items.length : 0 return { - projectID: projectID ?? '', - pageID: pageID ?? 
'', canvasId, imageUrl, canvasWidth, canvasHeight, - dims, - manifestUri, - userAgentURI, - projectEndpoint: projectEndpoint ?? '(unknown project endpoint)', + lineCount: String(lineCount), pageEndpoint: pageEndpoint ?? '(unknown page endpoint)', token: token ?? '' } } /** - * Extract an `xywh=x,y,w,h` fragment from a line annotation's target, accepting - * both `target.selector.value` and a plain `"source#xywh=..."` string target. - * Strips the non-standard `pixel:` prefix introduced by Annotorious — prompts - * and any annotations produced downstream must use plain integer coordinates. - * @param {any} item - * @returns {string|null} + * Summarize a line's body for the "Existing lines" listing. + * + * Three forms, chosen to keep the listing compact while still letting PUT + * consumers reconstruct an existing body verbatim (the services API replaces + * `body` with `[]` when a PUT item omits it): + * + * - `body=[]` — empty body; echo as `[]`. + * - `text="…"` — single plain-text `TextualBody`; echo as + * `[{ "type": "TextualBody", "value": , "format": "text/plain" }]`. + * The common case, so it's worth the shorter display. + * - `body=` — anything else; echo the JSON verbatim. + * + * Existing TPEN line bodies are expected to always carry `type`, `value`, and + * `format`. The `text=` round-trip reconstruction sets `format: "text/plain"`, + * so `only.format === 'text/plain'` is a strict match — any other shape (no + * format, different format, multiple bodies, non-`TextualBody`) drops to + * `body=` to preserve fidelity on the PUT echo. + * @param {any} body an annotation `body` value. + * @returns {string} */ -function extractXywh(item) { - const sel = item?.target?.selector - const selValue = Array.isArray(sel) ? sel[0]?.value : sel?.value - let raw = null - if (typeof selValue === 'string' && selValue.includes('xywh=')) { - raw = selValue.slice(selValue.indexOf('xywh=')) - } else { - const target = typeof item?.target === 'string' ? 
item.target : null - if (target && target.includes('#xywh=')) raw = target.slice(target.indexOf('xywh=')) +function formatBody(body) { + if (!Array.isArray(body) || body.length === 0) return 'body=[]' + if (body.length === 1) { + const only = body[0] + // Require EXACTLY {type, value, format} with the expected values so the + // `text=` → `[{type, value, format}]` round-trip is lossless. Any extra + // field (e.g. `language`, `creator`, `id`) would be silently dropped on + // the PUT echo and trigger a needless RERUM re-version. + const keys = only && typeof only === 'object' ? Object.keys(only) : [] + const isPlainTextual = + keys.length === 3 + && keys.every(k => k === 'type' || k === 'value' || k === 'format') + && only.type === 'TextualBody' + && typeof only.value === 'string' + && only.format === 'text/plain' + if (isPlainTextual) return `text=${JSON.stringify(only.value)}` } - return raw ? raw.replace(/^xywh=pixel:/, 'xywh=') : null + return `body=${JSON.stringify(body)}` } /** * Render the current line annotations on a page as a markdown bullet list - * keyed by trailing line id and xywh selector. Pre-resolving this list in the - * parent saves the LLM a GET + parse round trip. + * carrying the fields needed to echo each line back in a page PUT without + * losing data. Pre-resolving this list in the parent saves the LLM a GET + + * parse round trip. Column POSTs require the full URI to match + * `page.items[].id` server-side; PATCH-line-text consumers can split the + * URI's trailing segment themselves. + * + * Each entry exposes the body as one of three forms — `body=[]`, `text="…"`, + * or `body=` — consumed by the `detect-columns` and + * `transcribe-known-lines` prompts, which document how to reconstruct each. * @param {any} fetchedPage the page object returned by `fetchPageResolved`. 
* @returns {string} */ @@ -102,30 +120,29 @@ export function formatExistingLines(fetchedPage) { return '- (No existing lines on this page.)' } return items.map(item => { - const lineId = trailingId(item) ?? '(unknown)' - const xywh = extractXywh(item) ?? '(no xywh selector)' - return `- ${lineId}: ${xywh}` + const lineUri = getIRI(item) ?? '(unknown)' + const xywh = parseXywh(item?.target) ?? '(no xywh selector)' + return `- ${lineUri} | ${xywh} | ${formatBody(item?.body)}` }).join('\n') } /** * Render the current column state for a given page as a markdown bullet list. - * Used by templates that must avoid duplicate column labels. The directly - * fetched `page` is authoritative when supplied, since the project graph may - * not hydrate `layer.pages[].columns` for every page. + * Used by templates that must avoid duplicate column labels. Columns live on + * `project.layers[].pages[]`; the `/resolved` page endpoint does not emit + * them, so the project graph is the only source. * @param {any} project the TPEN project object. - * @param {string|null|undefined} pageID the short page id or full page IRI. - * @param {any} [fetchedPage] the page object returned by `fetchPageResolved`, preferred when available. + * @param {any} page the page object returned by `fetchPageResolved`. * @returns {string} */ -export function formatExistingColumns(project, pageID, fetchedPage = null) { - const tail = trailingId(pageID) +export function formatExistingColumns(project, page) { + const tail = trailingId(page) const projectPage = (project?.layers ?? []) .flatMap(l => l.pages ?? []) .find(pg => trailingId(pg) === tail) - const cols = fetchedPage?.columns ?? projectPage?.columns ?? [] - if (!Array.isArray(cols) || cols.length === 0) { + const cols = projectPage?.columns ?? [] + if (cols.length === 0) { return '- (No existing columns on this page — labels must be unique when created.)' } - return cols.map(c => `- ${c.label ?? '(unlabeled)'}: ${(c.lines ?? c.annotations ?? 
[]).length} line(s)`).join('\n') + return cols.map(c => `- ${c.label ?? '(unlabeled)'}`).join('\n') } diff --git a/templates/transcribe-known-lines/PROMPT.md b/templates/transcribe-known-lines/PROMPT.md index be1f2a9..0d0c9dc 100644 --- a/templates/transcribe-known-lines/PROMPT.md +++ b/templates/transcribe-known-lines/PROMPT.md @@ -1,40 +1,43 @@ # Task: transcribe the existing lines on a TPEN3 page -You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services. +You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services (direct) or emitted as a fallback JSON payload for the user to paste. ## Context -- Project: {{projectID}} -- Page: {{pageID}} - Canvas: {{canvasId}} - Canvas Dimensions: {{canvasWidth}} × {{canvasHeight}} - Image: {{imageUrl}} -- Manifest: {{manifestUri}} -- User Agent URI: {{userAgentURI}} - Page endpoint: {{pageEndpoint}} ## Existing lines -Each entry is `<line-id>: <xywh>` in canvas coordinates. If the list is empty, stop — this template only revises existing lines. +Each entry is `<line-uri> | xywh=<x,y,w,h> | <body-form>` in canvas coordinates. The body form is `body=[]` (empty), `text="<value>"` (single plain-text `TextualBody`), or `body=<json>` (anything else) — use it as context for what's already on the line. The fallback payload re-uses the full annotation URI verbatim as the `id` of each item; the splitscreen tool rebuilds each existing target from the hydrated page before PUTting it, and updates only the body text. {{existingLines}} ## Preconditions -1. Required context present: `projectID`, `pageID`, `canvasId`, `{{token}}`, and a non-empty existing-lines list above. If any is missing, stop and report. -2. Vision capability: you must be able to load the page image as raw bytes and crop/inspect per-line regions. A fetcher that returns only a prose description of the image does not count. -3.
Authorization: `{{token}}` must be usable for PATCH against each line-text endpoint. -4. HTTP PATCH capability (with `Content-Type: text/plain`). +All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions, existing-line list) are provided above. This template only revises existing lines: `lineCount` = `{{lineCount}}`. If `lineCount` is `0`, stop immediately and report. -If any precondition fails, stop and return a concise failure report naming the missing capability. +You must have: + +1. Vision capability: fetch each line's region as image bytes (e.g. via a IIIF region URL) and read the inked glyphs directly. A fetcher that returns only a prose description of the image does not qualify, and any preview rendered back into chat is downsampled — do not transcribe from a preview. **If you cannot read image bytes directly with the capabilities already available to you, stop now and return a failure report naming the missing capability.** This precondition is hard — fallback does not rescue missing vision. +2. Either HTTP PATCH capability (with `Content-Type: text/plain`), or the ability to emit a fallback JSON code block in your report. If HTTP PATCH is not available, skip straight to the Fallback section — do not retry. + +Use only tools already available in your environment. Do not install packages, libraries, or system utilities. ## Steps -1. Resolve canvas dimensions. Use `{{canvasWidth}}`/`{{canvasHeight}}` when numeric. If either is `(unknown)`, GET `{{canvasId}}` and read `width`/`height`. If that fails, GET `{{manifestUri}}` and find the matching canvas in `items` by id. -2. Fetch the page image and a per-line crop using each line's `xywh` from the list above. Verify each crop visibly contains a single line of inked text. -3. Run handwriting text recognition over each crop. Apply the recognition rules below. -4. For each line, PATCH the text to its line-text endpoint. -5. 
Report a per-line summary: how many succeeded, how many failed, and the HTTP status for any failure. +1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}` and GET `{base}/info.json` for the dimensions; fetch each line's region server-side as `{base}/x,y,w,h/max/0/default.jpg` rather than downloading the whole page and cropping locally. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. The `xywh` selectors above are in canvas space; convert each to image-pixel space (for the IIIF region URL or the local crop) using: + - `pixel_x = round(canvas_x * img_w / {{canvasWidth}})` + - `pixel_y = round(canvas_y * img_h / {{canvasHeight}})` + - `pixel_w = round(canvas_w * img_w / {{canvasWidth}})` + - `pixel_h = round(canvas_h * img_h / {{canvasHeight}})` + Crop each line region and verify it visibly contains a single line of inked text. +2. Run handwriting text recognition over each crop. Apply the recognition rules below. +3. If HTTP PATCH is available, PATCH the text to each line's line-text endpoint — one PATCH per line in the "Existing lines" list. On any non-2xx, record the status and continue with the remaining lines. If every PATCH returned non-2xx, treat PATCH as unavailable and proceed to step 4. If HTTP PATCH is unavailable from the start, skip directly to step 4. +4. If you reached this step because PATCH was unavailable or every attempt failed, emit the condensed payload under **Fallback** as the final code block. +5. Report counts (lines updated, lines flagged illegible, lines failed) and which path was used. ## Rules @@ -42,11 +45,11 @@ If any precondition fails, stop and return a concise failure report naming the m - Use explicit uncertainty markers for unclear glyphs (for example `[a?]`). Do not force certainty. - Do not invent expansions. 
If an abbreviation mark is present, transcribe the mark; do not silently expand. - Keep line segmentation stable — one transcription string per existing line annotation. -- If a line's crop is illegible, send an empty body or skip the PATCH and report the line id as unresolved — do not fabricate text. +- If a line's crop is illegible, send an empty body (direct) or emit `"text": ""` (fallback) and report the line id as unresolved — do not fabricate text. In the fallback payload, do not drop the item. ## TPEN API -Update one line's text via PATCH with a plain-text body: +Update one line's text via PATCH with a plain-text body. `` is the trailing path segment of the annotation URI listed above (the last `/`-separated segment). ``` PATCH {{pageEndpoint}}/line//text @@ -56,30 +59,31 @@ Content-Type: text/plain ``` -`` is the trailing id segment of the annotation's id (the last path segment of the annotation URI). +## Fallback -Error handling: +The fallback tool only accepts JSON, so it uses a single page-level PUT instead of per-line PATCH. When PATCH is unavailable or every attempt returned non-2xx, emit the condensed payload below as the final code block of your report. The TPEN splitscreen tool re-uses each line's existing target from the hydrated page context before PUTting it — the item's `id` must match an entry in "Existing lines" above. -```javascript -if (!response.ok) { - throw new Error(`TPEN API ${response.status}: ${await response.text()}`) +``` +{ + "items": [ + { "id": "", "text": "" } + ] } ``` +There must be exactly one item per entry in "Existing lines", each re-using that entry's annotation URI verbatim as its `id`. Item order must match the order of "Existing lines" — do not reorder. `text` is an empty string for fully illegible lines — do not drop the item. It must be valid JSON (no comments, no placeholders). 
+ ## Completion -On success, report: +Direct PATCH path, report: - operation: `PATCH line text` -- target: `{{pageEndpoint}}/line//text` per line -- count: number of lines updated - -On failure, report: +- target: {{pageEndpoint}}/line//text per line +- counts: lines updated, lines flagged illegible, lines failed (with HTTP status per failure) -- the failing stage (image fetch, recognition, PATCH, etc.) -- HTTP status and error body if applicable -- the line id(s) affected and a recommended next step - -## Fallback +Fallback path, report: -If required resources are unreachable or you lack vision / PATCH capability, do not fabricate transcriptions and do not send partial PATCHes that overwrite real text. Report what is missing and stop. +- path: `fallback` +- counts: lines in payload, lines flagged illegible +- HTTP status and error body if a PATCH was attempted first +- final code block: the condensed `{ "items": [...] }` JSON for the user to paste diff --git a/templates/transcribe-known-lines/index.js b/templates/transcribe-known-lines/index.js index b46a497..b72b0a3 100644 --- a/templates/transcribe-known-lines/index.js +++ b/templates/transcribe-known-lines/index.js @@ -12,7 +12,7 @@ import { buildTemplateContext, formatExistingLines } from '../inject-context.js' /** @type {import('../../prompt-generator.js').PromptTemplate} */ export const transcribeKnownLinesTemplate = { id: 'transcribe-known-lines', - label: 'Auto Transcribe Existing Lines', + label: 'Transcribe Over Existing Lines', templateUrl: new URL('./PROMPT.md', import.meta.url), buildContext: ctx => ({ ...buildTemplateContext(ctx), diff --git a/tpen-service.js b/tpen-service.js index 7a041a7..60dd123 100644 --- a/tpen-service.js +++ b/tpen-service.js @@ -9,31 +9,55 @@ import { CONFIG } from './config.js' /** - * GET a services endpoint with the user's Bearer token and a 15s timeout. + * Call a services endpoint with the user's Bearer token and a 15s timeout. 
* On non-2xx responses throws an Error whose `.status` matches the response. * @param {string} path path beginning with `/`, relative to `CONFIG.servicesURL`. + * @param {string} method HTTP verb (`GET`, `PUT`, `POST`, `PATCH`). + * @param {any} [body] JSON-serializable body; omitted for GET. * @param {string} token JWT. * @returns {Promise} parsed JSON body. */ -async function authedGet(path, token) { - if (!token) throw new Error(`Missing auth token for ${path}`) - const res = await fetch(`${CONFIG.servicesURL}${path}`, { - method: 'GET', +async function tpenServiceRequest(path, method, body, token) { + if (!token) { + const err = new Error(`Missing auth token for ${path}`) + err.status = 401 + throw err + } + const options = { + method, headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${token}` }, signal: AbortSignal.timeout(15000) - }) + } + if (body !== undefined) options.body = JSON.stringify(body) + const res = await fetch(`${CONFIG.servicesURL}${path}`, options) if (!res.ok) { - const detail = await res.text().catch(() => '') - const err = new Error(`${res.status} ${res.statusText} — ${path}${detail ? `: ${detail}` : ''}`) + // TPEN services always emit JSON errors (see tpen3-services + // utilities/shared.js#respondWithError and utilities/routeErrorHandler.js). + const detail = await res.json().catch(() => ({})) + const msg = detail.message ?? detail.error ?? res.statusText + // Prefix with the status so callers that surface `err.message` raw + // (e.g., main.js#loadContext) still show it; the numeric status is + // also preserved on `err.status` for programmatic handling. + const err = new Error(`${res.status} ${path}: ${msg}`) err.status = res.status throw err } return res.json() } +/** + * GET a services endpoint with the user's Bearer token. + * @param {string} path path beginning with `/`, relative to `CONFIG.servicesURL`. + * @param {string} token JWT. + * @returns {Promise} parsed JSON body. 
+ */ +function authedGet(path, token) { + return tpenServiceRequest(path, 'GET', undefined, token) +} + /** * Fetch a project record. * @param {string} projectID short id (not a full IRI). @@ -58,13 +82,25 @@ export function fetchPageResolved(projectID, pageID, token) { } /** - * Build the project endpoint URL (project/index.js). Templates use this for - * best-effort GETs that verify project-level state mid-task. + * PUT a page body (`{ items: [...] }`). Used by the fallback JSON-paste flow + * when the user's LLM cannot issue writes itself. Items may be new (no `id`, + * or a non-http local id) or updates (item `id` is the line's full IRI). + * + * Note: items whose `body` is omitted get `body=[]` on the server — the Line + * class sets `body: this.body ?? []` before saving, which spreads over the existing + * RERUM document. Echo each existing item's body back to preserve its + * transcription. * @param {string} projectID - * @returns {string} absolute URL. + * @param {string} pageID + * @param {{ items: Array }} body + * @param {string} token + * @returns {Promise} */ -export function projectEndpoint(projectID) { - return `${CONFIG.servicesURL}/project/${encodeURIComponent(projectID)}` +export function putPage(projectID, pageID, body, token) { + return tpenServiceRequest( + `/project/${encodeURIComponent(projectID)}/page/${encodeURIComponent(pageID)}`, + 'PUT', body, token + ) } /** diff --git a/ui-manager.js b/ui-manager.js index f38ca92..4b9aeab 100644 --- a/ui-manager.js +++ b/ui-manager.js @@ -9,8 +9,8 @@ */ import { listTemplates, renderTemplate } from './prompt-generator.js' -import { pageEndpoint, projectEndpoint } from './tpen-service.js' -import { trailingId } from './iiif-ids.js' +import { pageEndpoint, putPage } from './tpen-service.js' +import { getIRI, parseXywh, trailingId } from './iiif-ids.js' /** * Build a DOM element. 
Recognizes a few special prop keys: @@ -50,6 +50,141 @@ const OPTIONAL_ID_FIELDS = [ { name: 'lineID', label: 'Line ID (optional)' } ] +/** + * Build a W3C `SpecificResource` target from a canvas IRI and an `xywh=…` + * selector value. + * @param {string} canvasId + * @param {string} xywh the bare selector value (e.g. `xywh=10,20,300,40`). + * @returns {{source: string, type: string, selector: {type: string, conformsTo: string, value: string}}} + */ +function buildSpecificResourceTarget(canvasId, xywh) { + return { + source: canvasId, + type: 'SpecificResource', + selector: { + type: 'FragmentSelector', + conformsTo: 'http://www.w3.org/TR/media-frags/', + value: xywh + } + } +} + +/** + * Pull the bare `xywh=…` selector value out of whatever target shape the + * fallback item carries. Delegates all target-shape handling to `parseXywh` + * in iiif-ids.js so both `SpecificResource` objects and legacy bare + * `"#xywh=…"` strings round-trip correctly. + * + * Known-line updates (item `id` matches an existing line) ignore any + * `target` the LLM included and re-use the existing line's selector — the + * fallback flow is documented as text-only in `transcribe-known-lines`, so + * trusting an LLM-supplied target would silently clobber bounds when the + * model echoes a stale or wrong selector. + * + * Returns `null` when no selector can be resolved; the caller leaves `target` + * off and the services API rejects the item with `Line data is malformed`. + * @param {any} item + * @param {Map} existingItemsById + * @returns {string|null} + */ +function resolveXywh(item, existingItemsById) { + if (typeof item?.id === 'string' && existingItemsById.has(item.id)) { + return parseXywh(existingItemsById.get(item.id)?.target) + } + return parseXywh(item?.target) +} + +/** + * Expand a condensed fallback item into a full W3C Annotation. 
Every output + * target is rebuilt fresh with `canvasId` as `source` — we don't trust any + * source that rode in on a pasted item or an echoed existing target, so the + * rebuilt annotation always points at the canvas the UI is showing. + * + * The condensed per-item shapes are (by prompt): + * + * - `{ target: "xywh=…" }` — detection only. + * - `{ target: "xywh=…", text }` — detection + transcription. + * - `{ id, text }` — known-line update; xywh is looked up from the hydrated + * page. + * + * Legacy full-shape pastes pass through in all other respects — only + * `target.source` gets normalized and `motivation` is filled when missing. + * @param {any} item raw parsed item from the fallback textarea. + * @param {string|null} canvasId the canvas IRI used as the annotation's target source. + * @param {Map} existingItemsById lookup from annotation id → resolved page item. + * @returns {object} a W3C Annotation ready for PUT. + */ +function expandFallbackItem(item, canvasId, existingItemsById) { + const out = { ...item } + const xywh = resolveXywh(item, existingItemsById) + if (xywh) out.target = buildSpecificResourceTarget(canvasId, xywh) + if (typeof item.text === 'string') { + out.body = item.text === '' + ? [] + : [{ type: 'TextualBody', value: item.text, format: 'text/plain' }] + delete out.text + } + if (!('motivation' in out)) out.motivation = 'transcribing' + return out +} + +/** + * Validate a pre-expansion `items` array, returning a user-facing error string + * or `null`. Catches two erasure traps: + * + * 1. An empty array — the services PUT handler's top-level copy loop writes + * `page.items = []` even when `itemsProvided` is false, erasing every line + * reference on the page and leaving columns pointing at stale ids. Prompts + * should stop and report "no lines" rather than emit an empty payload. + * 2. 
A known-line update (string `id`) without usable transcription content + * would be PUT with `body` absent or empty, causing the services API to + * overwrite the existing body with `[]` on save + * (Line.js#saveLineToRerum: `body: this.body ?? []`). `'body' in item` is + * not enough — `body: null`, `body: ""`, `body: {}` all collapse to `[]` + * via the `??` fallback. Require either a `text` string or a non-empty + * `body` array; reject any other `body` shape outright so a buggy paste + * can't slip through and silently truncate a line. + * @param {Array} items + * @returns {string|null} + */ +function validateItems(items) { + if (items.length === 0) { + return '`items` is empty — submitting would erase every line on the page. Regenerate the prompt response with at least one detected line or stop.' + } + for (const item of items) { + if (!item || typeof item !== 'object' || Array.isArray(item)) { + return 'Each item in `items` must be an annotation object.' + } + const hasId = typeof item.id === 'string' + const hasTargetField = 'target' in item && item.target != null + // Without an id we can't look up an existing target; without a target we + // can't build one. Either path resolves a selector — neither makes the + // server throw "Line data is malformed" with a generic 500. + if (!hasId && !hasTargetField) { + return 'Each item must include `target` (xywh selector) or an `id` matching an existing line.' + } + if (hasTargetField) { + const t = item.target + const ok = typeof t === 'string' || (typeof t === 'object' && !Array.isArray(t)) + if (!ok) return 'Each item `target` must be an `xywh=…` string or a full target object.' + } + if ('text' in item && typeof item.text !== 'string') { + return 'Each item `text` must be a string.' + } + if ('body' in item && item.body !== undefined && !Array.isArray(item.body)) { + return 'Each item `body` must be an array of body entries.' 
+ } + if (hasId) { + const hasText = typeof item.text === 'string' + const hasBody = Array.isArray(item.body) && item.body.length > 0 + if (!hasText && !hasBody) { + return `Item for ${item.id} is missing transcription content (\`text\` string or non-empty \`body\` array) — would erase the existing transcription.` + } + } + } + return null +} + /** * Renders the three UI states (status, id form, workspace) into a single * root node and owns state while a workspace is displayed. The workspace @@ -69,6 +204,8 @@ export class UIManager { #workspaceBody = null /** Pending timer for clearing the Copy feedback message. */ #feedbackTimer = null + /** Fallback-panel submit button; toggled by `updateToken`. */ + #fallbackSubmit = null /** * @param {string} [rootId='app'] id of the element to render into. @@ -206,13 +343,15 @@ export class UIManager { select.append(el('option', { value: t.id, text: t.label })) } - // Prompts embed the auth token; generating before consent yields an - // unusable prompt (templates render "(unable to resolve agent IRI…)"). - // Gate Generate on token presence and nudge the user toward the consent - // button in the header. + // Prompts embed the auth token in `{{token}}` and the page endpoint + // in `{{pageEndpoint}}`. Generating without either yields a prompt + // whose Authorization header is `Bearer ` (no token) or whose target + // URL is `(unknown page endpoint)`. Gate Generate on both, and nudge + // the user toward whatever's missing. 
+ const canGenerate = Boolean(token && pageID) const generateBtn = el('button', { type: 'button', id: 'generate-btn', text: 'Generate prompt', - disabled: !token + disabled: !canGenerate }) this.#generateBtn = generateBtn const output = el('textarea', { @@ -230,17 +369,189 @@ export class UIManager { generateBtn ] - const body = el('div', { class: 'workspace-body', hidden: !token }, [ - el('div', { class: 'controls' }, generateControls), + const bodyChildren = [ + el('div', { class: 'controls' }, generateControls) + ] + if (!pageID) { + bodyChildren.push(el('p', { class: 'hint', text: 'Needs a page context before a prompt can be generated.' })) + } + bodyChildren.push( el('label', { class: 'output-label', htmlFor: 'output', text: 'Generated prompt' }), output, - el('div', { class: 'controls' }, [copyBtn, feedback]) - ]) + el('div', { class: 'controls' }, [copyBtn, feedback]), + this.#buildFallbackPanel() + ) + const body = el('div', { class: 'workspace-body', hidden: !token }, bodyChildren) this.#workspaceBody = body this.#replace(el('section', { class: 'card' }, [header, body])) } + /** + * Build the paste-JSON fallback panel. Submit requires `projectID`, + * `pageID`, AND `token`. The workspace body is hidden when no token is + * held (`renderWorkspace` sets `hidden: !token`), so the disabled state + * below is belt-and-suspenders against a stale reference being clicked + * programmatically. `updateToken` still flips it when the token arrives + * after the panel was built so the pageID gate remains authoritative. + * + * The auto-clear timer for the feedback span lives in this closure, not + * on the instance — `renderWorkspace` rebuilds the panel on every render, + * and an instance-level timer reference would let an old panel's pending + * timer null out a new panel's timer slot. 
+ * @returns {HTMLElement} + */ + #buildFallbackPanel() { + const { projectID, pageID, token } = this.state + const hasPage = Boolean(projectID && pageID) + const ready = hasPage && Boolean(token) + const textarea = el('textarea', { + rows: 10, spellcheck: false, autocomplete: 'off', + placeholder: '{ "items": [ { "target": "xywh=10,20,400,30" } ] }', + attrs: { 'aria-label': 'JSON payload to submit to TPEN' } + }) + const submit = el('button', { + type: 'button', + text: 'Submit to TPEN', + disabled: !ready + }) + this.#fallbackSubmit = submit + const feedback = el('span', { class: 'feedback', attrs: { 'aria-live': 'polite' } }) + let feedbackTimer = null + submit.addEventListener('click', () => this.#onFallbackSubmit({ + textarea, button: submit, feedback, + getTimer: () => feedbackTimer, + setTimer: (t) => { feedbackTimer = t } + })) + const children = [ + el('summary', { text: `Couldn't Use the API? Paste JSON from LLM here` }), + el('p', { class: 'hint', text: 'Use this when your chat LLM produced the JSON payload but could not call the TPEN API itself. This tool will submit it using the token you authorized.' }) + ] + if (!hasPage) children.push(el('p', { class: 'hint', text: 'Needs a page context before submission is possible.' })) + children.push(textarea, el('div', { class: 'controls' }, [submit, feedback])) + return el('details', { class: 'fallback' }, children) + } + + /** + * Parse the pasted JSON and submit it as a page PUT. Only one shape is + * accepted: `{ items: [...] }` — the shape every prompt fallback emits. 
+ * @param {{textarea: HTMLTextAreaElement, button: HTMLButtonElement, feedback: HTMLElement, getTimer: () => any, setTimer: (t: any) => void}} ctx + */ + async #onFallbackSubmit({ textarea, button, feedback, getTimer, setTimer }) { + const { projectID, pageID, token } = this.state + const raw = textarea.value.trim() + // `renderWorkspace` can re-run mid-submit (e.g., token changes via + // `updateToken` during an await), detaching the nodes this handler + // closed over. Guard each UI write so a detached panel doesn't get + // silent stale mutations. + const alive = () => textarea.isConnected + const writeTextarea = (val) => { if (alive()) textarea.value = val } + const setFeedback = (msg, autoClear = false) => { + if (!alive()) return + feedback.textContent = msg + const existing = getTimer() + if (existing) { + clearTimeout(existing) + setTimer(null) + } + if (autoClear) { + const t = setTimeout(() => { + if (getTimer() !== t) return + feedback.textContent = '' + setTimer(null) + }, 3000) + setTimer(t) + } + } + if (!projectID || !pageID || !token) { + setFeedback('Missing project, page, or token — cannot submit.') + return + } + if (!raw) { + setFeedback('Paste a JSON payload first.') + return + } + let payload + try { payload = JSON.parse(raw) } + catch { setFeedback('Payload must be valid JSON.'); return } + + button.disabled = true + setFeedback('Submitting…') + const opts = { projectID, pageID, token, setFeedback, writeTextarea } + try { + if (payload && typeof payload === 'object' && !Array.isArray(payload) && Array.isArray(payload.items)) { + await this.#submitItems(payload.items, opts) + return + } + setFeedback('Unrecognized payload shape — expected `{ "items": [...] }`.') + } catch (err) { + setFeedback(err?.message ?? 'Submission failed.') + } finally { + if (button.isConnected) { + button.disabled = !(this.state.projectID && this.state.pageID && this.state.token) + } + } + } + + /** + * Validate, expand, and PUT an `items` payload. 
Narrows the PUT body to + * just `{ items }` — top-level keys beyond `items` would otherwise be + * applied to the page record by the server's property-copy loop. + * @param {Array} items + * @param {{projectID:string,pageID:string,token:string,setFeedback:Function,writeTextarea:Function}} opts + */ + async #submitItems(items, { projectID, pageID, token, setFeedback, writeTextarea }) { + const validationError = validateItems(items) + if (validationError) { setFeedback(validationError); return } + const canvasId = getIRI(this.state.canvas) + if (!canvasId) { + setFeedback('Canvas context missing — reload the workspace and retry.') + return + } + // Index the resolved page's items by id so the expander can recover + // each existing line's xywh for known-line updates (`{id, text}` only). + // The rebuilt target still uses `canvasId` as `source`; only the xywh + // selector value is pulled from the hydrated item. + const existingItemsById = new Map() + for (const existing of this.state.page?.items ?? []) { + const eid = getIRI(existing) + if (eid) existingItemsById.set(eid, existing) + } + const expanded = items.map(i => expandFallbackItem(i, canvasId, existingItemsById)) + const result = await putPage(projectID, pageID, { items: expanded }, token) + // Drop the saved page into local state so the next Generate's + // "Existing lines" listing reflects what was just persisted. + this.state.page = result + writeTextarea(JSON.stringify(result, null, 2)) + const saved = expanded.length + const noun = `line item${saved === 1 ? '' : 's'}` + // Mint `/transcribe?projectID=…&pageID=…` from the parent + // origin (taken from `document.referrer`, which survives the default + // `strict-origin-when-cross-origin` policy) and the workspace state, + // then top-navigate there to refresh the transcription column. + // Writing `top.location.href` is allowed cross-origin under user + // activation (the Submit click); when it works the iframe is torn + // down. 
When no origin is resolvable (sandboxed iframe with + // `allow-top-navigation` withheld, or strict `no-referrer` policy), + // fall back to a manual-refresh hint. The proper postMessage-based + // fix lives in TPEN-interfaces#528. + const reloadUrl = mintTranscriptionUrl(projectID, pageID) + if (reloadUrl) { + setFeedback(`Saved ${saved} ${noun}. Refreshing the transcription page…`) + // The PUT already succeeded; if the navigation throws (sandbox + // without `allow-top-navigation`, or top is cross-origin and + // the click's user activation has been consumed by the await + // chain above), don't let it surface as a submission failure. + try { + window.top.location.href = reloadUrl + return + } catch (err) { + console.warn('top.location navigation blocked', err) + } + } + setFeedback(`Saved ${saved} ${noun}. Refresh the transcription page to see the new lines in the column.`, true) + } + /** * Update the stored token and remove the in-workspace consent button if * it's on screen. Called from `PromptsApp.acceptAuth` when the parent @@ -260,6 +571,7 @@ export class UIManager { return } if (this.#generateBtn) this.#generateBtn.disabled = true + if (this.#fallbackSubmit) this.#fallbackSubmit.disabled = true if (this.#workspaceBody) this.#workspaceBody.hidden = true return } @@ -267,7 +579,11 @@ export class UIManager { this.#authButton.remove() this.#authButton = null } - if (this.#generateBtn) this.#generateBtn.disabled = false + const { projectID, pageID } = this.state + if (this.#generateBtn) this.#generateBtn.disabled = !pageID + if (this.#fallbackSubmit) { + this.#fallbackSubmit.disabled = !(projectID && pageID) + } if (this.#workspaceBody) this.#workspaceBody.hidden = false } @@ -304,11 +620,7 @@ export class UIManager { try { const full = renderTemplate(select.value, { project: s.project, page: s.page, canvas: s.canvas, - layer: s.layer, column: s.column, line: s.line, - projectID: s.projectID, pageID: s.pageID, - layerID: s.layerID, columnID: s.columnID, 
lineID: s.lineID, token: s.token, - projectEndpoint: s.projectID ? projectEndpoint(s.projectID) : null, pageEndpoint: (s.projectID && s.pageID) ? pageEndpoint(s.projectID, s.pageID) : null }) this.#fullPrompt = full @@ -385,3 +697,20 @@ function truncateToken(token) { return `${token.slice(0, 10)}…${token.slice(-10)}` } +/** + * Fallback reload target when the parent didn't forward `parentUrl` via + * `TPEN_CONTEXT`. Minted from the parent origin (taken from + * `document.referrer`, which survives the default cross-origin + * `strict-origin-when-cross-origin` policy) and the tpen3-interfaces + * transcription permalink shape (`/transcribe?projectID=…&pageID=…`). + * @param {string} projectID + * @param {string} pageID + * @returns {string|null} the minted URL, or null when no origin is available. + */ +function mintTranscriptionUrl(projectID, pageID) { + let origin = null + try { origin = new URL(document.referrer).origin } catch {} + if (!origin) return null + return `${origin}/transcribe?projectID=${encodeURIComponent(projectID)}&pageID=${encodeURIComponent(pageID)}` +} +