From 5d94a89c3e22a029830f63103ed82dc1ff86521e Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Tue, 21 Apr 2026 15:46:22 -0500 Subject: [PATCH 01/47] prompt tuning pass 1 --- templates/detect-and-transcribe/PROMPT.md | 52 ++++++-------------- templates/detect-columns-and-lines/PROMPT.md | 45 ++++++----------- templates/detect-columns/PROMPT.md | 31 +++--------- templates/detect-lines/PROMPT.md | 36 +++++--------- templates/inject-context.js | 8 ++- templates/transcribe-known-lines/PROMPT.md | 31 ++++-------- 6 files changed, 64 insertions(+), 139 deletions(-) diff --git a/templates/detect-and-transcribe/PROMPT.md b/templates/detect-and-transcribe/PROMPT.md index a0c04fc..38ae51e 100644 --- a/templates/detect-and-transcribe/PROMPT.md +++ b/templates/detect-and-transcribe/PROMPT.md @@ -9,38 +9,34 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en - Canvas: {{canvasId}} - Canvas Dimensions: {{canvasWidth}} × {{canvasHeight}} - Image: {{imageUrl}} -- Manifest: {{manifestUri}} -- User Agent URI: {{userAgentURI}} - Page endpoint: {{pageEndpoint}} ## Preconditions -1. Required context present: `projectID`, `pageID`, `canvasId`, `{{token}}`. If any is missing, stop and report. +1. Required context present: `projectID`, `pageID`, `canvasId`, `token`. If any is missing, stop and report. 2. Vision capability: you must be able to load the page image as raw bytes, measure pixel coordinates, and crop/inspect per-line regions. -3. Authorization: `{{token}}` must be usable for PUT against the page endpoint (and optionally PATCH for per-line text updates). -4. HTTP PUT capability with `Content-Type: application/json`; optional PATCH with `Content-Type: text/plain`. +3. Authorization: the token shown in the PUT example below must be usable for PUT against the page endpoint. +4. HTTP PUT capability with `Content-Type: application/json`. If any precondition fails, stop and return a concise failure report. ## Steps -1. Resolve canvas dimensions. 
Use `{{canvasWidth}}`/`{{canvasHeight}}` when numeric. Otherwise GET `{{canvasId}}` and read `width`/`height`. If that fails, GET `{{manifestUri}}` and find the matching canvas in `items` by id. -2. Fetch the page image. Detect every text line in reading order. -3. For each line, measure a bounding box and convert to integer canvas coordinates. Clamp to the canvas and round. -4. Run handwriting text recognition on each line's crop. Apply the recognition rules below. -5. Build one Annotation per line with the recognized text as the `TextualBody` value and `xywh=x,y,w,h` (integer canvas coordinates). -6. PUT the full set of line annotations to the page endpoint in a single request. -7. Optionally PATCH a specific line's text afterward if a recognition result needs a later revision (see TPEN API). -8. Report counts (lines saved) and notable ambiguities (e.g., illegible lines transcribed as empty or flagged). +1. Fetch the page image. Detect every text line in reading order. +2. For each line, measure a bounding box and convert to integer canvas coordinates. Clamp to the canvas and round. +3. Run handwriting text recognition on each line's crop. Apply the recognition rules below. +4. Build one Annotation per line with the recognized text as the `TextualBody` value and `xywh=x,y,w,h` (integer canvas coordinates). +5. PUT the full set of line annotations to the page endpoint in a single request. +6. Report counts (lines saved) and notable ambiguities (e.g., illegible lines transcribed as empty or flagged). ## Rules ### Detection (IMAGE_ANALYSIS) +- Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. - Preserve reading order. Prefer high recall for likely text lines over aggressive pruning. - Keep line boxes tight but do not clip ascenders/descenders. - Flag ambiguous regions in the report rather than silently dropping them. -- Bounds MUST be saved as integer coordinates in canvas space. 
No percent, no `pixel:` prefix on the selector value. ### Recognition (HANDWRITING_TEXT_RECOGNITION) @@ -81,39 +77,19 @@ Content-Type: application/json } ``` -Optional per-line text revision after the PUT: - -``` -PATCH {{pageEndpoint}}/line//text -Authorization: Bearer {{token}} -Content-Type: text/plain - - -``` - -Error handling (both calls): - -```javascript -if (!response.ok) { - throw new Error(`TPEN API ${response.status}: ${await response.text()}`) -} -``` +On any non-2xx response, stop the operation in progress and include the HTTP status and response body in the failure report. ## Completion On success, report: -- operation: `PUT page` (plus any follow-up `PATCH line text`) -- target: `{{pageEndpoint}}` +- operation: `PUT page` +- target: {{pageEndpoint}} - counts: lines saved, lines with non-empty text, lines flagged uncertain - notable ambiguities worth a human review On failure, report: -- the failing stage (image fetch, detection, recognition, PUT, or PATCH) +- the failing stage (image fetch, detection, recognition, PUT) - HTTP status and error body - recommended next step - -## Fallback - -If vision / write capability is missing, do not fabricate geometry or transcriptions. Report what is missing and stop. diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index f69a896..c4cfae8 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -9,8 +9,6 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en - Canvas: {{canvasId}} - Canvas Dimensions: {{canvasWidth}} × {{canvasHeight}} - Image: {{imageUrl}} -- Manifest: {{manifestUri}} -- User Agent URI: {{userAgentURI}} - Page endpoint: {{pageEndpoint}} ## Existing columns on this page @@ -19,33 +17,30 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Preconditions -1. 
Required context present: `projectID`, `pageID`, `canvasId`, `{{token}}`. If any is missing, stop and report. +1. Required context present: `projectID`, `pageID`, `canvasId`, `token`. If any is missing, stop and report. 2. Vision capability: you must be able to load the page image as raw bytes and measure pixel coordinates on it. -3. Authorization: `{{token}}` must be usable for both POST (column) and PUT (page) against the page endpoints. +3. Authorization: the token shown in the PUT example below must be usable for both POST (column) and PUT (page) against the page endpoints. 4. HTTP POST and PUT capability with `Content-Type: application/json`. If any precondition fails, stop and return a concise failure report. ## Steps -1. Resolve canvas dimensions. Use `{{canvasWidth}}`/`{{canvasHeight}}` when numeric. Otherwise GET `{{canvasId}}` and read `width`/`height`. If that fails, GET `{{manifestUri}}` and find the matching canvas in `items` by id. -2. Fetch the page image. Detect column regions in reading order first, then detect the lines inside each column (reading order preserved within each column). -3. For every line, measure a bounding box and convert to integer canvas coordinates. Clamp to the canvas and round. -4. Mint a stable local id for each line (for example, `line-1`, `line-2`, …) so you can reference them in column `annotations` arrays before the PUT assigns real ids. After the PUT, use the server-assigned ids when creating columns. -5. PUT every detected line to the page endpoint (see TPEN API below). Capture the server-assigned annotation ids from the response. -6. For each column, POST `{ label, annotations }` where `annotations` is the server-assigned line ids that belong to that column. Labels must be unique and must not clash with anything in "Existing columns on this page". -7. Report counts: lines saved, columns created, and any failures. 
- -Execution order is strict: lines are PUT first, then columns are POSTed against the now-persisted line ids. If the Project read fails and column state cannot be verified, you may proceed to save lines and skip column association — flag it clearly in the report. +1. Resolve canvas dimensions. {{canvasDimsResolution}} +2. Fetch the page image. Detect main text column regions in reading order first, then detect the lines inside each column (reading order preserved within each column). If the page visibly has a single text block, create one column containing every detected line — do not subdivide. +3. For every line, measure a bounding box and convert to integer canvas coordinates. Clamp to the canvas and round. Track each line's column index (an integer, 0-based) as you detect it. +4. PUT every detected line to the page endpoint in a single request (see TPEN API below). The response returns line ids in the same order as the submitted `items` — use positional mapping to recover ids per column index. +5. For each column, POST `{ label, annotations }` where `annotations` is the server-assigned line ids that belong to that column index. Labels must be unique and must not clash with anything in "Existing columns on this page". +6. Report counts: lines saved, columns created, and any failures. ## Rules -- Preserve reading order across columns and within each column. -- Prefer high recall: include borderline columns/lines and flag them, rather than silently dropping them. -- Keep line boxes tight enough for line-level recognition but generous enough not to clip ascenders/descenders. - Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. - Column labels are page-scoped and must be unique. Do not duplicate an existing column label. - Each line annotation belongs to at most one column. +- Preserve reading order across columns and within each column. 
+- Prefer high recall: include borderline columns/lines and flag them, rather than silently dropping them. +- Keep line boxes tight enough for line-level recognition but generous enough not to clip ascenders/descenders. ## TPEN API @@ -77,11 +72,11 @@ Content-Type: application/json } ``` -Then POST each column: +Then POST each column (reuse the same Bearer token as the PUT above): ``` POST {{pageEndpoint}}/column -Authorization: Bearer {{token}} +Authorization: Bearer Content-Type: application/json { @@ -90,20 +85,14 @@ Content-Type: application/json } ``` -Error handling (both calls): - -```javascript -if (!response.ok) { - throw new Error(`TPEN API ${response.status}: ${await response.text()}`) -} -``` +On any non-2xx response, stop the operation in progress and include the HTTP status and response body in the failure report. ## Completion On success, report: - operations: `PUT page`, `POST column` (×N) -- target: `{{pageEndpoint}}` and `{{pageEndpoint}}/column` +- target: {{pageEndpoint}} (page) and {{pageEndpoint}}/column - counts: lines saved, columns created On failure, report: @@ -111,7 +100,3 @@ On failure, report: - the failing stage (image fetch, detection, PUT, or a specific POST) - HTTP status and error body - whether lines were saved even if column creation failed (partial success is acceptable — describe what persists) - -## Fallback - -If vision / write capability is missing, do not fabricate geometry or send partial payloads. Report what is missing and stop. diff --git a/templates/detect-columns/PROMPT.md b/templates/detect-columns/PROMPT.md index 652c689..e93d0ca 100644 --- a/templates/detect-columns/PROMPT.md +++ b/templates/detect-columns/PROMPT.md @@ -9,8 +9,6 @@ You are assisting with TPEN manuscript transcription. 
Perform the task end-to-en - Canvas: {{canvasId}} - Canvas Dimensions: {{canvasWidth}} × {{canvasHeight}} - Image: {{imageUrl}} -- Manifest: {{manifestUri}} -- User Agent URI: {{userAgentURI}} - Page endpoint: {{pageEndpoint}} ## Existing columns on this page @@ -25,21 +23,20 @@ Each entry is `: ` in canvas coordinates. Use these ids v ## Preconditions -1. Required context present: `projectID`, `pageID`, `canvasId`, `{{token}}`, and a non-empty existing-lines list above. If any is missing, stop and report. +1. Required context present: `projectID`, `pageID`, `canvasId`, `token`, and at least one existing line. `lineCount` = `{{lineCount}}`; if this is `0`, stop immediately — this template operates on an existing line set. 2. Vision capability: you must be able to load the page image as raw bytes and measure pixel coordinates on it. -3. Authorization: `{{token}}` must be usable for POST against the page's column endpoint. +3. Authorization: the token shown in the POST example below must be usable for POST against the page's column endpoint. 4. HTTP POST capability with `Content-Type: application/json`. If any precondition fails, stop and return a concise failure report. ## Steps -1. Resolve canvas dimensions. Use `{{canvasWidth}}`/`{{canvasHeight}}` when numeric. Otherwise GET `{{canvasId}}` and read `width`/`height`. If that fails, GET `{{manifestUri}}` and find the matching canvas in `items` by id. -2. Analyze the page image and detect column regions in reading order. -3. For each detected column, determine which of the existing line ids (from the list above) fall within its bounds using each line's `xywh`. A line is assigned to exactly one column. -4. Choose a unique label per column (e.g., `Column A`, `Column B`). The label must not clash with any label listed under "Existing columns on this page". -5. POST one column at a time via the column endpoint, with `{ label, annotations }` where `annotations` is the array of line ids assigned to that column. -6. 
Report the count of created columns and any per-column failures. +1. Resolve canvas dimensions. {{canvasDimsResolution}} +2. Fetch the page image and detect main text column regions in reading order. If the page visibly has a single text block, create one column containing every existing line id — do not subdivide. +3. For each detected column, determine which of the existing line ids (from the list above) belong to it. Assign a line to the column that contains the center point of its `xywh`. Each line belongs to exactly one column. +4. Choose a unique label per column (e.g., `Column A`, `Column B`) that does not clash with any label under "Existing columns on this page", then POST `{ label, annotations }` to the column endpoint. `annotations` is the array of line ids assigned to that column. +5. Report the count of created columns and any per-column failures. ## Rules @@ -67,15 +64,7 @@ Content-Type: application/json Each `` is the trailing id segment of a line annotation listed above. -Error handling: - -```javascript -if (!response.ok) { - throw new Error(`TPEN API ${response.status}: ${await response.text()}`) -} -``` - -Column verification (best-effort): if you need to re-read current columns mid-task, GET `{{projectEndpoint}}` with `Authorization: Bearer {{token}}` and locate the page inside `project.layers[*].pages[*]` — inspect `page.columns`. Do not block column creation on a failed Project read; continue with the POSTs and flag verification as unavailable. +On any non-2xx response, stop the column in progress and include the HTTP status and response body in the failure report. 
## Completion @@ -91,7 +80,3 @@ On failure, report: - the failing stage (image fetch, detection, POST) - HTTP status and error body for any failed POST - recommended next step (e.g., choose a different label, reassign lines) - -## Fallback - -If required resources are unreachable or you lack vision / POST capability, do not fabricate column geometry and do not send partial POSTs that misassign lines. Report what is missing and stop. diff --git a/templates/detect-lines/PROMPT.md b/templates/detect-lines/PROMPT.md index a5ba85c..fb9368c 100644 --- a/templates/detect-lines/PROMPT.md +++ b/templates/detect-lines/PROMPT.md @@ -9,35 +9,31 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en - Canvas: {{canvasId}} - Canvas Dimensions: {{canvasWidth}} × {{canvasHeight}} - Image: {{imageUrl}} -- Manifest: {{manifestUri}} -- User Agent URI: {{userAgentURI}} - Page endpoint: {{pageEndpoint}} ## Preconditions -1. Required context present: `projectID`, `pageID`, `canvasId`, `{{token}}`. If any is missing, stop and report. +1. Required context present: `projectID`, `pageID`, `canvasId`, `token`. If any is missing, stop and report. 2. Vision capability: you must be able to load the page image as raw bytes and measure pixel coordinates on it. -3. Authorization: `{{token}}` must be usable for PUT against the page endpoint. +3. Authorization: the token shown in the PUT example below must be usable for PUT against the page endpoint. 4. HTTP PUT capability with `Content-Type: application/json`. If any precondition fails, stop and return a concise failure report. ## Steps -1. Resolve canvas dimensions. Use `{{canvasWidth}}`/`{{canvasHeight}}` when numeric. Otherwise GET `{{canvasId}}` and read `width`/`height`. If that fails, GET `{{manifestUri}}` and find the matching canvas in `items` by id. -2. 
Fetch the page image and detect every text line in reading order (top→bottom within a column, columns left→right unless the script tradition dictates otherwise). -3. For each detected line, measure a bounding box on the image and convert it to canvas coordinates. Clamp to the canvas: `x ≥ 0`, `y ≥ 0`, `x + w ≤ canvasWidth`, `y + h ≤ canvasHeight`. Round to integers after clamping. -4. Build one Annotation per line using the shape below, with `body` as an empty array (no text yet) and `value` as `xywh=x,y,w,h` in integer canvas coordinates. -5. PUT the full set of line annotations to the page endpoint. -6. Report count and any failure cause. +1. Resolve canvas dimensions. {{canvasDimsResolution}} +2. Fetch the page image and detect every text line in reading order. +3. For each detected line, measure a bounding box and convert to integer canvas coordinates. Clamp to the canvas and round. +4. PUT every detected line to the page endpoint in a single request (see TPEN API below). Leave `body` empty — no text yet. +5. Report count and any failure cause. ## Rules +- Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. - Preserve reading order across the whole page. -- Prefer high recall: a marginal or faint line that might carry text should be included and flagged, not silently dropped. - Keep each line box tight enough for line-level recognition — do not merge adjacent lines — but generous enough not to clip ascenders/descenders. -- Flag ambiguous regions in the report rather than silently merging or dropping. -- Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. +- Prefer high recall: include borderline lines and flag them, rather than silently dropping them. 
## TPEN API @@ -69,20 +65,14 @@ Content-Type: application/json } ``` -Error handling: - -```javascript -if (!response.ok) { - throw new Error(`TPEN API ${response.status}: ${await response.text()}`) -} -``` +On any non-2xx response, stop and include the HTTP status and response body in the failure report. ## Completion On success, report: - operation: `PUT page` -- target: `{{pageEndpoint}}` +- target: {{pageEndpoint}} - count: number of line annotations saved On failure, report: @@ -90,7 +80,3 @@ On failure, report: - the failing stage (image fetch, detection, PUT) - HTTP status and error body - recommended next step - -## Fallback - -If required resources are unreachable or you lack vision / PUT capability, do not fabricate line geometry. Report what is missing and stop. diff --git a/templates/inject-context.js b/templates/inject-context.js index c90e96e..84a8ed5 100644 --- a/templates/inject-context.js +++ b/templates/inject-context.js @@ -42,7 +42,7 @@ function canvasDimensions(canvas) { * @returns {Record} */ export function buildTemplateContext(ctx) { - const { canvas, project, projectID, pageID, projectEndpoint, pageEndpoint, token } = ctx + const { canvas, project, page, projectID, pageID, projectEndpoint, pageEndpoint, token } = ctx const canvasId = getIRI(canvas) ?? '(unknown canvas id)' const imageUrl = extractImageUrl(canvas) ?? '(no image body found on canvas)' const { width, height } = canvasDimensions(canvas) @@ -52,6 +52,10 @@ export function buildTemplateContext(ctx) { const projectManifest = Array.isArray(project?.manifest) ? project.manifest[0] : project?.manifest const manifestUri = getIRI(canvas?.partOf) ?? getIRI(projectManifest) ?? '(unknown manifest URI)' const userAgentURI = getAgentIRIFromToken(token) ?? '(unable to resolve agent IRI from token)' + const canvasDimsResolution = (width && height) + ? 
`Canvas dimensions are already resolved as ${width} × ${height} — use these values directly; no fetch required.` + : `Canvas dimensions unknown. GET \`${canvasId}\` and read \`width\`/\`height\`. If that fails, GET \`${manifestUri}\` and find the matching canvas in \`items\` by id.` + const lineCount = Array.isArray(page?.items) ? page.items.length : 0 return { projectID: projectID ?? '', pageID: pageID ?? '', @@ -60,8 +64,10 @@ export function buildTemplateContext(ctx) { canvasWidth, canvasHeight, dims, + canvasDimsResolution, manifestUri, userAgentURI, + lineCount: String(lineCount), projectEndpoint: projectEndpoint ?? '(unknown project endpoint)', pageEndpoint: pageEndpoint ?? '(unknown page endpoint)', token: token ?? '' diff --git a/templates/transcribe-known-lines/PROMPT.md b/templates/transcribe-known-lines/PROMPT.md index be1f2a9..bd7f77e 100644 --- a/templates/transcribe-known-lines/PROMPT.md +++ b/templates/transcribe-known-lines/PROMPT.md @@ -9,32 +9,29 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en - Canvas: {{canvasId}} - Canvas Dimensions: {{canvasWidth}} × {{canvasHeight}} - Image: {{imageUrl}} -- Manifest: {{manifestUri}} -- User Agent URI: {{userAgentURI}} - Page endpoint: {{pageEndpoint}} ## Existing lines -Each entry is `: ` in canvas coordinates. If the list is empty, stop — this template only revises existing lines. +Each entry is `: ` in canvas coordinates. {{existingLines}} ## Preconditions -1. Required context present: `projectID`, `pageID`, `canvasId`, `{{token}}`, and a non-empty existing-lines list above. If any is missing, stop and report. +1. Required context present: `projectID`, `pageID`, `canvasId`, `token`, and at least one existing line. `lineCount` = `{{lineCount}}`; if `0`, stop — this template only revises existing lines. 2. Vision capability: you must be able to load the page image as raw bytes and crop/inspect per-line regions. 
A fetcher that returns only a prose description of the image does not count. -3. Authorization: `{{token}}` must be usable for PATCH against each line-text endpoint. +3. Authorization: the token shown in the PATCH example below must be usable for PATCH against each line-text endpoint. 4. HTTP PATCH capability (with `Content-Type: text/plain`). If any precondition fails, stop and return a concise failure report naming the missing capability. ## Steps -1. Resolve canvas dimensions. Use `{{canvasWidth}}`/`{{canvasHeight}}` when numeric. If either is `(unknown)`, GET `{{canvasId}}` and read `width`/`height`. If that fails, GET `{{manifestUri}}` and find the matching canvas in `items` by id. -2. Fetch the page image and a per-line crop using each line's `xywh` from the list above. Verify each crop visibly contains a single line of inked text. -3. Run handwriting text recognition over each crop. Apply the recognition rules below. -4. For each line, PATCH the text to its line-text endpoint. -5. Report a per-line summary: how many succeeded, how many failed, and the HTTP status for any failure. +1. Fetch the page image and a per-line crop using each line's `xywh` from the list above. Verify each crop visibly contains a single line of inked text. +2. Run handwriting text recognition over each crop. Apply the recognition rules below. +3. For each line, PATCH the text to its line-text endpoint. +4. Report a per-line summary: how many succeeded, how many failed, and the HTTP status for any failure. ## Rules @@ -58,20 +55,14 @@ Content-Type: text/plain `` is the trailing id segment of the annotation's id (the last path segment of the annotation URI). -Error handling: - -```javascript -if (!response.ok) { - throw new Error(`TPEN API ${response.status}: ${await response.text()}`) -} -``` +On any non-2xx response, include the HTTP status and response body in that line's failure report. 
## Completion On success, report: - operation: `PATCH line text` -- target: `{{pageEndpoint}}/line//text` per line +- target: {{pageEndpoint}}/line//text per line - count: number of lines updated On failure, report: @@ -79,7 +70,3 @@ On failure, report: - the failing stage (image fetch, recognition, PATCH, etc.) - HTTP status and error body if applicable - the line id(s) affected and a recommended next step - -## Fallback - -If required resources are unreachable or you lack vision / PATCH capability, do not fabricate transcriptions and do not send partial PATCHes that overwrite real text. Report what is missing and stop. From 8e644e2c5709a95895232e49bc367106fe0eb36f Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Tue, 21 Apr 2026 15:53:57 -0500 Subject: [PATCH 02/47] Change their selection labels --- templates/detect-and-transcribe/index.js | 2 +- templates/detect-columns-and-lines/index.js | 2 +- templates/detect-columns/index.js | 2 +- templates/detect-lines/index.js | 2 +- templates/transcribe-known-lines/index.js | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/templates/detect-and-transcribe/index.js b/templates/detect-and-transcribe/index.js index 32eb56a..55c4aaf 100644 --- a/templates/detect-and-transcribe/index.js +++ b/templates/detect-and-transcribe/index.js @@ -12,7 +12,7 @@ import { buildTemplateContext } from '../inject-context.js' /** @type {import('../../prompt-generator.js').PromptTemplate} */ export const detectAndTranscribeTemplate = { id: 'detect-and-transcribe', - label: 'Auto Main Content Detection + Auto Transcription', + label: 'Line Detection + Transcription', templateUrl: new URL('./PROMPT.md', import.meta.url), buildContext: buildTemplateContext } diff --git a/templates/detect-columns-and-lines/index.js b/templates/detect-columns-and-lines/index.js index decb8e9..b722206 100644 --- a/templates/detect-columns-and-lines/index.js +++ b/templates/detect-columns-and-lines/index.js @@ -12,7 +12,7 @@ import { 
buildTemplateContext, formatExistingColumns } from '../inject-context.j /** @type {import('../../prompt-generator.js').PromptTemplate} */ export const detectColumnsAndLinesTemplate = { id: 'detect-columns-and-lines', - label: 'Detect Main Text Columns and Individual Lines', + label: 'Line Detection + Column Grouping', templateUrl: new URL('./PROMPT.md', import.meta.url), buildContext: (ctx) => ({ ...buildTemplateContext(ctx), diff --git a/templates/detect-columns/index.js b/templates/detect-columns/index.js index d7c8b16..0afc02d 100644 --- a/templates/detect-columns/index.js +++ b/templates/detect-columns/index.js @@ -12,7 +12,7 @@ import { buildTemplateContext, formatExistingColumns, formatExistingLines } from /** @type {import('../../prompt-generator.js').PromptTemplate} */ export const detectColumnsTemplate = { id: 'detect-columns', - label: 'Detect Main Text Columns', + label: 'Group Lines Into Columns', templateUrl: new URL('./PROMPT.md', import.meta.url), buildContext: (ctx) => ({ ...buildTemplateContext(ctx), diff --git a/templates/detect-lines/index.js b/templates/detect-lines/index.js index d70d494..20b09cc 100644 --- a/templates/detect-lines/index.js +++ b/templates/detect-lines/index.js @@ -12,7 +12,7 @@ import { buildTemplateContext } from '../inject-context.js' /** @type {import('../../prompt-generator.js').PromptTemplate} */ export const detectLinesTemplate = { id: 'detect-lines', - label: 'Detect Individual Lines (No Column Grouping)', + label: 'Line Detection', templateUrl: new URL('./PROMPT.md', import.meta.url), buildContext: buildTemplateContext } diff --git a/templates/transcribe-known-lines/index.js b/templates/transcribe-known-lines/index.js index b46a497..978dbe9 100644 --- a/templates/transcribe-known-lines/index.js +++ b/templates/transcribe-known-lines/index.js @@ -12,7 +12,7 @@ import { buildTemplateContext, formatExistingLines } from '../inject-context.js' /** @type {import('../../prompt-generator.js').PromptTemplate} */ export const 
transcribeKnownLinesTemplate = { id: 'transcribe-known-lines', - label: 'Auto Transcribe Existing Lines', + label: 'Transcribe Existing Line Detection', templateUrl: new URL('./PROMPT.md', import.meta.url), buildContext: ctx => ({ ...buildTemplateContext(ctx), From d4b4c824a56c1d1af63b9f6fd013629e2e8637d5 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Tue, 21 Apr 2026 17:59:03 -0500 Subject: [PATCH 03/47] cleanup while testing --- main.js | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/main.js b/main.js index 3376704..fe0bdfb 100644 --- a/main.js +++ b/main.js @@ -110,14 +110,32 @@ export class PromptsApp { return } + const hasSelector = item => { + const t = item?.target + if (typeof t === 'string') return t.includes('#xywh=') + const sel = t?.selector + const val = Array.isArray(sel) ? sel[0]?.value : sel?.value + return typeof val === 'string' && val.includes('xywh=') + } + + const isPageHydrated = p => + Array.isArray(p?.items) && + (p.items.length === 0 || hasSelector(p.items[0])) + + const isProjectHydrated = p => + Array.isArray(p?.layers) && + (p.layers.length === 0 || Array.isArray(p.layers[0]?.pages)) + // Upgrade a stub project (no layers) when we have a token. - if (!project.layers && this.token) { + if (this.token && !isProjectHydrated(project)) { + console.warn('[tpen-prompts] refetching project — parent sent unhydrated payload') try { project = await fetchProject(projectID, this.token) } catch (err) { console.warn('fetchProject failed', err) } } const pageID = page ? (trailingId(page) ?? '') : '' - // Upgrade a stub page (no items) when we have a token and a pageID. - if (page && !Array.isArray(page.items) && this.token && pageID) { + // Upgrade a stub page (unhydrated items) when we have a token and a pageID. 
+ if (this.token && pageID && !isPageHydrated(page)) { + console.warn('[tpen-prompts] refetching page — parent sent unhydrated payload') try { page = await fetchPageResolved(projectID, pageID, this.token) ?? page } catch (err) { console.warn('fetchPageResolved failed', err) } } From 92f596accb9dd9683ef85d9516be52e516e358ff Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Tue, 21 Apr 2026 18:33:32 -0500 Subject: [PATCH 04/47] prompt tuning round 2 --- templates/detect-and-transcribe/PROMPT.md | 21 ++++++++----- templates/detect-columns-and-lines/PROMPT.md | 23 +++++++++------ templates/detect-columns/PROMPT.md | 31 ++++++++++++-------- templates/detect-lines/PROMPT.md | 23 +++++++++------ templates/transcribe-known-lines/PROMPT.md | 19 ++++++++---- 5 files changed, 73 insertions(+), 44 deletions(-) diff --git a/templates/detect-and-transcribe/PROMPT.md b/templates/detect-and-transcribe/PROMPT.md index 38ae51e..44c2130 100644 --- a/templates/detect-and-transcribe/PROMPT.md +++ b/templates/detect-and-transcribe/PROMPT.md @@ -13,17 +13,22 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Preconditions -1. Required context present: `projectID`, `pageID`, `canvasId`, `token`. If any is missing, stop and report. -2. Vision capability: you must be able to load the page image as raw bytes, measure pixel coordinates, and crop/inspect per-line regions. -3. Authorization: the token shown in the PUT example below must be usable for PUT against the page endpoint. -4. HTTP PUT capability with `Content-Type: application/json`. +All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: + +1. Vision capability: load the page image as raw bytes, measure pixel coordinates, and crop/inspect per-line regions. +2. HTTP PUT capability with `Content-Type: application/json`. If any precondition fails, stop and return a concise failure report. ## Steps -1. 
Fetch the page image. Detect every text line in reading order. -2. For each line, measure a bounding box and convert to integer canvas coordinates. Clamp to the canvas and round. +1. Fetch the page image. Read its actual pixel dimensions (`img_w`, `img_h`) — the IIIF server may return a scaled rendering, not the canvas-native resolution. Detect every text line in reading order and measure each line's bounding box in image-pixel space. +2. Convert every bounding box to integer canvas coordinates using: + - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` + - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` + - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` + - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` + Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). 3. Run handwriting text recognition on each line's crop. Apply the recognition rules below. 4. Build one Annotation per line with the recognized text as the `TextualBody` value and `xywh=x,y,w,h` (integer canvas coordinates). 5. PUT the full set of line annotations to the page endpoint in a single request. @@ -36,7 +41,7 @@ If any precondition fails, stop and return a concise failure report. - Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. - Preserve reading order. Prefer high recall for likely text lines over aggressive pruning. - Keep line boxes tight but do not clip ascenders/descenders. -- Flag ambiguous regions in the report rather than silently dropping them. +- Include borderline regions rather than silently dropping them. ### Recognition (HANDWRITING_TEXT_RECOGNITION) @@ -49,7 +54,7 @@ If any precondition fails, stop and return a concise failure report. ## TPEN API -Save every detected line with its transcription in a single PUT: +Save every detected line with its transcription in a single PUT. 
The `items` array must contain one annotation per detected line; replace `x,y,w,h` with the integer canvas coordinates computed in step 2, and `` with the recognized text (empty string for fully illegible lines). ``` PUT {{pageEndpoint}} diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index c4cfae8..8fe8290 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -17,18 +17,23 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Preconditions -1. Required context present: `projectID`, `pageID`, `canvasId`, `token`. If any is missing, stop and report. -2. Vision capability: you must be able to load the page image as raw bytes and measure pixel coordinates on it. -3. Authorization: the token shown in the PUT example below must be usable for both POST (column) and PUT (page) against the page endpoints. -4. HTTP POST and PUT capability with `Content-Type: application/json`. +All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: + +1. Vision capability: load the page image as raw bytes and measure pixel coordinates on it. +2. HTTP POST and PUT capability with `Content-Type: application/json`. If any precondition fails, stop and return a concise failure report. ## Steps -1. Resolve canvas dimensions. {{canvasDimsResolution}} -2. Fetch the page image. Detect main text column regions in reading order first, then detect the lines inside each column (reading order preserved within each column). If the page visibly has a single text block, create one column containing every detected line — do not subdivide. -3. For every line, measure a bounding box and convert to integer canvas coordinates. Clamp to the canvas and round. Track each line's column index (an integer, 0-based) as you detect it. +1. Fetch the page image. 
Read its actual pixel dimensions (`img_w`, `img_h`) — the IIIF server may return a scaled rendering, not the canvas-native resolution. +2. Detect main text column regions in reading order first, then detect the lines inside each column (reading order preserved within each column). If the page visibly has a single text block, create one column containing every detected line — do not subdivide. Track each line's column index (an integer, 0-based) as you detect it. +3. For every line, measure a bounding box in image-pixel space and convert to integer canvas coordinates using: + - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` + - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` + - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` + - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` + Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). 4. PUT every detected line to the page endpoint in a single request (see TPEN API below). The response returns line ids in the same order as the submitted `items` — use positional mapping to recover ids per column index. 5. For each column, POST `{ label, annotations }` where `annotations` is the server-assigned line ids that belong to that column index. Labels must be unique and must not clash with anything in "Existing columns on this page". 6. Report counts: lines saved, columns created, and any failures. @@ -39,12 +44,12 @@ If any precondition fails, stop and return a concise failure report. - Column labels are page-scoped and must be unique. Do not duplicate an existing column label. - Each line annotation belongs to at most one column. - Preserve reading order across columns and within each column. -- Prefer high recall: include borderline columns/lines and flag them, rather than silently dropping them. +- Prefer high recall: include borderline columns/lines rather than silently dropping them. 
- Keep line boxes tight enough for line-level recognition but generous enough not to clip ascenders/descenders. ## TPEN API -Save all lines via a single PUT: +Save all lines via a single PUT. The `items` array must contain one annotation per detected line; replace `x,y,w,h` with the integer canvas coordinates computed in step 3. ``` PUT {{pageEndpoint}} diff --git a/templates/detect-columns/PROMPT.md b/templates/detect-columns/PROMPT.md index e93d0ca..0fdc8ca 100644 --- a/templates/detect-columns/PROMPT.md +++ b/templates/detect-columns/PROMPT.md @@ -23,33 +23,40 @@ Each entry is `: ` in canvas coordinates. Use these ids v ## Preconditions -1. Required context present: `projectID`, `pageID`, `canvasId`, `token`, and at least one existing line. `lineCount` = `{{lineCount}}`; if this is `0`, stop immediately — this template operates on an existing line set. -2. Vision capability: you must be able to load the page image as raw bytes and measure pixel coordinates on it. -3. Authorization: the token shown in the POST example below must be usable for POST against the page's column endpoint. -4. HTTP POST capability with `Content-Type: application/json`. +All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions, existing-line list) are provided above. This template operates on an existing line set: `lineCount` = `{{lineCount}}`. If `lineCount` is `0`, stop immediately and report. + +You must have: + +1. Vision capability: load the page image as raw bytes and measure pixel coordinates on it. +2. HTTP POST capability with `Content-Type: application/json`. If any precondition fails, stop and return a concise failure report. ## Steps -1. Resolve canvas dimensions. {{canvasDimsResolution}} -2. Fetch the page image and detect main text column regions in reading order. If the page visibly has a single text block, create one column containing every existing line id — do not subdivide. -3. 
For each detected column, determine which of the existing line ids (from the list above) belong to it. Assign a line to the column that contains the center point of its `xywh`. Each line belongs to exactly one column. -4. Choose a unique label per column (e.g., `Column A`, `Column B`) that does not clash with any label under "Existing columns on this page", then POST `{ label, annotations }` to the column endpoint. `annotations` is the array of line ids assigned to that column. -5. Report the count of created columns and any per-column failures. +1. Fetch the page image. Read its actual pixel dimensions (`img_w`, `img_h`) — the IIIF server may return a scaled rendering, not the canvas-native resolution. +2. Detect main text column regions in reading order in image-pixel space. If the page visibly has a single text block, create one column containing every existing line id — do not subdivide. +3. Convert every detected column region to integer canvas coordinates using: + - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` + - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` + - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` + - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` + Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). +4. For each detected column, determine which of the existing line ids (from the list above) belong to it. Assign a line to the column whose canvas-space region contains the center point of the line's `xywh`. If a line's center falls outside every detected column, assign it to the nearest column by Euclidean distance from the center point to the column's region (distance `0` when the point is inside). Each line belongs to exactly one column. +5. Choose a unique label per column (e.g., `Column A`, `Column B`) that does not clash with any label under "Existing columns on this page", then POST `{ label, annotations }` to the column endpoint. 
`annotations` is the array of line ids assigned to that column. +6. Report the count of created columns and any per-column failures. ## Rules - Preserve reading order. Columns proceed as the page is read (left→right for Latin-script layouts; adjust for script tradition). -- Prefer high recall: include borderline regions as columns when they contain text, rather than silently dropping them. +- Prefer high recall: include borderline regions as columns when they contain text rather than silently dropping them. - Keep column boundaries tight enough that each line clearly belongs to one column, but generous enough to avoid clipping existing line selectors. -- Flag ambiguous regions (e.g., marginalia that may be a column) in the report rather than silently including or excluding them. - Column labels are page-scoped and must be unique. Do not duplicate an existing column label. - Annotations cannot be assigned to more than one column. If a line clearly sits in an existing column, do not reassign it. ## TPEN API -Create one column: +Create one POST per detected column. Each `annotations` array contains the line ids assigned to that column in step 4, taken verbatim from the "Existing lines" list above (trailing id segment only). ``` POST {{pageEndpoint}}/column diff --git a/templates/detect-lines/PROMPT.md b/templates/detect-lines/PROMPT.md index fb9368c..e7e8a24 100644 --- a/templates/detect-lines/PROMPT.md +++ b/templates/detect-lines/PROMPT.md @@ -13,18 +13,23 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Preconditions -1. Required context present: `projectID`, `pageID`, `canvasId`, `token`. If any is missing, stop and report. -2. Vision capability: you must be able to load the page image as raw bytes and measure pixel coordinates on it. -3. Authorization: the token shown in the PUT example below must be usable for PUT against the page endpoint. -4. HTTP PUT capability with `Content-Type: application/json`. 
+All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: + +1. Vision capability: load the page image as raw bytes and measure pixel coordinates on it. +2. HTTP PUT capability with `Content-Type: application/json`. If any precondition fails, stop and return a concise failure report. ## Steps -1. Resolve canvas dimensions. {{canvasDimsResolution}} -2. Fetch the page image and detect every text line in reading order. -3. For each detected line, measure a bounding box and convert to integer canvas coordinates. Clamp to the canvas and round. +1. Fetch the page image. Read its actual pixel dimensions (`img_w`, `img_h`) — the IIIF server may return a scaled rendering, not the canvas-native resolution. +2. Detect every text line in reading order and measure each line's bounding box in image-pixel space. +3. Convert every bounding box to integer canvas coordinates using: + - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` + - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` + - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` + - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` + Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). 4. PUT every detected line to the page endpoint in a single request (see TPEN API below). Leave `body` empty — no text yet. 5. Report count and any failure cause. @@ -33,11 +38,11 @@ If any precondition fails, stop and return a concise failure report. - Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. - Preserve reading order across the whole page. - Keep each line box tight enough for line-level recognition — do not merge adjacent lines — but generous enough not to clip ascenders/descenders. -- Prefer high recall: include borderline lines and flag them, rather than silently dropping them. 
+- Prefer high recall: include borderline lines rather than silently dropping them. ## TPEN API -Save all detected lines via a single PUT: +Save all detected lines via a single PUT. The `items` array must contain one annotation per detected line; replace `x,y,w,h` with the integer canvas coordinates computed in step 3. ``` PUT {{pageEndpoint}} diff --git a/templates/transcribe-known-lines/PROMPT.md b/templates/transcribe-known-lines/PROMPT.md index bd7f77e..00b3957 100644 --- a/templates/transcribe-known-lines/PROMPT.md +++ b/templates/transcribe-known-lines/PROMPT.md @@ -19,18 +19,25 @@ Each entry is `: ` in canvas coordinates. ## Preconditions -1. Required context present: `projectID`, `pageID`, `canvasId`, `token`, and at least one existing line. `lineCount` = `{{lineCount}}`; if `0`, stop — this template only revises existing lines. -2. Vision capability: you must be able to load the page image as raw bytes and crop/inspect per-line regions. A fetcher that returns only a prose description of the image does not count. -3. Authorization: the token shown in the PATCH example below must be usable for PATCH against each line-text endpoint. -4. HTTP PATCH capability (with `Content-Type: text/plain`). +All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions, existing-line list) are provided above. This template only revises existing lines: `lineCount` = `{{lineCount}}`. If `lineCount` is `0`, stop immediately and report. + +You must have: + +1. Vision capability: load the page image as raw bytes and crop/inspect per-line regions. A fetcher that returns only a prose description of the image does not count. +2. HTTP PATCH capability (with `Content-Type: text/plain`). If any precondition fails, stop and return a concise failure report naming the missing capability. ## Steps -1. Fetch the page image and a per-line crop using each line's `xywh` from the list above. 
Verify each crop visibly contains a single line of inked text. +1. Fetch the page image. Read its actual pixel dimensions (`img_w`, `img_h`) — the IIIF server may return a scaled rendering, not the canvas-native resolution. The `xywh` selectors above are in canvas space; convert each to image-pixel space before cropping using: + - `pixel_x = round(canvas_x * img_w / {{canvasWidth}})` + - `pixel_y = round(canvas_y * img_h / {{canvasHeight}})` + - `pixel_w = round(canvas_w * img_w / {{canvasWidth}})` + - `pixel_h = round(canvas_h * img_h / {{canvasHeight}})` + Crop each line region and verify it visibly contains a single line of inked text. 2. Run handwriting text recognition over each crop. Apply the recognition rules below. -3. For each line, PATCH the text to its line-text endpoint. +3. For each line, PATCH the text to its line-text endpoint — one PATCH per line in the "Existing lines" list. 4. Report a per-line summary: how many succeeded, how many failed, and the HTTP status for any failure. ## Rules From 0c42a6640d34f8e4bf47f0854d7a44cd837c89f4 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Tue, 21 Apr 2026 19:28:04 -0500 Subject: [PATCH 05/47] accuracy bundle --- templates/detect-and-transcribe/PROMPT.md | 6 ++++-- templates/detect-columns-and-lines/PROMPT.md | 6 ++++-- templates/detect-columns/PROMPT.md | 6 ++++-- templates/detect-lines/PROMPT.md | 6 ++++-- templates/transcribe-known-lines/PROMPT.md | 6 ++++-- 5 files changed, 20 insertions(+), 10 deletions(-) diff --git a/templates/detect-and-transcribe/PROMPT.md b/templates/detect-and-transcribe/PROMPT.md index 44c2130..5241c56 100644 --- a/templates/detect-and-transcribe/PROMPT.md +++ b/templates/detect-and-transcribe/PROMPT.md @@ -15,14 +15,16 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: -1. 
Vision capability: load the page image as raw bytes, measure pixel coordinates, and crop/inspect per-line regions. +1. Vision capability: load the page image as raw bytes, measure pixel coordinates programmatically from the full-resolution data, and crop/inspect per-line regions. Any image preview rendered back to you is downsampled — never read coordinates off a previewed image by eye. 2. HTTP PUT capability with `Content-Type: application/json`. +Use only tools already available in your environment. Do not install packages, libraries, or system utilities (`pip`, `npm`, `apt`, `brew`, `cargo`, `--break-system-packages`, etc.) — if a required capability is missing, stop and return a failure report naming it rather than installing anything. + If any precondition fails, stop and return a concise failure report. ## Steps -1. Fetch the page image. Read its actual pixel dimensions (`img_w`, `img_h`) — the IIIF server may return a scaled rendering, not the canvas-native resolution. Detect every text line in reading order and measure each line's bounding box in image-pixel space. +1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, prefer a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. When you need to inspect or transcribe a specific region at full fidelity, request it server-side as `{base}/x,y,w,h/max/0/default.jpg` rather than downloading the whole page and cropping locally. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. If you measured coordinates inside a region crop, add the crop's `x,y` origin back before applying the canvas conversion below. Detect every text line in reading order and measure each line's bounding box in image-pixel space. 2. 
Convert every bounding box to integer canvas coordinates using: - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index 8fe8290..5837541 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -19,14 +19,16 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: -1. Vision capability: load the page image as raw bytes and measure pixel coordinates on it. +1. Vision capability: load the page image as raw bytes and measure coordinates programmatically from the full-resolution pixel data. Any image preview rendered back to you is downsampled — never read coordinates off a previewed image by eye. 2. HTTP POST and PUT capability with `Content-Type: application/json`. +Use only tools already available in your environment. Do not install packages, libraries, or system utilities (`pip`, `npm`, `apt`, `brew`, `cargo`, `--break-system-packages`, etc.) — if a required capability is missing, stop and return a failure report naming it rather than installing anything. + If any precondition fails, stop and return a concise failure report. ## Steps -1. Fetch the page image. Read its actual pixel dimensions (`img_w`, `img_h`) — the IIIF server may return a scaled rendering, not the canvas-native resolution. +1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. 
For the page-overview pass, prefer a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. When you need to inspect a specific region at full fidelity, request it server-side as `{base}/x,y,w,h/max/0/default.jpg` rather than downloading the whole page and cropping locally. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. If you measured coordinates inside a region crop, add the crop's `x,y` origin back before applying the canvas conversion below. 2. Detect main text column regions in reading order first, then detect the lines inside each column (reading order preserved within each column). If the page visibly has a single text block, create one column containing every detected line — do not subdivide. Track each line's column index (an integer, 0-based) as you detect it. 3. For every line, measure a bounding box in image-pixel space and convert to integer canvas coordinates using: - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` diff --git a/templates/detect-columns/PROMPT.md b/templates/detect-columns/PROMPT.md index 0fdc8ca..809287e 100644 --- a/templates/detect-columns/PROMPT.md +++ b/templates/detect-columns/PROMPT.md @@ -27,14 +27,16 @@ All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, You must have: -1. Vision capability: load the page image as raw bytes and measure pixel coordinates on it. +1. Vision capability: load the page image as raw bytes and measure coordinates programmatically from the full-resolution pixel data. Any image preview rendered back to you is downsampled — never read coordinates off a previewed image by eye. 2. HTTP POST capability with `Content-Type: application/json`. +Use only tools already available in your environment. Do not install packages, libraries, or system utilities (`pip`, `npm`, `apt`, `brew`, `cargo`, `--break-system-packages`, etc.) 
— if a required capability is missing, stop and return a failure report naming it rather than installing anything. + If any precondition fails, stop and return a concise failure report. ## Steps -1. Fetch the page image. Read its actual pixel dimensions (`img_w`, `img_h`) — the IIIF server may return a scaled rendering, not the canvas-native resolution. +1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, prefer a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. When you need to inspect a specific region at full fidelity, request it server-side as `{base}/x,y,w,h/max/0/default.jpg` rather than downloading the whole page and cropping locally. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. If you measured coordinates inside a region crop, add the crop's `x,y` origin back before applying the canvas conversion below. 2. Detect main text column regions in reading order in image-pixel space. If the page visibly has a single text block, create one column containing every existing line id — do not subdivide. 3. Convert every detected column region to integer canvas coordinates using: - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` diff --git a/templates/detect-lines/PROMPT.md b/templates/detect-lines/PROMPT.md index e7e8a24..b11b899 100644 --- a/templates/detect-lines/PROMPT.md +++ b/templates/detect-lines/PROMPT.md @@ -15,14 +15,16 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: -1. Vision capability: load the page image as raw bytes and measure pixel coordinates on it. +1. 
Vision capability: load the page image as raw bytes and measure coordinates programmatically from the full-resolution pixel data. Any image preview rendered back to you is downsampled — never read coordinates off a previewed image by eye. 2. HTTP PUT capability with `Content-Type: application/json`. +Use only tools already available in your environment. Do not install packages, libraries, or system utilities (`pip`, `npm`, `apt`, `brew`, `cargo`, `--break-system-packages`, etc.) — if a required capability is missing, stop and return a failure report naming it rather than installing anything. + If any precondition fails, stop and return a concise failure report. ## Steps -1. Fetch the page image. Read its actual pixel dimensions (`img_w`, `img_h`) — the IIIF server may return a scaled rendering, not the canvas-native resolution. +1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, prefer a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. When you need to inspect a specific region at full fidelity, request it server-side as `{base}/x,y,w,h/max/0/default.jpg` rather than downloading the whole page and cropping locally. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. If you measured coordinates inside a region crop, add the crop's `x,y` origin back before applying the canvas conversion below. 2. Detect every text line in reading order and measure each line's bounding box in image-pixel space. 3. 
Convert every bounding box to integer canvas coordinates using: - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` diff --git a/templates/transcribe-known-lines/PROMPT.md b/templates/transcribe-known-lines/PROMPT.md index 00b3957..e6cc69d 100644 --- a/templates/transcribe-known-lines/PROMPT.md +++ b/templates/transcribe-known-lines/PROMPT.md @@ -23,14 +23,16 @@ All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, You must have: -1. Vision capability: load the page image as raw bytes and crop/inspect per-line regions. A fetcher that returns only a prose description of the image does not count. +1. Vision capability: load the page image as raw bytes and crop/inspect per-line regions. A fetcher that returns only a prose description of the image does not count. Any image preview rendered back to you is downsampled — never read coordinates off a previewed image by eye. 2. HTTP PATCH capability (with `Content-Type: text/plain`). +Use only tools already available in your environment. Do not install packages, libraries, or system utilities (`pip`, `npm`, `apt`, `brew`, `cargo`, `--break-system-packages`, etc.) — if a required capability is missing, stop and return a failure report naming it rather than installing anything. + If any precondition fails, stop and return a concise failure report naming the missing capability. ## Steps -1. Fetch the page image. Read its actual pixel dimensions (`img_w`, `img_h`) — the IIIF server may return a scaled rendering, not the canvas-native resolution. The `xywh` selectors above are in canvas space; convert each to image-pixel space before cropping using: +1. Resolve `img_w`, `img_h`. 
If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}` and GET `{base}/info.json` for the dimensions; fetch each line's region server-side as `{base}/x,y,w,h/max/0/default.jpg` rather than downloading the whole page and cropping locally. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. The `xywh` selectors above are in canvas space; convert each to image-pixel space (for the IIIF region URL or the local crop) using: - `pixel_x = round(canvas_x * img_w / {{canvasWidth}})` - `pixel_y = round(canvas_y * img_h / {{canvasHeight}})` - `pixel_w = round(canvas_w * img_w / {{canvasWidth}})` From a36c1a1f3c9a065e11d9126cae39b82ae1f851ab Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Tue, 21 Apr 2026 20:49:16 -0500 Subject: [PATCH 06/47] Tuning round 3 --- templates/detect-and-transcribe/PROMPT.md | 24 +++++++++++--------- templates/detect-columns-and-lines/PROMPT.md | 14 +++++++----- templates/detect-columns/PROMPT.md | 11 +++++---- templates/detect-columns/index.js | 2 +- templates/detect-lines/PROMPT.md | 10 ++++---- templates/inject-context.js | 10 ++++---- templates/transcribe-known-lines/PROMPT.md | 6 ++--- templates/transcribe-known-lines/index.js | 2 +- 8 files changed, 43 insertions(+), 36 deletions(-) diff --git a/templates/detect-and-transcribe/PROMPT.md b/templates/detect-and-transcribe/PROMPT.md index 5241c56..11ad66e 100644 --- a/templates/detect-and-transcribe/PROMPT.md +++ b/templates/detect-and-transcribe/PROMPT.md @@ -15,7 +15,7 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: -1. Vision capability: load the page image as raw bytes, measure pixel coordinates programmatically from the full-resolution data, and crop/inspect per-line regions. 
Any image preview rendered back to you is downsampled — never read coordinates off a previewed image by eye. +1. Programmatic pixel measurement. You must be able to open the full-resolution image and read its pixel array directly (e.g. an image-decoding library that yields a 2D/3D numeric buffer your code can iterate over). Reading the file bytes is not enough — you need pixel access. **Eyeballing coordinates from any rendered/previewed image is forbidden and counts as a missing capability**, because every preview shown back to you is downsampled and visually estimated bounds will be wrong. Run a one-line probe that proves you can read pixel data programmatically. If the probe fails — module not found, no decoder available, or any other reason you cannot get a numeric pixel array out of the image without installing anything — stop immediately. You may suggest options for your given environment or LLM capabilities in your failure report. 2. HTTP PUT capability with `Content-Type: application/json`. Use only tools already available in your environment. Do not install packages, libraries, or system utilities (`pip`, `npm`, `apt`, `brew`, `cargo`, `--break-system-packages`, etc.) — if a required capability is missing, stop and return a failure report naming it rather than installing anything. @@ -24,26 +24,28 @@ If any precondition fails, stop and return a concise failure report. ## Steps -1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, prefer a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. 
When you need to inspect or transcribe a specific region at full fidelity, request it server-side as `{base}/x,y,w,h/max/0/default.jpg` rather than downloading the whole page and cropping locally. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. If you measured coordinates inside a region crop, add the crop's `x,y` origin back before applying the canvas conversion below. Detect every text line in reading order and measure each line's bounding box in image-pixel space. -2. Convert every bounding box to integer canvas coordinates using: +1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, prefer a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. When you need to inspect a specific region at full fidelity, request it server-side as `{base}/x,y,w,h/max/0/default.jpg` rather than downloading the whole page and cropping locally. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. If you measured coordinates inside a region crop, add the crop's `x,y` origin back before applying the canvas conversion below. +2. Detect text lines across the whole page in reading order. This task does not create TPEN columns. +3. For every line, measure a bounding box in image-pixel space and convert to integer canvas coordinates using: - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). -3. Run handwriting text recognition on each line's crop. Apply the recognition rules below. -4. 
Build one Annotation per line with the recognized text as the `TextualBody` value and `xywh=x,y,w,h` (integer canvas coordinates). -5. PUT the full set of line annotations to the page endpoint in a single request. -6. Report counts (lines saved) and notable ambiguities (e.g., illegible lines transcribed as empty or flagged). +4. Run handwriting text recognition on each line's crop. Apply the recognition rules below. +5. Build one Annotation per line with the recognized text as the `TextualBody` value and `xywh=x,y,w,h` as the bounding box fragment selector. +6. PUT every detected line to the page endpoint in a single request (see TPEN API below). +7. Report counts: lines saved, lines with non-empty text, lines flagged uncertain. +8. Report notable ambiguities (e.g., illegible lines transcribed as empty or flagged). ## Rules ### Detection (IMAGE_ANALYSIS) - Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. -- Preserve reading order. Prefer high recall for likely text lines over aggressive pruning. -- Keep line boxes tight but do not clip ascenders/descenders. -- Include borderline regions rather than silently dropping them. +- Preserve reading order across the whole page. +- Lines must be tight. Bound the actual text stroke run and nothing more. Never emit a single line that covers what a human reader would call two or more lines; when uncertain whether a tall run is one line or several, split it. +- Do not include decorative borders, frame rules, ornaments, illustrations, or the inter-line whitespace above/below text as part of a line. ### Recognition (HANDWRITING_TEXT_RECOGNITION) @@ -56,7 +58,7 @@ If any precondition fails, stop and return a concise failure report. ## TPEN API -Save every detected line with its transcription in a single PUT. 
The `items` array must contain one annotation per detected line; replace `x,y,w,h` with the integer canvas coordinates computed in step 2, and `` with the recognized text (empty string for fully illegible lines). +Save every detected line with its transcription in a single PUT. The `items` array must contain one annotation per detected line; replace `x,y,w,h` with the integer canvas coordinates computed in step 3, and `` with the recognized text (empty string for fully illegible lines). ``` PUT {{pageEndpoint}} diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index 5837541..8b1c225 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -19,7 +19,7 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: -1. Vision capability: load the page image as raw bytes and measure coordinates programmatically from the full-resolution pixel data. Any image preview rendered back to you is downsampled — never read coordinates off a previewed image by eye. +1. Programmatic pixel measurement. You must be able to open the full-resolution image and read its pixel array directly (e.g. an image-decoding library that yields a 2D/3D numeric buffer your code can iterate over). Reading the file bytes is not enough — you need pixel access. **Eyeballing coordinates from any rendered/previewed image is forbidden and counts as a missing capability**, because every preview shown back to you is downsampled and visually estimated bounds will be wrong. Run a one-line probe that proves you can read pixel data programmatically. If the probe fails — module not found, no decoder available, or any other reason you cannot get a numeric pixel array out of the image without installing anything — stop immediately. 
You may suggestion options for your given environment or LLM capabilities in your failure report. 2. HTTP POST and PUT capability with `Content-Type: application/json`. Use only tools already available in your environment. Do not install packages, libraries, or system utilities (`pip`, `npm`, `apt`, `brew`, `cargo`, `--break-system-packages`, etc.) — if a required capability is missing, stop and return a failure report naming it rather than installing anything. @@ -36,8 +36,8 @@ If any precondition fails, stop and return a concise failure report. - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). -4. PUT every detected line to the page endpoint in a single request (see TPEN API below). The response returns line ids in the same order as the submitted `items` — use positional mapping to recover ids per column index. -5. For each column, POST `{ label, annotations }` where `annotations` is the server-assigned line ids that belong to that column index. Labels must be unique and must not clash with anything in "Existing columns on this page". +4. PUT every detected line to the page endpoint in a single request (see TPEN API below). Leave `body` empty — no text yet. The response returns line ids in the same order as the submitted `items` — use positional mapping to recover ids per column index. +5. For each column, POST `{ label, annotations }` where each entry in `annotations` is the full annotation id (URI) returned by the PUT — not a trailing-segment shorthand. Labels must be unique and must not clash with anything in "Existing columns on this page". 6. Report counts: lines saved, columns created, and any failures. ## Rules @@ -46,8 +46,10 @@ If any precondition fails, stop and return a concise failure report. - Column labels are page-scoped and must be unique. Do not duplicate an existing column label. 
- Each line annotation belongs to at most one column. - Preserve reading order across columns and within each column. -- Prefer high recall: include borderline columns/lines rather than silently dropping them. -- Keep line boxes tight enough for line-level recognition but generous enough not to clip ascenders/descenders. +- Line geometry is the primary accuracy target. Column grouping is secondary — for a single-column page, one column containing every line is correct. +- Lines must be tight. Bound the actual text stroke run and nothing more. Never emit a single line that covers what a human reader would call two or more lines; when uncertain whether a tall run is one line or several, split it. +- Do not include decorative borders, frame rules, ornaments, illustrations, or the inter-line whitespace above/below text as part of a line. +- Do not POST a column with an empty `annotations` array — the server rejects it. Skip any detected column that ends up with zero assigned lines. ## TPEN API @@ -88,7 +90,7 @@ Content-Type: application/json { "label": "Column A", - "annotations": ["", ""] + "annotations": ["", ""] } ``` diff --git a/templates/detect-columns/PROMPT.md b/templates/detect-columns/PROMPT.md index 809287e..0d5261b 100644 --- a/templates/detect-columns/PROMPT.md +++ b/templates/detect-columns/PROMPT.md @@ -17,7 +17,7 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Existing lines -Each entry is `: ` in canvas coordinates. Use these ids verbatim when assigning lines to columns. +Each entry is `: ` in canvas coordinates. Use the full annotation URI verbatim when assigning lines to columns. {{existingLines}} @@ -27,7 +27,7 @@ All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, You must have: -1. Vision capability: load the page image as raw bytes and measure coordinates programmatically from the full-resolution pixel data. 
Any image preview rendered back to you is downsampled — never read coordinates off a previewed image by eye. +1. Programmatic pixel measurement. You must be able to open the full-resolution image and read its pixel array directly (e.g. an image-decoding library that yields a 2D/3D numeric buffer your code can iterate over). Reading the file bytes is not enough — you need pixel access. **Eyeballing coordinates from any rendered/previewed image is forbidden and counts as a missing capability**, because every preview shown back to you is downsampled and visually estimated bounds will be wrong. Run a one-line probe that proves you can read pixel data programmatically. If the probe fails — module not found, no decoder available, or any other reason you cannot get a numeric pixel array out of the image without installing anything — stop immediately. You may suggestion options for your given environment or LLM capabilities in your failure report. 2. HTTP POST capability with `Content-Type: application/json`. Use only tools already available in your environment. Do not install packages, libraries, or system utilities (`pip`, `npm`, `apt`, `brew`, `cargo`, `--break-system-packages`, etc.) — if a required capability is missing, stop and return a failure report naming it rather than installing anything. @@ -55,10 +55,11 @@ If any precondition fails, stop and return a concise failure report. - Keep column boundaries tight enough that each line clearly belongs to one column, but generous enough to avoid clipping existing line selectors. - Column labels are page-scoped and must be unique. Do not duplicate an existing column label. - Annotations cannot be assigned to more than one column. If a line clearly sits in an existing column, do not reassign it. +- Do not POST a column with an empty `annotations` array — the server rejects it. Skip any detected column that ends up with zero assigned lines. ## TPEN API -Create one POST per detected column. 
Each `annotations` array contains the line ids assigned to that column in step 4, taken verbatim from the "Existing lines" list above (trailing id segment only). +Create one POST per detected column. Each `annotations` array contains the full annotation URIs assigned to that column in step 4, taken verbatim from the "Existing lines" list above. ``` POST {{pageEndpoint}}/column @@ -67,11 +68,11 @@ Content-Type: application/json { "label": "Column A", - "annotations": ["", "", ""] + "annotations": ["", "", ""] } ``` -Each `` is the trailing id segment of a line annotation listed above. +Each `` is the full id of a line annotation listed above, used verbatim. On any non-2xx response, stop the column in progress and include the HTTP status and response body in the failure report. diff --git a/templates/detect-columns/index.js b/templates/detect-columns/index.js index 0afc02d..6ef3271 100644 --- a/templates/detect-columns/index.js +++ b/templates/detect-columns/index.js @@ -12,7 +12,7 @@ import { buildTemplateContext, formatExistingColumns, formatExistingLines } from /** @type {import('../../prompt-generator.js').PromptTemplate} */ export const detectColumnsTemplate = { id: 'detect-columns', - label: 'Group Lines Into Columns', + label: 'Group Existing Lines Into Columns', templateUrl: new URL('./PROMPT.md', import.meta.url), buildContext: (ctx) => ({ ...buildTemplateContext(ctx), diff --git a/templates/detect-lines/PROMPT.md b/templates/detect-lines/PROMPT.md index b11b899..3216539 100644 --- a/templates/detect-lines/PROMPT.md +++ b/templates/detect-lines/PROMPT.md @@ -15,7 +15,7 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: -1. Vision capability: load the page image as raw bytes and measure coordinates programmatically from the full-resolution pixel data. 
Any image preview rendered back to you is downsampled — never read coordinates off a previewed image by eye. +1. Programmatic pixel measurement. You must be able to open the full-resolution image and read its pixel array directly (e.g. an image-decoding library that yields a 2D/3D numeric buffer your code can iterate over). Reading the file bytes is not enough — you need pixel access. **Eyeballing coordinates from any rendered/previewed image is forbidden and counts as a missing capability**, because every preview shown back to you is downsampled and visually estimated bounds will be wrong. Run a one-line probe that proves you can read pixel data programmatically. If the probe fails — module not found, no decoder available, or any other reason you cannot get a numeric pixel array out of the image without installing anything — stop immediately. You may suggestion options for your given environment or LLM capabilities in your failure report. 2. HTTP PUT capability with `Content-Type: application/json`. Use only tools already available in your environment. Do not install packages, libraries, or system utilities (`pip`, `npm`, `apt`, `brew`, `cargo`, `--break-system-packages`, etc.) — if a required capability is missing, stop and return a failure report naming it rather than installing anything. @@ -25,8 +25,8 @@ If any precondition fails, stop and return a concise failure report. ## Steps 1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, prefer a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. When you need to inspect a specific region at full fidelity, request it server-side as `{base}/x,y,w,h/max/0/default.jpg` rather than downloading the whole page and cropping locally. 
Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. If you measured coordinates inside a region crop, add the crop's `x,y` origin back before applying the canvas conversion below. -2. Detect every text line in reading order and measure each line's bounding box in image-pixel space. -3. Convert every bounding box to integer canvas coordinates using: +2. Detect text lines across the whole page in reading order. This task does not create TPEN columns. +3. For every line, measure a bounding box in image-pixel space and convert to integer canvas coordinates using: - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` @@ -39,8 +39,8 @@ If any precondition fails, stop and return a concise failure report. - Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. - Preserve reading order across the whole page. -- Keep each line box tight enough for line-level recognition — do not merge adjacent lines — but generous enough not to clip ascenders/descenders. -- Prefer high recall: include borderline lines rather than silently dropping them. +- Lines must be tight. Bound the actual text stroke run and nothing more. Never emit a single line that covers what a human reader would call two or more lines; when uncertain whether a tall run is one line or several, split it. +- Do not include decorative borders, frame rules, ornaments, illustrations, or the inter-line whitespace above/below text as part of a line. ## TPEN API diff --git a/templates/inject-context.js b/templates/inject-context.js index 84a8ed5..9f86193 100644 --- a/templates/inject-context.js +++ b/templates/inject-context.js @@ -97,8 +97,10 @@ function extractXywh(item) { /** * Render the current line annotations on a page as a markdown bullet list - * keyed by trailing line id and xywh selector. 
Pre-resolving this list in the - * parent saves the LLM a GET + parse round trip. + * keyed by full annotation URI and xywh selector. Pre-resolving this list in + * the parent saves the LLM a GET + parse round trip. Column POSTs require the + * full URI to match `page.items[].id` server-side; PATCH-line-text consumers + * can split the URI's trailing segment themselves. * @param {any} fetchedPage the page object returned by `fetchPageResolved`. * @returns {string} */ @@ -108,9 +110,9 @@ export function formatExistingLines(fetchedPage) { return '- (No existing lines on this page.)' } return items.map(item => { - const lineId = trailingId(item) ?? '(unknown)' + const lineUri = getIRI(item) ?? '(unknown)' const xywh = extractXywh(item) ?? '(no xywh selector)' - return `- ${lineId}: ${xywh}` + return `- ${lineUri}: ${xywh}` }).join('\n') } diff --git a/templates/transcribe-known-lines/PROMPT.md b/templates/transcribe-known-lines/PROMPT.md index e6cc69d..eb0a098 100644 --- a/templates/transcribe-known-lines/PROMPT.md +++ b/templates/transcribe-known-lines/PROMPT.md @@ -13,7 +13,7 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Existing lines -Each entry is `: ` in canvas coordinates. +Each entry is `: ` in canvas coordinates. {{existingLines}} @@ -23,7 +23,7 @@ All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, You must have: -1. Vision capability: load the page image as raw bytes and crop/inspect per-line regions. A fetcher that returns only a prose description of the image does not count. Any image preview rendered back to you is downsampled — never read coordinates off a previewed image by eye. +1. Vision capability: fetch each line's region as image bytes (e.g. via a IIIF region URL) and read the inked glyphs directly. A fetcher that returns only a prose description of the image does not count, and any preview rendered back into chat is downsampled — do not transcribe from a preview. 2. 
HTTP PATCH capability (with `Content-Type: text/plain`). Use only tools already available in your environment. Do not install packages, libraries, or system utilities (`pip`, `npm`, `apt`, `brew`, `cargo`, `--break-system-packages`, etc.) — if a required capability is missing, stop and return a failure report naming it rather than installing anything. @@ -62,7 +62,7 @@ Content-Type: text/plain ``` -`` is the trailing id segment of the annotation's id (the last path segment of the annotation URI). +`` is the trailing path segment of the annotation URI listed above (the last `/`-separated segment). On any non-2xx response, include the HTTP status and response body in that line's failure report. diff --git a/templates/transcribe-known-lines/index.js b/templates/transcribe-known-lines/index.js index 978dbe9..b72b0a3 100644 --- a/templates/transcribe-known-lines/index.js +++ b/templates/transcribe-known-lines/index.js @@ -12,7 +12,7 @@ import { buildTemplateContext, formatExistingLines } from '../inject-context.js' /** @type {import('../../prompt-generator.js').PromptTemplate} */ export const transcribeKnownLinesTemplate = { id: 'transcribe-known-lines', - label: 'Transcribe Existing Line Detection', + label: 'Transcribe Over Existing Lines', templateUrl: new URL('./PROMPT.md', import.meta.url), buildContext: ctx => ({ ...buildTemplateContext(ctx), From b5d8ff97a8c719c8c37fd9d8812a99510c1a43d1 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Tue, 21 Apr 2026 21:10:53 -0500 Subject: [PATCH 07/47] Changes from review --- main.js | 13 +++++-------- templates/detect-and-transcribe/PROMPT.md | 4 ++-- templates/detect-columns-and-lines/PROMPT.md | 4 ++-- templates/detect-columns/PROMPT.md | 4 ++-- templates/detect-lines/PROMPT.md | 4 ++-- templates/inject-context.js | 10 ---------- templates/transcribe-known-lines/PROMPT.md | 4 ++-- 7 files changed, 15 insertions(+), 28 deletions(-) diff --git a/main.js b/main.js index fe0bdfb..39ae391 100644 --- a/main.js +++ b/main.js 
@@ -110,17 +110,14 @@ export class PromptsApp { return } - const hasSelector = item => { - const t = item?.target - if (typeof t === 'string') return t.includes('#xywh=') - const sel = t?.selector - const val = Array.isArray(sel) ? sel[0]?.value : sel?.value - return typeof val === 'string' && val.includes('xywh=') - } + // Hydrated items carry more than `{id, type}`; unhydrated vault refs + // carry only those two keys. + const isHydratedItem = it => it && typeof it === 'object' && + (it.target !== undefined || it.body !== undefined || it.motivation !== undefined) const isPageHydrated = p => Array.isArray(p?.items) && - (p.items.length === 0 || hasSelector(p.items[0])) + (p.items.length === 0 || isHydratedItem(p.items[0])) const isProjectHydrated = p => Array.isArray(p?.layers) && diff --git a/templates/detect-and-transcribe/PROMPT.md b/templates/detect-and-transcribe/PROMPT.md index 11ad66e..c6ec445 100644 --- a/templates/detect-and-transcribe/PROMPT.md +++ b/templates/detect-and-transcribe/PROMPT.md @@ -15,10 +15,10 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: -1. Programmatic pixel measurement. You must be able to open the full-resolution image and read its pixel array directly (e.g. an image-decoding library that yields a 2D/3D numeric buffer your code can iterate over). Reading the file bytes is not enough — you need pixel access. **Eyeballing coordinates from any rendered/previewed image is forbidden and counts as a missing capability**, because every preview shown back to you is downsampled and visually estimated bounds will be wrong. Run a one-line probe that proves you can read pixel data programmatically. 
If the probe fails — module not found, no decoder available, or any other reason you cannot get a numeric pixel array out of the image without installing anything — stop immediately. You may suggestion options for your given environment or LLM capabilities in your failure report. +1. Programmatic pixel access to the full-resolution image — a numeric pixel buffer you can iterate over. A prose description of the image, or any measurement taken from a rendered or previewed image, does not qualify; previews are downsampled and visually estimated bounds will be wrong. **If you cannot obtain pixel data with the capabilities already available to you, stop now and return a failure report naming the missing capability.** 2. HTTP PUT capability with `Content-Type: application/json`. -Use only tools already available in your environment. Do not install packages, libraries, or system utilities (`pip`, `npm`, `apt`, `brew`, `cargo`, `--break-system-packages`, etc.) — if a required capability is missing, stop and return a failure report naming it rather than installing anything. +Use only tools already available in your environment. Do not install packages, libraries, or system utilities. If a required capability is missing, stop and return a failure report naming it rather than installing anything. If any precondition fails, stop and return a concise failure report. diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index 8b1c225..52c4f6b 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -19,10 +19,10 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: -1. Programmatic pixel measurement. You must be able to open the full-resolution image and read its pixel array directly (e.g. 
an image-decoding library that yields a 2D/3D numeric buffer your code can iterate over). Reading the file bytes is not enough — you need pixel access. **Eyeballing coordinates from any rendered/previewed image is forbidden and counts as a missing capability**, because every preview shown back to you is downsampled and visually estimated bounds will be wrong. Run a one-line probe that proves you can read pixel data programmatically. If the probe fails — module not found, no decoder available, or any other reason you cannot get a numeric pixel array out of the image without installing anything — stop immediately. You may suggestion options for your given environment or LLM capabilities in your failure report. +1. Programmatic pixel access to the full-resolution image — a numeric pixel buffer you can iterate over. A prose description of the image, or any measurement taken from a rendered or previewed image, does not qualify; previews are downsampled and visually estimated bounds will be wrong. **If you cannot obtain pixel data with the capabilities already available to you, stop now and return a failure report naming the missing capability.** 2. HTTP POST and PUT capability with `Content-Type: application/json`. -Use only tools already available in your environment. Do not install packages, libraries, or system utilities (`pip`, `npm`, `apt`, `brew`, `cargo`, `--break-system-packages`, etc.) — if a required capability is missing, stop and return a failure report naming it rather than installing anything. +Use only tools already available in your environment. Do not install packages, libraries, or system utilities. If a required capability is missing, stop and return a failure report naming it rather than installing anything. If any precondition fails, stop and return a concise failure report. 
diff --git a/templates/detect-columns/PROMPT.md b/templates/detect-columns/PROMPT.md index 0d5261b..9a4087f 100644 --- a/templates/detect-columns/PROMPT.md +++ b/templates/detect-columns/PROMPT.md @@ -27,10 +27,10 @@ All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, You must have: -1. Programmatic pixel measurement. You must be able to open the full-resolution image and read its pixel array directly (e.g. an image-decoding library that yields a 2D/3D numeric buffer your code can iterate over). Reading the file bytes is not enough — you need pixel access. **Eyeballing coordinates from any rendered/previewed image is forbidden and counts as a missing capability**, because every preview shown back to you is downsampled and visually estimated bounds will be wrong. Run a one-line probe that proves you can read pixel data programmatically. If the probe fails — module not found, no decoder available, or any other reason you cannot get a numeric pixel array out of the image without installing anything — stop immediately. You may suggestion options for your given environment or LLM capabilities in your failure report. +1. Programmatic pixel access to the full-resolution image — a numeric pixel buffer you can iterate over. A prose description of the image, or any measurement taken from a rendered or previewed image, does not qualify; previews are downsampled and visually estimated bounds will be wrong. **If you cannot obtain pixel data with the capabilities already available to you, stop now and return a failure report naming the missing capability.** 2. HTTP POST capability with `Content-Type: application/json`. -Use only tools already available in your environment. Do not install packages, libraries, or system utilities (`pip`, `npm`, `apt`, `brew`, `cargo`, `--break-system-packages`, etc.) — if a required capability is missing, stop and return a failure report naming it rather than installing anything. 
+Use only tools already available in your environment. Do not install packages, libraries, or system utilities. If a required capability is missing, stop and return a failure report naming it rather than installing anything. If any precondition fails, stop and return a concise failure report. diff --git a/templates/detect-lines/PROMPT.md b/templates/detect-lines/PROMPT.md index 3216539..05e3cf6 100644 --- a/templates/detect-lines/PROMPT.md +++ b/templates/detect-lines/PROMPT.md @@ -15,10 +15,10 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: -1. Programmatic pixel measurement. You must be able to open the full-resolution image and read its pixel array directly (e.g. an image-decoding library that yields a 2D/3D numeric buffer your code can iterate over). Reading the file bytes is not enough — you need pixel access. **Eyeballing coordinates from any rendered/previewed image is forbidden and counts as a missing capability**, because every preview shown back to you is downsampled and visually estimated bounds will be wrong. Run a one-line probe that proves you can read pixel data programmatically. If the probe fails — module not found, no decoder available, or any other reason you cannot get a numeric pixel array out of the image without installing anything — stop immediately. You may suggestion options for your given environment or LLM capabilities in your failure report. +1. Programmatic pixel access to the full-resolution image — a numeric pixel buffer you can iterate over. A prose description of the image, or any measurement taken from a rendered or previewed image, does not qualify; previews are downsampled and visually estimated bounds will be wrong. 
**If you cannot obtain pixel data with the capabilities already available to you, stop now and return a failure report naming the missing capability.** 2. HTTP PUT capability with `Content-Type: application/json`. -Use only tools already available in your environment. Do not install packages, libraries, or system utilities (`pip`, `npm`, `apt`, `brew`, `cargo`, `--break-system-packages`, etc.) — if a required capability is missing, stop and return a failure report naming it rather than installing anything. +Use only tools already available in your environment. Do not install packages, libraries, or system utilities. If a required capability is missing, stop and return a failure report naming it rather than installing anything. If any precondition fails, stop and return a concise failure report. diff --git a/templates/inject-context.js b/templates/inject-context.js index 9f86193..eebd863 100644 --- a/templates/inject-context.js +++ b/templates/inject-context.js @@ -10,7 +10,6 @@ * @author thehabes */ -import { getAgentIRIFromToken } from '../auth.js' import { getIRI, trailingId } from '../iiif-ids.js' /** @@ -49,12 +48,6 @@ export function buildTemplateContext(ctx) { const canvasWidth = width != null ? String(width) : '(unknown)' const canvasHeight = height != null ? String(height) : '(unknown)' const dims = (width && height) ? `${width} × ${height}` : 'unknown (use the IIIF Image API info.json)' - const projectManifest = Array.isArray(project?.manifest) ? project.manifest[0] : project?.manifest - const manifestUri = getIRI(canvas?.partOf) ?? getIRI(projectManifest) ?? '(unknown manifest URI)' - const userAgentURI = getAgentIRIFromToken(token) ?? '(unable to resolve agent IRI from token)' - const canvasDimsResolution = (width && height) - ? `Canvas dimensions are already resolved as ${width} × ${height} — use these values directly; no fetch required.` - : `Canvas dimensions unknown. GET \`${canvasId}\` and read \`width\`/\`height\`. 
If that fails, GET \`${manifestUri}\` and find the matching canvas in \`items\` by id.` const lineCount = Array.isArray(page?.items) ? page.items.length : 0 return { projectID: projectID ?? '', @@ -64,9 +57,6 @@ export function buildTemplateContext(ctx) { canvasWidth, canvasHeight, dims, - canvasDimsResolution, - manifestUri, - userAgentURI, lineCount: String(lineCount), projectEndpoint: projectEndpoint ?? '(unknown project endpoint)', pageEndpoint: pageEndpoint ?? '(unknown page endpoint)', diff --git a/templates/transcribe-known-lines/PROMPT.md b/templates/transcribe-known-lines/PROMPT.md index eb0a098..6588f3f 100644 --- a/templates/transcribe-known-lines/PROMPT.md +++ b/templates/transcribe-known-lines/PROMPT.md @@ -23,10 +23,10 @@ All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, You must have: -1. Vision capability: fetch each line's region as image bytes (e.g. via a IIIF region URL) and read the inked glyphs directly. A fetcher that returns only a prose description of the image does not count, and any preview rendered back into chat is downsampled — do not transcribe from a preview. +1. Vision capability: fetch each line's region as image bytes (e.g. via a IIIF region URL) and read the inked glyphs directly. A fetcher that returns only a prose description of the image does not qualify, and any preview rendered back into chat is downsampled — do not transcribe from a preview. **If you cannot read image bytes directly with the capabilities already available to you, stop now and return a failure report naming the missing capability.** 2. HTTP PATCH capability (with `Content-Type: text/plain`). -Use only tools already available in your environment. Do not install packages, libraries, or system utilities (`pip`, `npm`, `apt`, `brew`, `cargo`, `--break-system-packages`, etc.) — if a required capability is missing, stop and return a failure report naming it rather than installing anything. 
+Use only tools already available in your environment. Do not install packages, libraries, or system utilities. If a required capability is missing, stop and return a failure report naming it rather than installing anything. If any precondition fails, stop and return a concise failure report naming the missing capability. From ce1d9c26061de7d3e122a25740f7a17248c192bc Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Tue, 21 Apr 2026 21:55:46 -0500 Subject: [PATCH 08/47] Now that's a demo --- templates/detect-lines/PROMPT.md | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/templates/detect-lines/PROMPT.md b/templates/detect-lines/PROMPT.md index 05e3cf6..508ace1 100644 --- a/templates/detect-lines/PROMPT.md +++ b/templates/detect-lines/PROMPT.md @@ -15,16 +15,14 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: -1. Programmatic pixel access to the full-resolution image — a numeric pixel buffer you can iterate over. A prose description of the image, or any measurement taken from a rendered or previewed image, does not qualify; previews are downsampled and visually estimated bounds will be wrong. **If you cannot obtain pixel data with the capabilities already available to you, stop now and return a failure report naming the missing capability.** +1. Ability to fetch the image bytes (or a derivative) and identify line bounds from them. Precise pixel measurement is preferred when available; visual estimation from the fetched image is acceptable otherwise. 2. HTTP PUT capability with `Content-Type: application/json`. -Use only tools already available in your environment. Do not install packages, libraries, or system utilities. If a required capability is missing, stop and return a failure report naming it rather than installing anything. 
- -If any precondition fails, stop and return a concise failure report. +Use only tools already available in your environment. Do not install packages, libraries, or system utilities. If a required capability is genuinely missing (e.g. no way to issue an HTTP PUT), stop and return a failure report naming it rather than installing anything. ## Steps -1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, prefer a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. When you need to inspect a specific region at full fidelity, request it server-side as `{base}/x,y,w,h/max/0/default.jpg` rather than downloading the whole page and cropping locally. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. If you measured coordinates inside a region crop, add the crop's `x,y` origin back before applying the canvas conversion below. +1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, fetch a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. If you have precise pixel tooling and want tighter bounds on a region, request it server-side as `{base}/x,y,w,h/max/0/default.jpg` and add the crop's `x,y` origin back before applying the canvas conversion below. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. 2. Detect text lines across the whole page in reading order. This task does not create TPEN columns. 3. 
For every line, measure a bounding box in image-pixel space and convert to integer canvas coordinates using: - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` @@ -39,8 +37,9 @@ If any precondition fails, stop and return a concise failure report. - Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. - Preserve reading order across the whole page. -- Lines must be tight. Bound the actual text stroke run and nothing more. Never emit a single line that covers what a human reader would call two or more lines; when uncertain whether a tall run is one line or several, split it. -- Do not include decorative borders, frame rules, ornaments, illustrations, or the inter-line whitespace above/below text as part of a line. +- Prefer tight bounds when you can measure them; best-effort bounds are acceptable. When uncertain whether a tall run is one line or several, prefer splitting over merging. +- Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line. +- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected downstream. ## TPEN API From d565c74654e8dd3229c4441eabcb40056687b07d Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Tue, 21 Apr 2026 23:16:33 -0500 Subject: [PATCH 09/47] Now that's a demo --- templates/detect-and-transcribe/PROMPT.md | 13 ++++++------- templates/detect-columns-and-lines/PROMPT.md | 13 ++++++------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/templates/detect-and-transcribe/PROMPT.md b/templates/detect-and-transcribe/PROMPT.md index c6ec445..9e3366f 100644 --- a/templates/detect-and-transcribe/PROMPT.md +++ b/templates/detect-and-transcribe/PROMPT.md @@ -15,16 +15,14 @@ You are assisting with TPEN manuscript transcription. 
Perform the task end-to-en All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: -1. Programmatic pixel access to the full-resolution image — a numeric pixel buffer you can iterate over. A prose description of the image, or any measurement taken from a rendered or previewed image, does not qualify; previews are downsampled and visually estimated bounds will be wrong. **If you cannot obtain pixel data with the capabilities already available to you, stop now and return a failure report naming the missing capability.** +1. Ability to fetch the image bytes (or a derivative) and identify line bounds and text from them. Precise pixel measurement is preferred when available; visual estimation and on-sight transcription from the fetched image are acceptable otherwise. 2. HTTP PUT capability with `Content-Type: application/json`. -Use only tools already available in your environment. Do not install packages, libraries, or system utilities. If a required capability is missing, stop and return a failure report naming it rather than installing anything. - -If any precondition fails, stop and return a concise failure report. +Use only tools already available in your environment. Do not install packages, libraries, or system utilities. If a required capability is genuinely missing (e.g. no way to issue an HTTP PUT), stop and return a failure report naming it rather than installing anything. ## Steps -1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, prefer a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. 
When you need to inspect a specific region at full fidelity, request it server-side as `{base}/x,y,w,h/max/0/default.jpg` rather than downloading the whole page and cropping locally. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. If you measured coordinates inside a region crop, add the crop's `x,y` origin back before applying the canvas conversion below. +1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, fetch a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. If you have precise pixel tooling and want tighter bounds or a clearer crop for transcription, request a region server-side as `{base}/x,y,w,h/max/0/default.jpg` and add the crop's `x,y` origin back before applying the canvas conversion below. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. 2. Detect text lines across the whole page in reading order. This task does not create TPEN columns. 3. For every line, measure a bounding box in image-pixel space and convert to integer canvas coordinates using: - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` @@ -44,8 +42,9 @@ If any precondition fails, stop and return a concise failure report. - Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. - Preserve reading order across the whole page. -- Lines must be tight. Bound the actual text stroke run and nothing more. Never emit a single line that covers what a human reader would call two or more lines; when uncertain whether a tall run is one line or several, split it. -- Do not include decorative borders, frame rules, ornaments, illustrations, or the inter-line whitespace above/below text as part of a line. 
+- Prefer tight bounds when you can measure them; best-effort bounds are acceptable. When uncertain whether a tall run is one line or several, prefer splitting over merging. +- Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line. +- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected downstream. ### Recognition (HANDWRITING_TEXT_RECOGNITION) diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index 52c4f6b..c0b1488 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -19,16 +19,14 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: -1. Programmatic pixel access to the full-resolution image — a numeric pixel buffer you can iterate over. A prose description of the image, or any measurement taken from a rendered or previewed image, does not qualify; previews are downsampled and visually estimated bounds will be wrong. **If you cannot obtain pixel data with the capabilities already available to you, stop now and return a failure report naming the missing capability.** +1. Ability to fetch the image bytes (or a derivative) and identify line and column bounds from them. Precise pixel measurement is preferred when available; visual estimation from the fetched image is acceptable otherwise. 2. HTTP POST and PUT capability with `Content-Type: application/json`. -Use only tools already available in your environment. Do not install packages, libraries, or system utilities. If a required capability is missing, stop and return a failure report naming it rather than installing anything. - -If any precondition fails, stop and return a concise failure report. 
+Use only tools already available in your environment. Do not install packages, libraries, or system utilities. If a required capability is genuinely missing (e.g. no way to issue an HTTP PUT or POST), stop and return a failure report naming it rather than installing anything. ## Steps -1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, prefer a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. When you need to inspect a specific region at full fidelity, request it server-side as `{base}/x,y,w,h/max/0/default.jpg` rather than downloading the whole page and cropping locally. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. If you measured coordinates inside a region crop, add the crop's `x,y` origin back before applying the canvas conversion below. +1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, fetch a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. If you have precise pixel tooling and want tighter bounds on a region, request it server-side as `{base}/x,y,w,h/max/0/default.jpg` and add the crop's `x,y` origin back before applying the canvas conversion below. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. 2. Detect main text column regions in reading order first, then detect the lines inside each column (reading order preserved within each column). 
If the page visibly has a single text block, create one column containing every detected line — do not subdivide. Track each line's column index (an integer, 0-based) as you detect it. 3. For every line, measure a bounding box in image-pixel space and convert to integer canvas coordinates using: - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` @@ -47,9 +45,10 @@ If any precondition fails, stop and return a concise failure report. - Each line annotation belongs to at most one column. - Preserve reading order across columns and within each column. - Line geometry is the primary accuracy target. Column grouping is secondary — for a single-column page, one column containing every line is correct. -- Lines must be tight. Bound the actual text stroke run and nothing more. Never emit a single line that covers what a human reader would call two or more lines; when uncertain whether a tall run is one line or several, split it. -- Do not include decorative borders, frame rules, ornaments, illustrations, or the inter-line whitespace above/below text as part of a line. +- Prefer tight bounds when you can measure them; best-effort bounds are acceptable. When uncertain whether a tall run is one line or several, prefer splitting over merging. +- Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line. - Do not POST a column with an empty `annotations` array — the server rejects it. Skip any detected column that ends up with zero assigned lines. +- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected downstream. 
## TPEN API From 9af6429ae86007bbffee6ea53751bb25e1eb50d6 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Tue, 21 Apr 2026 23:21:54 -0500 Subject: [PATCH 10/47] Now that's a demo --- templates/detect-and-transcribe/PROMPT.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/detect-and-transcribe/PROMPT.md b/templates/detect-and-transcribe/PROMPT.md index 9e3366f..e1e7168 100644 --- a/templates/detect-and-transcribe/PROMPT.md +++ b/templates/detect-and-transcribe/PROMPT.md @@ -85,7 +85,7 @@ Content-Type: application/json } ``` -On any non-2xx response, stop the operation in progress and include the HTTP status and response body in the failure report. +On any non-2xx response, stop and include the HTTP status and response body in the failure report. ## Completion From 0255000c52eff56dc46280a28d4e43a0e2cc06bf Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Wed, 22 Apr 2026 13:48:18 -0500 Subject: [PATCH 11/47] Order principle applied. New prompt to transcribe and include ordering principle. 
--- prompt-generator.js | 2 + templates/detect-columns-and-lines/PROMPT.md | 6 +- templates/detect-columns/PROMPT.md | 49 +++++++-- .../detect-order-and-transcribe/PROMPT.md | 103 ++++++++++++++++++ .../detect-order-and-transcribe/index.js | 19 ++++ 5 files changed, 167 insertions(+), 12 deletions(-) create mode 100644 templates/detect-order-and-transcribe/PROMPT.md create mode 100644 templates/detect-order-and-transcribe/index.js diff --git a/prompt-generator.js b/prompt-generator.js index fd3695c..72aec8d 100644 --- a/prompt-generator.js +++ b/prompt-generator.js @@ -14,6 +14,7 @@ import { detectColumnsTemplate } from './templates/detect-columns/index.js' import { detectLinesTemplate } from './templates/detect-lines/index.js' import { detectColumnsAndLinesTemplate } from './templates/detect-columns-and-lines/index.js' import { detectAndTranscribeTemplate } from './templates/detect-and-transcribe/index.js' +import { detectOrderAndTranscribeTemplate } from './templates/detect-order-and-transcribe/index.js' /** * @typedef {object} PromptTemplate @@ -42,6 +43,7 @@ register(detectColumnsTemplate) register(detectLinesTemplate) register(detectColumnsAndLinesTemplate) register(detectAndTranscribeTemplate) +register(detectOrderAndTranscribeTemplate) /** * Fetch every registered template's markdown body once and cache it. Must be diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index c0b1488..3f8093b 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -27,15 +27,15 @@ Use only tools already available in your environment. Do not install packages, l ## Steps 1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. 
For the page-overview pass, fetch a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. If you have precise pixel tooling and want tighter bounds on a region, request it server-side as `{base}/x,y,w,h/max/0/default.jpg` and add the crop's `x,y` origin back before applying the canvas conversion below. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. -2. Detect main text column regions in reading order first, then detect the lines inside each column (reading order preserved within each column). If the page visibly has a single text block, create one column containing every detected line — do not subdivide. Track each line's column index (an integer, 0-based) as you detect it. +2. Detect main text column regions in reading order first, then detect the lines inside each column (reading order preserved within each column). If the page visibly has a single text block, create one column containing every detected line — do not subdivide. Track each line's column index (an integer, 0-based) as you detect it. Then flatten into a single global reading-order sequence across columns (column-major: every line in the first column, then the second, etc., adjusted for script tradition). 3. For every line, measure a bounding box in image-pixel space and convert to integer canvas coordinates using: - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). -4. PUT every detected line to the page endpoint in a single request (see TPEN API below). Leave `body` empty — no text yet. The response returns line ids in the same order as the submitted `items` — use positional mapping to recover ids per column index. -5. 
For each column, POST `{ label, annotations }` where each entry in `annotations` is the full annotation id (URI) returned by the PUT — not a trailing-segment shorthand. Labels must be unique and must not clash with anything in "Existing columns on this page". +4. PUT every detected line to the page endpoint in a single request (see TPEN API below). The `items` array MUST be in the global reading-order sequence from step 2 — this fixes the page's canonical line order. Leave `body` empty — no text yet. The response returns line ids in the same order as the submitted `items`, so each column's lines are a contiguous slice of the returned id list. +5. For each column, POST `{ label, annotations }` where each entry in `annotations` is the full annotation id (URI) returned by the PUT — not a trailing-segment shorthand. The `annotations` slice must match that column's contiguous run in the reading-order id list. Labels must be unique and must not clash with anything in "Existing columns on this page". 6. Report counts: lines saved, columns created, and any failures. ## Rules diff --git a/templates/detect-columns/PROMPT.md b/templates/detect-columns/PROMPT.md index 9a4087f..e548087 100644 --- a/templates/detect-columns/PROMPT.md +++ b/templates/detect-columns/PROMPT.md @@ -28,7 +28,7 @@ All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, You must have: 1. Programmatic pixel access to the full-resolution image — a numeric pixel buffer you can iterate over. A prose description of the image, or any measurement taken from a rendered or previewed image, does not qualify; previews are downsampled and visually estimated bounds will be wrong. **If you cannot obtain pixel data with the capabilities already available to you, stop now and return a failure report naming the missing capability.** -2. HTTP POST capability with `Content-Type: application/json`. +2. HTTP PUT and POST capability with `Content-Type: application/json`. 
Use only tools already available in your environment. Do not install packages, libraries, or system utilities. If a required capability is missing, stop and return a failure report naming it rather than installing anything. @@ -45,8 +45,9 @@ If any precondition fails, stop and return a concise failure report. - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). 4. For each detected column, determine which of the existing line ids (from the list above) belong to it. Assign a line to the column whose canvas-space region contains the center point of the line's `xywh`. If a line's center falls outside every detected column, assign it to the nearest column by Euclidean distance from the center point to the column's region (distance `0` when the point is inside). Each line belongs to exactly one column. -5. Choose a unique label per column (e.g., `Column A`, `Column B`) that does not clash with any label under "Existing columns on this page", then POST `{ label, annotations }` to the column endpoint. `annotations` is the array of line ids assigned to that column. -6. Report the count of created columns and any per-column failures. +5. Build a global reading-order sequence of all existing line ids: columns in reading order; within each column, lines sorted top-to-bottom by the `xywh` y-center. Then PUT the page with `items` in that order so the page's canonical line list matches reading order (see TPEN API below). Each `items` entry re-uses the existing annotation URI verbatim as its `id` — the server preserves ids (and any already-attached body text) rather than minting new ones. +6. For each column, POST `{ label, annotations }` to the column endpoint. `annotations` is the contiguous slice of the reading-order id sequence that belongs to that column. 
Choose a unique label per column (e.g., `Column A`, `Column B`) that does not clash with any label under "Existing columns on this page". +7. Report the count of created columns and any per-column failures. ## Rules @@ -56,10 +57,40 @@ If any precondition fails, stop and return a concise failure report. - Column labels are page-scoped and must be unique. Do not duplicate an existing column label. - Annotations cannot be assigned to more than one column. If a line clearly sits in an existing column, do not reassign it. - Do not POST a column with an empty `annotations` array — the server rejects it. Skip any detected column that ends up with zero assigned lines. +- The PUT `items` order defines the page's reading order; column `annotations` slices must match that same order. +- The PUT must carry every existing line id exactly once. Do not drop, duplicate, or mint new ids; do not modify `body` or `target`. ## TPEN API -Create one POST per detected column. Each `annotations` array contains the full annotation URIs assigned to that column in step 4, taken verbatim from the "Existing lines" list above. +First, reorder the page's line list via a single PUT. The `items` array must contain every existing line — each entry carrying the existing annotation URI verbatim as `id` — in the reading-order sequence from step 5. Reuse each line's original `target` (the `xywh` selector listed under "Existing lines") unchanged. + +``` +PUT {{pageEndpoint}} +Authorization: Bearer {{token}} +Content-Type: application/json + +{ + "items": [ + { + "id": "<existing annotation URI>", + "type": "Annotation", + "@context": "http://www.w3.org/ns/anno.jsonld", + "target": { + "source": "{{canvasId}}", + "type": "SpecificResource", + "selector": { + "type": "FragmentSelector", + "conformsTo": "http://www.w3.org/TR/media-frags/", + "value": "xywh=x,y,w,h" + } + }, + "motivation": "transcribing" + } + ] +} +``` + +Then create one POST per detected column. 
Each `annotations` array is a contiguous slice of the reading-order id sequence, taken verbatim from the "Existing lines" list. ``` POST {{pageEndpoint}}/column @@ -74,19 +105,19 @@ Content-Type: application/json Each `` is the full id of a line annotation listed above, used verbatim. -On any non-2xx response, stop the column in progress and include the HTTP status and response body in the failure report. +On any non-2xx response, stop the operation in progress and include the HTTP status and response body in the failure report. ## Completion On success, report: -- operation: `POST column` -- target: `{{pageEndpoint}}/column` +- operations: `PUT page`, `POST column` (×N) +- target: {{pageEndpoint}} (page) and `{{pageEndpoint}}/column` - count: number of columns created - per-column line counts On failure, report: -- the failing stage (image fetch, detection, POST) -- HTTP status and error body for any failed POST +- the failing stage (image fetch, detection, PUT, or a specific POST) +- HTTP status and error body - recommended next step (e.g., choose a different label, reassign lines) diff --git a/templates/detect-order-and-transcribe/PROMPT.md b/templates/detect-order-and-transcribe/PROMPT.md new file mode 100644 index 0000000..26632dd --- /dev/null +++ b/templates/detect-order-and-transcribe/PROMPT.md @@ -0,0 +1,103 @@ +# Task: detect, order, and transcribe every text line on a TPEN3 page end-to-end + +You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services. + +## Context + +- Project: {{projectID}} +- Page: {{pageID}} +- Canvas: {{canvasId}} +- Canvas Dimensions: {{canvasWidth}} × {{canvasHeight}} +- Image: {{imageUrl}} +- Page endpoint: {{pageEndpoint}} + +## Preconditions + +All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: + +1. 
Ability to fetch the image bytes (or a derivative) and identify line bounds and text from them. Precise pixel measurement is preferred when available; visual estimation and on-sight transcription from the fetched image are acceptable otherwise. +2. HTTP PUT capability with `Content-Type: application/json`. + +Use only tools already available in your environment. Do not install packages, libraries, or system utilities. If a required capability is genuinely missing (e.g. no way to issue an HTTP PUT), stop and return a failure report naming it rather than installing anything. + +## Steps + +1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, fetch a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. If you have precise pixel tooling and want tighter bounds or a clearer crop for transcription, request a region server-side as `{base}/x,y,w,h/max/0/default.jpg` and add the crop's `x,y` origin back before applying the canvas conversion below. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. +2. Identify the page's layout. If the page has multiple text blocks side-by-side, determine their reading order (left→right for Latin-script layouts; adjust for script tradition). Within each block, detect lines top-to-bottom. Then flatten into a single global reading-order sequence across blocks (block-major: every line in the first block, then the second, etc.). Single-block pages collapse to one top-to-bottom sequence. This task does not create TPEN columns. +3. 
For every line, measure a bounding box in image-pixel space and convert to integer canvas coordinates using: + - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` + - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` + - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` + - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` + Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). +4. Run handwriting text recognition on each line's crop. Apply the recognition rules below. +5. Build one Annotation per line with the recognized text as the `TextualBody` value and `xywh=x,y,w,h` as the bounding box fragment selector. +6. PUT every detected line to the page endpoint in a single request (see TPEN API below). The `items` array MUST be in the global reading-order sequence from step 2 — this fixes the page's canonical line order. +7. Report counts: lines saved, lines with non-empty text, lines flagged uncertain. +8. Report notable ambiguities (e.g., illegible lines transcribed as empty or flagged) and the detected block count so reviewers know whether a multi-block layout was recognized. + +## Rules + +### Detection (IMAGE_ANALYSIS) + +- Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. +- The PUT `items` order is the page's canonical reading order; do not interleave lines from different blocks. +- Prefer tight bounds when you can measure them; best-effort bounds are acceptable. When uncertain whether a tall run is one line or several, prefer splitting over merging. +- Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line. +- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected downstream. + +### Recognition (HANDWRITING_TEXT_RECOGNITION) + +- Prioritize diplomatic transcription over normalization. 
+- Preserve orthography and punctuation as observed. +- Use explicit uncertainty markers for unclear glyphs (for example `[a?]`). Do not force certainty. +- Do not invent expansions. If an abbreviation mark is present, transcribe the mark; do not silently expand. +- Keep line segmentation stable even when text is partially uncertain. +- If a crop is fully illegible, save the annotation with an empty text body and flag the line id in the report — do not fabricate text. + +## TPEN API + +Save every detected line with its transcription in a single PUT. The `items` array must contain one annotation per detected line, in the reading-order sequence from step 2; replace `x,y,w,h` with the integer canvas coordinates computed in step 3, and `<text>` with the recognized text (empty string for fully illegible lines). + +``` +PUT {{pageEndpoint}} +Authorization: Bearer {{token}} +Content-Type: application/json + +{ + "items": [ + { + "type": "Annotation", + "@context": "http://www.w3.org/ns/anno.jsonld", + "body": [{ "type": "TextualBody", "value": "<text>", "format": "text/plain" }], + "target": { + "source": "{{canvasId}}", + "type": "SpecificResource", + "selector": { + "type": "FragmentSelector", + "conformsTo": "http://www.w3.org/TR/media-frags/", + "value": "xywh=x,y,w,h" + } + }, + "motivation": "transcribing" + } + ] +} +``` + +On any non-2xx response, stop and include the HTTP status and response body in the failure report.
+ +## Completion + +On success, report: + +- operation: `PUT page` +- target: {{pageEndpoint}} +- counts: lines saved, lines with non-empty text, lines flagged uncertain, text blocks detected +- notable ambiguities worth a human review + +On failure, report: + +- the failing stage (image fetch, detection, recognition, PUT) +- HTTP status and error body +- recommended next step diff --git a/templates/detect-order-and-transcribe/index.js b/templates/detect-order-and-transcribe/index.js new file mode 100644 index 0000000..255c5cb --- /dev/null +++ b/templates/detect-order-and-transcribe/index.js @@ -0,0 +1,19 @@ +/** + * @file Template: "Detect lines + order + transcribe → PUT page". + * + * Combines the multi-block reading-order detection from + * detect-columns-and-lines with the handwriting recognition from + * detect-and-transcribe, without creating column annotations. + * + * @author thehabes + */ + +import { buildTemplateContext } from '../inject-context.js' + +/** @type {import('../../prompt-generator.js').PromptTemplate} */ +export const detectOrderAndTranscribeTemplate = { + id: 'detect-order-and-transcribe', + label: 'Line Detection + Ordering + Transcription', + templateUrl: new URL('./PROMPT.md', import.meta.url), + buildContext: buildTemplateContext +} From 1c14e543772ca0f30d1c18593a11848d4b6e5a84 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Thu, 23 Apr 2026 14:36:07 -0500 Subject: [PATCH 12/47] Restore JSON-paste fallback submission flow Ports the fallback mechanism from api-usage-fallback (which was branched from main and carried conflicting prompt edits) onto prompt-tuning: authedJson refactor + putPage/postColumn exports in tpen-service.js, and the paste-JSON
panel + handler in ui-manager.js with styles. Co-Authored-By: Claude Opus 4.7 (1M context) --- styles.css | 21 ++++++++ tpen-service.js | 64 ++++++++++++++++++++++--- ui-manager.js | 125 +++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 201 insertions(+), 9 deletions(-) diff --git a/styles.css b/styles.css index dce914f..a6fbd39 100644 --- a/styles.css +++ b/styles.css @@ -163,3 +163,24 @@ textarea:focus-visible { .tool-header { text-align: center; } + +.fallback { + margin-top: 1rem; + border: 1px solid var(--border); + border-radius: 4px; + padding: 0.5rem 0.75rem; + background: var(--surface); +} +.fallback summary { + cursor: pointer; + font-size: 0.9rem; + color: var(--muted); + padding: 0.25rem 0; +} +.fallback summary:hover { color: var(--text); } +.fallback .hint { + margin: 0.5rem 0; + font-size: 0.85rem; + color: var(--muted); +} +.fallback textarea { margin-top: 0.25rem; } diff --git a/tpen-service.js b/tpen-service.js index 7a041a7..ab1c2bd 100644 --- a/tpen-service.js +++ b/tpen-service.js @@ -9,29 +9,45 @@ import { CONFIG } from './config.js' /** - * GET a services endpoint with the user's Bearer token and a 15s timeout. + * Call a services endpoint with the user's Bearer token and a 15s timeout. * On non-2xx responses throws an Error whose `.status` matches the response. * @param {string} path path beginning with `/`, relative to `CONFIG.servicesURL`. + * @param {string} method HTTP verb (`GET`, `PUT`, `POST`, `PATCH`). + * @param {any} [body] JSON-serializable body; omitted for GET. * @param {string} token JWT. - * @returns {Promise} parsed JSON body. + * @returns {Promise} parsed JSON body (or `null` when the response has no body). 
*/ -async function authedGet(path, token) { +async function authedJson(path, method, body, token) { if (!token) throw new Error(`Missing auth token for ${path}`) - const res = await fetch(`${CONFIG.servicesURL}${path}`, { - method: 'GET', + const init = { + method, headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${token}` }, signal: AbortSignal.timeout(15000) - }) + } + if (body !== undefined) init.body = JSON.stringify(body) + const res = await fetch(`${CONFIG.servicesURL}${path}`, init) if (!res.ok) { const detail = await res.text().catch(() => '') const err = new Error(`${res.status} ${res.statusText} — ${path}${detail ? `: ${detail}` : ''}`) err.status = res.status throw err } - return res.json() + const text = await res.text() + if (!text) return null + try { return JSON.parse(text) } catch { return text } +} + +/** + * GET a services endpoint with the user's Bearer token. + * @param {string} path path beginning with `/`, relative to `CONFIG.servicesURL`. + * @param {string} token JWT. + * @returns {Promise} parsed JSON body. + */ +function authedGet(path, token) { + return authedJson(path, 'GET', undefined, token) } /** @@ -57,6 +73,40 @@ export function fetchPageResolved(projectID, pageID, token) { return authedGet(`/project/${encodeURIComponent(projectID)}/page/${encodeURIComponent(pageID)}/resolved`, token) } +/** + * PUT a page body (`{ items: [...] }`). Used by the fallback JSON-paste flow + * when the user's LLM cannot issue writes itself. Items may be new (no `id`, + * or a non-http local id) or updates (item `id` is the line's full IRI). + * @param {string} projectID + * @param {string} pageID + * @param {{ items: Array }} body + * @param {string} token + * @returns {Promise} + */ +export function putPage(projectID, pageID, body, token) { + return authedJson( + `/project/${encodeURIComponent(projectID)}/page/${encodeURIComponent(pageID)}`, + 'PUT', body, token + ) +} + +/** + * POST a single column to a page. 
Body is `{ label, annotations }` where each + * `annotations[i]` must match an existing `page.items[*].id`; the server + * rejects duplicate labels within the page. + * @param {string} projectID + * @param {string} pageID + * @param {{ label: string, annotations: Array }} body + * @param {string} token + * @returns {Promise} + */ +export function postColumn(projectID, pageID, body, token) { + return authedJson( + `/project/${encodeURIComponent(projectID)}/page/${encodeURIComponent(pageID)}/column`, + 'POST', body, token + ) +} + /** * Build the project endpoint URL (project/index.js). Templates use this for * best-effort GETs that verify project-level state mid-task. diff --git a/ui-manager.js b/ui-manager.js index f38ca92..654426f 100644 --- a/ui-manager.js +++ b/ui-manager.js @@ -9,7 +9,7 @@ */ import { listTemplates, renderTemplate } from './prompt-generator.js' -import { pageEndpoint, projectEndpoint } from './tpen-service.js' +import { pageEndpoint, projectEndpoint, putPage, postColumn } from './tpen-service.js' import { trailingId } from './iiif-ids.js' /** @@ -69,6 +69,8 @@ export class UIManager { #workspaceBody = null /** Pending timer for clearing the Copy feedback message. */ #feedbackTimer = null + /** Pending timer for clearing the fallback panel feedback message. */ + #fallbackFeedbackTimer = null /** * @param {string} [rootId='app'] id of the element to render into. @@ -234,13 +236,132 @@ export class UIManager { el('div', { class: 'controls' }, generateControls), el('label', { class: 'output-label', htmlFor: 'output', text: 'Generated prompt' }), output, - el('div', { class: 'controls' }, [copyBtn, feedback]) + el('div', { class: 'controls' }, [copyBtn, feedback]), + this.#buildFallbackPanel() ]) this.#workspaceBody = body this.#replace(el('section', { class: 'card' }, [header, body])) } + /** + * Build the paste-JSON fallback panel. Shown inside the workspace body so + * it inherits the token-gate (hidden until `token` is present). 
Enables the + * submit button only when `projectID` and `pageID` are also held, since the + * dispatcher always targets a specific page. + * @returns {HTMLElement} + */ + #buildFallbackPanel() { + const { projectID, pageID } = this.state + const ready = Boolean(projectID && pageID) + const textarea = el('textarea', { + id: 'fallback-input', rows: 10, spellcheck: false, autocomplete: 'off', + placeholder: '{ "items": [ … ] }\nor\n{ "label": "Column A", "annotations": ["…"] }\nor\n[ { "label": "…", "annotations": ["…"] }, … ]', + attrs: { 'aria-label': 'JSON payload to submit to TPEN' } + }) + const submit = el('button', { + type: 'button', id: 'fallback-submit', + text: 'Submit to TPEN', + disabled: !ready + }) + const feedback = el('span', { class: 'feedback', attrs: { 'aria-live': 'polite' } }) + submit.addEventListener('click', () => this.#onFallbackSubmit(textarea, submit, feedback)) + const children = [ + el('summary', { text: 'Paste JSON from LLM (fallback)' }), + el('p', { class: 'hint', text: 'Use this when your chat LLM produced the JSON payload but could not call the TPEN API itself. The tool will submit it using the token you authorized.' }) + ] + if (!ready) children.push(el('p', { class: 'hint', text: 'Needs a page context before submission is possible.' })) + children.push(textarea, el('div', { class: 'controls' }, [submit, feedback])) + return el('details', { class: 'fallback' }, children) + } + + /** + * Parse the pasted JSON, classify its shape, and dispatch the matching + * TPEN write. Shapes accepted: + * - `{ items: [...] }` → `PUT page` + * - `{ label, annotations: [...] }` → single `POST column` + * - `[ { label, annotations }, ... 
]` → iterate `POST column`, stop at first failure + * @param {HTMLTextAreaElement} textarea + * @param {HTMLButtonElement} button + * @param {HTMLElement} feedback + */ + async #onFallbackSubmit(textarea, button, feedback) { + const { projectID, pageID, token } = this.state + const raw = textarea.value.trim() + const setFeedback = (msg, autoClear = false) => { + feedback.textContent = msg + if (this.#fallbackFeedbackTimer) { + clearTimeout(this.#fallbackFeedbackTimer) + this.#fallbackFeedbackTimer = null + } + if (autoClear) { + this.#fallbackFeedbackTimer = setTimeout(() => { + feedback.textContent = '' + this.#fallbackFeedbackTimer = null + }, 3000) + } + } + if (!projectID || !pageID || !token) { + setFeedback('Missing project, page, or token — cannot submit.') + return + } + if (!raw) { + setFeedback('Paste a JSON payload first.') + return + } + let payload + try { payload = JSON.parse(raw) } + catch { setFeedback('Payload must be valid JSON.'); return } + + button.disabled = true + setFeedback('Submitting…') + try { + if (payload && typeof payload === 'object' && !Array.isArray(payload) && Array.isArray(payload.items)) { + for (const item of payload.items) { + if (!item || typeof item !== 'object' || Array.isArray(item)) { + setFeedback('Each item in `items` must be an annotation object.') + return + } + } + const result = await putPage(projectID, pageID, payload, token) + const saved = payload.items.length + textarea.value = result && typeof result === 'object' + ? JSON.stringify(result, null, 2) + : '' + setFeedback(`Saved ${saved} line item${saved === 1 ? '' : 's'}. 
Server response (with ids) is in the textarea.`, true) + return + } + if (payload && typeof payload === 'object' && !Array.isArray(payload) + && typeof payload.label === 'string' && Array.isArray(payload.annotations)) { + await postColumn(projectID, pageID, payload, token) + setFeedback(`Created column "${payload.label}".`, true) + textarea.value = '' + return + } + if (Array.isArray(payload) && payload.every(c => + c && typeof c === 'object' && typeof c.label === 'string' && Array.isArray(c.annotations))) { + for (let i = 0; i < payload.length; i++) { + const col = payload[i] + try { await postColumn(projectID, pageID, col, token) } + catch (err) { + textarea.value = JSON.stringify(payload.slice(i), null, 2) + setFeedback(`Created ${i} of ${payload.length} columns; failed on "${col.label}" — ${err.message}. Remaining columns kept in the textarea for retry.`) + return + } + } + setFeedback(`Created ${payload.length} column${payload.length === 1 ? '' : 's'}.`, true) + textarea.value = '' + return + } + setFeedback('Unrecognized payload shape — expected `{items: [...]}`, `{label, annotations}`, or an array of `{label, annotations}`.') + } catch (err) { + const status = err?.status ? `TPEN API ${err.status}: ` : '' + setFeedback(`${status}${err?.message ?? 'Submission failed.'}`) + } finally { + button.disabled = !(this.state.projectID && this.state.pageID && this.state.token) + } + } + /** * Update the stored token and remove the in-workspace consent button if * it's on screen. 
Called from `PromptsApp.acceptAuth` when the parent From 94a981b765458c9b28056308b4cf44b6356358af Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Thu, 23 Apr 2026 14:46:22 -0500 Subject: [PATCH 13/47] recovering --- templates/detect-and-transcribe/PROMPT.md | 28 ++++---- templates/detect-columns-and-lines/PROMPT.md | 30 ++++---- templates/detect-columns/PROMPT.md | 38 ++++++---- templates/detect-lines/PROMPT.md | 26 ++++--- .../detect-order-and-transcribe/PROMPT.md | 28 ++++---- templates/transcribe-known-lines/PROMPT.md | 69 ++++++++++++------- 6 files changed, 132 insertions(+), 87 deletions(-) diff --git a/templates/detect-and-transcribe/PROMPT.md b/templates/detect-and-transcribe/PROMPT.md index e1e7168..7bbcf46 100644 --- a/templates/detect-and-transcribe/PROMPT.md +++ b/templates/detect-and-transcribe/PROMPT.md @@ -1,6 +1,6 @@ # Task: detect and transcribe every text line on a TPEN3 page end-to-end -You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services. +You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services (direct) or emitted as a fallback JSON payload for the user to paste. ## Context @@ -16,9 +16,9 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: 1. Ability to fetch the image bytes (or a derivative) and identify line bounds and text from them. Precise pixel measurement is preferred when available; visual estimation and on-sight transcription from the fetched image are acceptable otherwise. -2. HTTP PUT capability with `Content-Type: application/json`. +2. 
Either HTTP PUT capability with `Content-Type: application/json`, or the ability to emit the payload as a fallback JSON code block in your report. If HTTP PUT is not available, skip straight to the Fallback section — do not retry. -Use only tools already available in your environment. Do not install packages, libraries, or system utilities. If a required capability is genuinely missing (e.g. no way to issue an HTTP PUT), stop and return a failure report naming it rather than installing anything. +Use only tools already available in your environment. Do not install packages, libraries, or system utilities. ## Steps @@ -31,9 +31,9 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). 4. Run handwriting text recognition on each line's crop. Apply the recognition rules below. -5. Build one Annotation per line with the recognized text as the `TextualBody` value and `xywh=x,y,w,h` as the bounding box fragment selector. -6. PUT every detected line to the page endpoint in a single request (see TPEN API below). -7. Report counts: lines saved, lines with non-empty text, lines flagged uncertain. +5. Build the `{ "items": [...] }` payload described under TPEN API — one Annotation per line with the recognized text as the `TextualBody` value and `xywh=x,y,w,h` as the fragment selector. +6. If HTTP PUT is available, send the request once. On any non-2xx response, do not retry — fall back. If HTTP PUT is unavailable from the start, go directly to the fallback. +7. Report counts (lines saved/in payload, non-empty text, uncertain) and which path was used (direct PUT or fallback). 8. Report notable ambiguities (e.g., illegible lines transcribed as empty or flagged). 
## Rules @@ -85,19 +85,23 @@ Content-Type: application/json } ``` -On any non-2xx response, stop and include the HTTP status and response body in the failure report. +## Fallback + +When the direct PUT is impossible or returns non-2xx, emit the `{ "items": [...] }` body from TPEN API as the final code block of your report. It must be valid JSON (no comments, no placeholders — substitute the real coordinates and recognized text). The user will paste it into the TPEN splitscreen tool, which submits it with their authorized token. ## Completion -On success, report: +Direct PUT path, report: - operation: `PUT page` - target: {{pageEndpoint}} - counts: lines saved, lines with non-empty text, lines flagged uncertain - notable ambiguities worth a human review -On failure, report: +Fallback path, report: -- the failing stage (image fetch, detection, recognition, PUT) -- HTTP status and error body -- recommended next step +- path: `fallback` +- counts: lines in payload, lines with non-empty text, lines flagged uncertain +- HTTP status and error body if a PUT was attempted first +- notable ambiguities worth a human review +- final code block: the full `{ "items": [...] }` JSON for the user to paste diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index 3f8093b..a38f11c 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -1,6 +1,6 @@ # Task: detect columns AND lines on a TPEN3 page and save both to the page -You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services. +You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services (direct) or emitted as fallback JSON payloads for the user to paste. ## Context @@ -20,9 +20,9 @@ You are assisting with TPEN manuscript transcription. 
Perform the task end-to-en All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: 1. Ability to fetch the image bytes (or a derivative) and identify line and column bounds from them. Precise pixel measurement is preferred when available; visual estimation from the fetched image is acceptable otherwise. -2. HTTP POST and PUT capability with `Content-Type: application/json`. +2. Either HTTP PUT and POST capability with `Content-Type: application/json`, or the ability to emit the payloads as fallback JSON code blocks in your report. If either verb is unavailable, skip straight to the Fallback section — do not retry. -Use only tools already available in your environment. Do not install packages, libraries, or system utilities. If a required capability is genuinely missing (e.g. no way to issue an HTTP PUT or POST), stop and return a failure report naming it rather than installing anything. +Use only tools already available in your environment. Do not install packages, libraries, or system utilities. ## Steps @@ -34,9 +34,10 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). -4. PUT every detected line to the page endpoint in a single request (see TPEN API below). The `items` array MUST be in the global reading-order sequence from step 2 — this fixes the page's canonical line order. Leave `body` empty — no text yet. The response returns line ids in the same order as the submitted `items`, so each column's lines are a contiguous slice of the returned id list. -5. For each column, POST `{ label, annotations }` where each entry in `annotations` is the full annotation id (URI) returned by the PUT — not a trailing-segment shorthand. 
The `annotations` slice must match that column's contiguous run in the reading-order id list. Labels must be unique and must not clash with anything in "Existing columns on this page". -6. Report counts: lines saved, columns created, and any failures. +4. Build the `{ "items": [...] }` payload described under TPEN API. The `items` array MUST be in the global reading-order sequence from step 2 — this fixes the page's canonical line order. Leave `body` empty — no text yet. Track each line's column index (0-based) as you emit items so you can slice them into columns in step 5. +5. If HTTP PUT and POST are available: PUT the items once, then for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of server-returned ids for that column. Labels must be unique and must not clash with anything in "Existing columns on this page". On any non-2xx, stop and fall back for everything not yet persisted. +6. If HTTP PUT/POST are unavailable from the start, go directly to the Fallback — emit the items payload for the user to paste. Column creation in fallback requires a follow-up pass (see Fallback). +7. Report counts (lines saved/in payload, columns created/in payload) and which path was used. ## Rules @@ -93,18 +94,23 @@ Content-Type: application/json } ``` -On any non-2xx response, stop the operation in progress and include the HTTP status and response body in the failure report. +## Fallback + +When the direct path is unavailable or returns non-2xx, emit only the `{ "items": [...] }` body from TPEN API as the final code block of your report, in the global reading-order sequence from step 2. It must be valid JSON. The user pastes it into the TPEN splitscreen tool, which PUTs it with their authorized token and fills the textarea with the server response (the persisted lines, each with its assigned id). 
Column creation in fallback is a separate pass — instruct the user to re-run with the `detect-columns` template, which takes the now-persisted lines and emits the `[{label, annotations}, ...]` column payload. ## Completion -On success, report: +Direct path, report: - operations: `PUT page`, `POST column` (×N) - target: {{pageEndpoint}} (page) and {{pageEndpoint}}/column - counts: lines saved, columns created +- whether lines were saved even if a column POST failed (partial success is acceptable — describe what persists) -On failure, report: +Fallback path, report: -- the failing stage (image fetch, detection, PUT, or a specific POST) -- HTTP status and error body -- whether lines were saved even if column creation failed (partial success is acceptable — describe what persists) +- path: `fallback` +- counts: lines in payload, columns not yet created +- HTTP status and error body if a request was attempted first +- final code block: the full `{ "items": [...] }` JSON for the user to paste +- next step: re-run with `detect-columns` after the items paste succeeds, to create the columns diff --git a/templates/detect-columns/PROMPT.md b/templates/detect-columns/PROMPT.md index e548087..18bcab5 100644 --- a/templates/detect-columns/PROMPT.md +++ b/templates/detect-columns/PROMPT.md @@ -1,6 +1,6 @@ # Task: detect column regions on a TPEN3 page and assign existing lines to them -You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services. +You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services (direct) or emitted as fallback JSON payloads for the user to paste. ## Context @@ -27,12 +27,10 @@ All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, You must have: -1. Programmatic pixel access to the full-resolution image — a numeric pixel buffer you can iterate over. 
A prose description of the image, or any measurement taken from a rendered or previewed image, does not qualify; previews are downsampled and visually estimated bounds will be wrong. **If you cannot obtain pixel data with the capabilities already available to you, stop now and return a failure report naming the missing capability.** -2. HTTP PUT and POST capability with `Content-Type: application/json`. +1. Programmatic pixel access to the full-resolution image — a numeric pixel buffer you can iterate over. A prose description of the image, or any measurement taken from a rendered or previewed image, does not qualify; previews are downsampled and visually estimated bounds will be wrong. **If you cannot obtain pixel data with the capabilities already available to you, stop now and return a failure report naming the missing capability.** This precondition is hard — fallback does not rescue missing vision. +2. Either HTTP PUT and POST capability with `Content-Type: application/json`, or the ability to emit the payloads as fallback JSON code blocks in your report. If either verb is unavailable, skip straight to the Fallback section — do not retry. -Use only tools already available in your environment. Do not install packages, libraries, or system utilities. If a required capability is missing, stop and return a failure report naming it rather than installing anything. - -If any precondition fails, stop and return a concise failure report. +Use only tools already available in your environment. Do not install packages, libraries, or system utilities. ## Steps @@ -45,9 +43,11 @@ If any precondition fails, stop and return a concise failure report. - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). 4. For each detected column, determine which of the existing line ids (from the list above) belong to it. 
Assign a line to the column whose canvas-space region contains the center point of the line's `xywh`. If a line's center falls outside every detected column, assign it to the nearest column by Euclidean distance from the center point to the column's region (distance `0` when the point is inside). Each line belongs to exactly one column. -5. Build a global reading-order sequence of all existing line ids: columns in reading order; within each column, lines sorted top-to-bottom by the `xywh` y-center. Then PUT the page with `items` in that order so the page's canonical line list matches reading order (see TPEN API below). Each `items` entry re-uses the existing annotation URI verbatim as its `id` — the server preserves ids (and any already-attached body text) rather than minting new ones. -6. For each column, POST `{ label, annotations }` to the column endpoint. `annotations` is the contiguous slice of the reading-order id sequence that belongs to that column. Choose a unique label per column (e.g., `Column A`, `Column B`) that does not clash with any label under "Existing columns on this page". -7. Report the count of created columns and any per-column failures. +5. Build a global reading-order sequence of all existing line ids: columns in reading order; within each column, lines sorted top-to-bottom by the `xywh` y-center. +6. Build the `{ "items": [...] }` payload described under TPEN API from that sequence. Each `items` entry re-uses the existing annotation URI verbatim as its `id` — the server preserves ids (and any already-attached body text) rather than minting new ones. +7. Build the column payload `[{ label, annotations }, ...]` where each `annotations` array is the contiguous slice of the reading-order id sequence that belongs to that column. Choose a unique label per column (e.g., `Column A`, `Column B`) that does not clash with any label under "Existing columns on this page". 
Both the direct and fallback paths use the same ids — the existing annotation URIs listed above — so the same column payload works in either path. +8. If HTTP PUT and POST are available: PUT the page once, then POST each column once. On any non-2xx, stop and fall back for whatever has not yet persisted. Otherwise go directly to the Fallback. +9. Report counts (columns created or in payload) and which path was used. ## Rules @@ -105,19 +105,27 @@ Content-Type: application/json Each `` is the full id of a line annotation listed above, used verbatim. -On any non-2xx response, stop the operation in progress and include the HTTP status and response body in the failure report. +## Fallback + +When the direct path is unavailable or returns non-2xx, emit two final code blocks in your report, in order: + +1. The `{ "items": [...] }` body from TPEN API — the reading-order reorder of existing lines. +2. The `[{ "label": "…", "annotations": [ "", … ] }, …]` column array — one entry per detected column, annotations drawn verbatim from "Existing lines". + +Both must be valid JSON. The user pastes each block into the TPEN splitscreen tool; the tool PUTs the first block with their authorized token, then POSTs each column from the second block in one paste. ## Completion -On success, report: +Direct path, report: - operations: `PUT page`, `POST column` (×N) - target: {{pageEndpoint}} (page) and `{{pageEndpoint}}/column` - count: number of columns created - per-column line counts -On failure, report: +Fallback path, report: -- the failing stage (image fetch, detection, PUT, or a specific POST) -- HTTP status and error body -- recommended next step (e.g., choose a different label, reassign lines) +- path: `fallback` +- counts: columns in payload, per-column line counts +- HTTP status and error body if a request was attempted first +- final code blocks (in order): the `{ "items": [...] 
}` JSON, then the `[{label, annotations}, ...]` column JSON, for the user to paste diff --git a/templates/detect-lines/PROMPT.md b/templates/detect-lines/PROMPT.md index 508ace1..9df5b6e 100644 --- a/templates/detect-lines/PROMPT.md +++ b/templates/detect-lines/PROMPT.md @@ -1,6 +1,6 @@ # Task: detect every text line on a TPEN3 page and save them to the page -You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services. +You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services (direct) or emitted as a fallback JSON payload for the user to paste. ## Context @@ -16,9 +16,9 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: 1. Ability to fetch the image bytes (or a derivative) and identify line bounds from them. Precise pixel measurement is preferred when available; visual estimation from the fetched image is acceptable otherwise. -2. HTTP PUT capability with `Content-Type: application/json`. +2. Either HTTP PUT capability with `Content-Type: application/json`, or the ability to emit the payload as a fallback JSON code block in your report. If HTTP PUT is not available, skip straight to the Fallback section — do not retry. -Use only tools already available in your environment. Do not install packages, libraries, or system utilities. If a required capability is genuinely missing (e.g. no way to issue an HTTP PUT), stop and return a failure report naming it rather than installing anything. +Use only tools already available in your environment. Do not install packages, libraries, or system utilities. ## Steps @@ -30,8 +30,9 @@ Use only tools already available in your environment. 
Do not install packages, l - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). -4. PUT every detected line to the page endpoint in a single request (see TPEN API below). Leave `body` empty — no text yet. -5. Report count and any failure cause. +4. Build the `{ "items": [...] }` payload described under TPEN API. Leave `body` empty — no text yet. +5. If HTTP PUT is available, send the request once. On any non-2xx response, do not retry — fall back. If HTTP PUT is unavailable from the start, go directly to the fallback. +6. Report count and which path was used (direct PUT or fallback). ## Rules @@ -71,18 +72,21 @@ Content-Type: application/json } ``` -On any non-2xx response, stop and include the HTTP status and response body in the failure report. +## Fallback + +When the direct PUT is impossible or returns non-2xx, emit the `{ "items": [...] }` body from TPEN API as the final code block of your report. It must be valid JSON (no comments, no placeholders — substitute the real coordinates). The user will paste it into the TPEN splitscreen tool, which submits it with their authorized token. ## Completion -On success, report: +Direct PUT path, report: - operation: `PUT page` - target: {{pageEndpoint}} - count: number of line annotations saved -On failure, report: +Fallback path, report: -- the failing stage (image fetch, detection, PUT) -- HTTP status and error body -- recommended next step +- path: `fallback` +- count: number of line annotations in the payload +- HTTP status and error body if a PUT was attempted first +- final code block: the full `{ "items": [...] 
}` JSON for the user to paste diff --git a/templates/detect-order-and-transcribe/PROMPT.md b/templates/detect-order-and-transcribe/PROMPT.md index 26632dd..2c6a137 100644 --- a/templates/detect-order-and-transcribe/PROMPT.md +++ b/templates/detect-order-and-transcribe/PROMPT.md @@ -1,6 +1,6 @@ # Task: detect, order, and transcribe every text line on a TPEN3 page end-to-end -You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services. +You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services (direct) or emitted as a fallback JSON payload for the user to paste. ## Context @@ -16,9 +16,9 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: 1. Ability to fetch the image bytes (or a derivative) and identify line bounds and text from them. Precise pixel measurement is preferred when available; visual estimation and on-sight transcription from the fetched image are acceptable otherwise. -2. HTTP PUT capability with `Content-Type: application/json`. +2. Either HTTP PUT capability with `Content-Type: application/json`, or the ability to emit the payload as a fallback JSON code block in your report. If HTTP PUT is not available, skip straight to the Fallback section — do not retry. -Use only tools already available in your environment. Do not install packages, libraries, or system utilities. If a required capability is genuinely missing (e.g. no way to issue an HTTP PUT), stop and return a failure report naming it rather than installing anything. +Use only tools already available in your environment. Do not install packages, libraries, or system utilities. 
## Steps @@ -31,9 +31,9 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). 4. Run handwriting text recognition on each line's crop. Apply the recognition rules below. -5. Build one Annotation per line with the recognized text as the `TextualBody` value and `xywh=x,y,w,h` as the bounding box fragment selector. -6. PUT every detected line to the page endpoint in a single request (see TPEN API below). The `items` array MUST be in the global reading-order sequence from step 2 — this fixes the page's canonical line order. -7. Report counts: lines saved, lines with non-empty text, lines flagged uncertain. +5. Build the `{ "items": [...] }` payload described under TPEN API — one Annotation per line with the recognized text as the `TextualBody` value and `xywh=x,y,w,h` as the fragment selector. The `items` array MUST be in the global reading-order sequence from step 2 — this fixes the page's canonical line order. +6. If HTTP PUT is available, send the request once. On any non-2xx response, do not retry — fall back. If HTTP PUT is unavailable from the start, go directly to the fallback. +7. Report counts (lines saved/in payload, non-empty text, uncertain) and which path was used (direct PUT or fallback). 8. Report notable ambiguities (e.g., illegible lines transcribed as empty or flagged) and the detected block count so reviewers know whether a multi-block layout was recognised. ## Rules @@ -85,19 +85,23 @@ Content-Type: application/json } ``` -On any non-2xx response, stop and include the HTTP status and response body in the failure report. +## Fallback + +When the direct PUT is impossible or returns non-2xx, emit the `{ "items": [...] }` body from TPEN API as the final code block of your report, in the reading-order sequence from step 2. 
It must be valid JSON (no comments, no placeholders — substitute the real coordinates and recognized text). The user will paste it into the TPEN splitscreen tool, which submits it with their authorized token. ## Completion -On success, report: +Direct PUT path, report: - operation: `PUT page` - target: {{pageEndpoint}} - counts: lines saved, lines with non-empty text, lines flagged uncertain, text blocks detected - notable ambiguities worth a human review -On failure, report: +Fallback path, report: -- the failing stage (image fetch, detection, recognition, PUT) -- HTTP status and error body -- recommended next step +- path: `fallback` +- counts: lines in payload, lines with non-empty text, lines flagged uncertain, text blocks detected +- HTTP status and error body if a PUT was attempted first +- notable ambiguities worth a human review +- final code block: the full `{ "items": [...] }` JSON for the user to paste diff --git a/templates/transcribe-known-lines/PROMPT.md b/templates/transcribe-known-lines/PROMPT.md index 6588f3f..272b43d 100644 --- a/templates/transcribe-known-lines/PROMPT.md +++ b/templates/transcribe-known-lines/PROMPT.md @@ -1,6 +1,6 @@ # Task: transcribe the existing lines on a TPEN3 page -You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services. +You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services (direct) or emitted as a fallback JSON payload for the user to paste. ## Context @@ -13,7 +13,7 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Existing lines -Each entry is `<annotation-uri>: <xywh>` in canvas coordinates. +Each entry is `<annotation-uri>: <xywh>` in canvas coordinates. Use the full annotation URI verbatim as the `id` of each item in the PUT payload; the server preserves these ids and updates only the body text.
{{existingLines}} @@ -23,12 +23,10 @@ All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, You must have: -1. Vision capability: fetch each line's region as image bytes (e.g. via a IIIF region URL) and read the inked glyphs directly. A fetcher that returns only a prose description of the image does not qualify, and any preview rendered back into chat is downsampled — do not transcribe from a preview. **If you cannot read image bytes directly with the capabilities already available to you, stop now and return a failure report naming the missing capability.** -2. HTTP PATCH capability (with `Content-Type: text/plain`). +1. Vision capability: fetch each line's region as image bytes (e.g. via a IIIF region URL) and read the inked glyphs directly. A fetcher that returns only a prose description of the image does not qualify, and any preview rendered back into chat is downsampled — do not transcribe from a preview. **If you cannot read image bytes directly with the capabilities already available to you, stop now and return a failure report naming the missing capability.** This precondition is hard — fallback does not rescue missing vision. +2. Either HTTP PUT capability with `Content-Type: application/json`, or the ability to emit the payload as a fallback JSON code block in your report. If HTTP PUT is not available, skip straight to the Fallback section — do not retry. -Use only tools already available in your environment. Do not install packages, libraries, or system utilities. If a required capability is missing, stop and return a failure report naming it rather than installing anything. - -If any precondition fails, stop and return a concise failure report naming the missing capability. +Use only tools already available in your environment. Do not install packages, libraries, or system utilities. 
## Steps @@ -39,8 +37,9 @@ If any precondition fails, stop and return a concise failure report naming the m - `pixel_h = round(canvas_h * img_h / {{canvasHeight}})` Crop each line region and verify it visibly contains a single line of inked text. 2. Run handwriting text recognition over each crop. Apply the recognition rules below. -3. For each line, PATCH the text to its line-text endpoint — one PATCH per line in the "Existing lines" list. -4. Report a per-line summary: how many succeeded, how many failed, and the HTTP status for any failure. +3. Build the `{ "items": [...] }` payload described under TPEN API. There is exactly one item per entry in "Existing lines", each item re-using that entry's annotation URI verbatim as its `id`, preserving its `target` (the `xywh` selector shown above) unchanged, and carrying the recognized text as the `TextualBody` value. Item order must match the order of "Existing lines" — do not reorder. +4. If HTTP PUT is available, send the request once. On any non-2xx response, do not retry — fall back. If HTTP PUT is unavailable from the start, go directly to the fallback. +5. Report counts (lines updated/in payload, lines flagged illegible) and which path was used. ## Rules @@ -48,34 +47,54 @@ If any precondition fails, stop and return a concise failure report naming the m - Use explicit uncertainty markers for unclear glyphs (for example `[a?]`). Do not force certainty. - Do not invent expansions. If an abbreviation mark is present, transcribe the mark; do not silently expand. - Keep line segmentation stable — one transcription string per existing line annotation. -- If a line's crop is illegible, send an empty body or skip the PATCH and report the line id as unresolved — do not fabricate text. +- If a line's crop is illegible, emit the item with an empty `TextualBody` value and report the line id as unresolved — do not fabricate text, and do not drop the item from `items`. 
## TPEN API -Update one line's text via PATCH with a plain-text body: +Save every transcription in a single PUT. The `items` array re-uses each existing annotation's URI verbatim as `id` so the server updates in place; replace `` with the URI, `xywh=x,y,w,h` with the exact selector value shown in "Existing lines" (copied verbatim, not recomputed), and `` with the transcription (empty string for fully illegible lines). ``` -PATCH {{pageEndpoint}}/line//text +PUT {{pageEndpoint}} Authorization: Bearer {{token}} -Content-Type: text/plain - - +Content-Type: application/json + +{ + "items": [ + { + "id": "", + "type": "Annotation", + "@context": "http://www.w3.org/ns/anno.jsonld", + "body": [{ "type": "TextualBody", "value": "", "format": "text/plain" }], + "target": { + "source": "{{canvasId}}", + "type": "SpecificResource", + "selector": { + "type": "FragmentSelector", + "conformsTo": "http://www.w3.org/TR/media-frags/", + "value": "xywh=x,y,w,h" + } + }, + "motivation": "transcribing" + } + ] +} ``` -`` is the trailing path segment of the annotation URI listed above (the last `/`-separated segment). +## Fallback -On any non-2xx response, include the HTTP status and response body in that line's failure report. +When the direct PUT is impossible or returns non-2xx, emit the `{ "items": [...] }` body from TPEN API as the final code block of your report. It must be valid JSON (no comments, no placeholders — substitute the real URIs, xywh selectors, and recognized text). The user will paste it into the TPEN splitscreen tool, which submits it with their authorized token. ## Completion -On success, report: +Direct PUT path, report: -- operation: `PATCH line text` -- target: {{pageEndpoint}}/line//text per line -- count: number of lines updated +- operation: `PUT page` +- target: {{pageEndpoint}} +- counts: lines updated, lines flagged illegible -On failure, report: +Fallback path, report: -- the failing stage (image fetch, recognition, PATCH, etc.) 
-- HTTP status and error body if applicable -- the line id(s) affected and a recommended next step +- path: `fallback` +- counts: lines in payload, lines flagged illegible +- HTTP status and error body if a PUT was attempted first +- final code block: the full `{ "items": [...] }` JSON for the user to paste From d99910469213a8c78534d60976e1573182cb2f52 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Thu, 23 Apr 2026 14:56:17 -0500 Subject: [PATCH 14/47] recovering --- templates/transcribe-known-lines/PROMPT.md | 41 +++++++++++++--------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/templates/transcribe-known-lines/PROMPT.md b/templates/transcribe-known-lines/PROMPT.md index 272b43d..33b3fd0 100644 --- a/templates/transcribe-known-lines/PROMPT.md +++ b/templates/transcribe-known-lines/PROMPT.md @@ -13,7 +13,7 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Existing lines -Each entry is `<annotation-uri>: <xywh>` in canvas coordinates. Use the full annotation URI verbatim as the `id` of each item in the PUT payload; the server preserves these ids and updates only the body text. +Each entry is `<annotation-uri>: <xywh>` in canvas coordinates. The fallback payload re-uses the full annotation URI verbatim as the `id` of each item; the server preserves these ids and updates only the body text. {{existingLines}} @@ -24,7 +24,7 @@ All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, You must have: 1. Vision capability: fetch each line's region as image bytes (e.g. via a IIIF region URL) and read the inked glyphs directly. A fetcher that returns only a prose description of the image does not qualify, and any preview rendered back into chat is downsampled — do not transcribe from a preview. **If you cannot read image bytes directly with the capabilities already available to you, stop now and return a failure report naming the missing capability.** This precondition is hard — fallback does not rescue missing vision. -2.
Either HTTP PUT capability with `Content-Type: application/json`, or the ability to emit the payload as a fallback JSON code block in your report. If HTTP PUT is not available, skip straight to the Fallback section — do not retry. +2. Either HTTP PATCH capability (with `Content-Type: text/plain`), or the ability to emit a fallback JSON code block in your report. If HTTP PATCH is not available, skip straight to the Fallback section — do not retry. Use only tools already available in your environment. Do not install packages, libraries, or system utilities. @@ -37,9 +37,8 @@ Use only tools already available in your environment. Do not install packages, l - `pixel_h = round(canvas_h * img_h / {{canvasHeight}})` Crop each line region and verify it visibly contains a single line of inked text. 2. Run handwriting text recognition over each crop. Apply the recognition rules below. -3. Build the `{ "items": [...] }` payload described under TPEN API. There is exactly one item per entry in "Existing lines", each item re-using that entry's annotation URI verbatim as its `id`, preserving its `target` (the `xywh` selector shown above) unchanged, and carrying the recognized text as the `TextualBody` value. Item order must match the order of "Existing lines" — do not reorder. -4. If HTTP PUT is available, send the request once. On any non-2xx response, do not retry — fall back. If HTTP PUT is unavailable from the start, go directly to the fallback. -5. Report counts (lines updated/in payload, lines flagged illegible) and which path was used. +3. If HTTP PATCH is available, PATCH the text to each line's line-text endpoint — one PATCH per line in the "Existing lines" list. On any non-2xx, record the status and continue with the remaining lines. If HTTP PATCH is unavailable from the start, go directly to the fallback. +4. Report counts (lines updated, lines flagged illegible, lines failed) and which path was used. 
## Rules @@ -47,15 +46,27 @@ Use only tools already available in your environment. Do not install packages, l - Use explicit uncertainty markers for unclear glyphs (for example `[a?]`). Do not force certainty. - Do not invent expansions. If an abbreviation mark is present, transcribe the mark; do not silently expand. - Keep line segmentation stable — one transcription string per existing line annotation. -- If a line's crop is illegible, emit the item with an empty `TextualBody` value and report the line id as unresolved — do not fabricate text, and do not drop the item from `items`. +- If a line's crop is illegible, send an empty body (direct) or emit an empty `TextualBody` value (fallback) and report the line id as unresolved — do not fabricate text. In the fallback payload, do not drop the item. ## TPEN API -Save every transcription in a single PUT. The `items` array re-uses each existing annotation's URI verbatim as `id` so the server updates in place; replace `` with the URI, `xywh=x,y,w,h` with the exact selector value shown in "Existing lines" (copied verbatim, not recomputed), and `` with the transcription (empty string for fully illegible lines). +Update one line's text via PATCH with a plain-text body. `` is the trailing path segment of the annotation URI listed above (the last `/`-separated segment). ``` -PUT {{pageEndpoint}} +PATCH {{pageEndpoint}}/line//text Authorization: Bearer {{token}} +Content-Type: text/plain + + +``` + +## Fallback + +The fallback tool only accepts JSON, so it uses a single page-level PUT instead of per-line PATCH. When PATCH is unavailable or every attempt returned non-2xx, emit the payload below as the final code block of your report. There must be exactly one item per entry in "Existing lines", each re-using that entry's annotation URI verbatim as its `id`, preserving its `target` (the `xywh` selector shown above) unchanged, and carrying the recognized text as the `TextualBody` value (empty string for fully illegible lines). 
Item order must match the order of "Existing lines" — do not reorder. It must be valid JSON (no comments, no placeholders — substitute the real URIs, xywh selectors, and recognized text). + +``` +PUT {{pageEndpoint}} +Authorization: Bearer Content-Type: application/json { @@ -80,21 +91,19 @@ Content-Type: application/json } ``` -## Fallback - -When the direct PUT is impossible or returns non-2xx, emit the `{ "items": [...] }` body from TPEN API as the final code block of your report. It must be valid JSON (no comments, no placeholders — substitute the real URIs, xywh selectors, and recognized text). The user will paste it into the TPEN splitscreen tool, which submits it with their authorized token. +The user will paste this into the TPEN splitscreen tool, which submits it with their authorized token. ## Completion -Direct PUT path, report: +Direct PATCH path, report: -- operation: `PUT page` -- target: {{pageEndpoint}} -- counts: lines updated, lines flagged illegible +- operation: `PATCH line text` +- target: {{pageEndpoint}}/line//text per line +- counts: lines updated, lines flagged illegible, lines failed (with HTTP status per failure) Fallback path, report: - path: `fallback` - counts: lines in payload, lines flagged illegible -- HTTP status and error body if a PUT was attempted first +- HTTP status and error body if a PATCH was attempted first - final code block: the full `{ "items": [...] 
}` JSON for the user to paste From a9ad8bb0e8c94773d5263672bdf7c82550162f31 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Thu, 23 Apr 2026 15:07:50 -0500 Subject: [PATCH 15/47] changes during review --- styles.css | 6 ++++-- tpen-service.js | 7 ++++--- ui-manager.js | 39 ++++++++++++++++++++++++++------------- 3 files changed, 34 insertions(+), 18 deletions(-) diff --git a/styles.css b/styles.css index a6fbd39..ac3cc8f 100644 --- a/styles.css +++ b/styles.css @@ -115,7 +115,8 @@ button:hover:not(:disabled) { filter: brightness(1.05); } button:focus-visible, input:focus-visible, select:focus-visible, -textarea:focus-visible { +textarea:focus-visible, +summary:focus-visible { outline: 2px solid var(--accent); outline-offset: 2px; } @@ -177,7 +178,8 @@ textarea:focus-visible { color: var(--muted); padding: 0.25rem 0; } -.fallback summary:hover { color: var(--text); } +.fallback summary:hover, +.fallback summary:focus-visible { color: var(--text); } .fallback .hint { margin: 0.5rem 0; font-size: 0.85rem; diff --git a/tpen-service.js b/tpen-service.js index ab1c2bd..f4539f8 100644 --- a/tpen-service.js +++ b/tpen-service.js @@ -30,14 +30,15 @@ async function authedJson(path, method, body, token) { if (body !== undefined) init.body = JSON.stringify(body) const res = await fetch(`${CONFIG.servicesURL}${path}`, init) if (!res.ok) { - const detail = await res.text().catch(() => '') - const err = new Error(`${res.status} ${res.statusText} — ${path}${detail ? `: ${detail}` : ''}`) + const detail = await res.json().catch(() => null) + const msg = detail?.message ?? 
res.statusText + const err = new Error(`${res.status} ${path}: ${msg}`) err.status = res.status throw err } const text = await res.text() if (!text) return null - try { return JSON.parse(text) } catch { return text } + return JSON.parse(text) } /** diff --git a/ui-manager.js b/ui-manager.js index 654426f..3f959d1 100644 --- a/ui-manager.js +++ b/ui-manager.js @@ -71,6 +71,8 @@ export class UIManager { #feedbackTimer = null /** Pending timer for clearing the fallback panel feedback message. */ #fallbackFeedbackTimer = null + /** Fallback-panel submit button; toggled by `updateToken`. */ + #fallbackSubmit = null /** * @param {string} [rootId='app'] id of the element to render into. @@ -245,32 +247,36 @@ export class UIManager { } /** - * Build the paste-JSON fallback panel. Shown inside the workspace body so - * it inherits the token-gate (hidden until `token` is present). Enables the - * submit button only when `projectID` and `pageID` are also held, since the - * dispatcher always targets a specific page. + * Build the paste-JSON fallback panel. The submit button requires + * `projectID`, `pageID`, AND `token` — `renderWorkspace` still draws the + * body (minus generate/copy) when the token is absent and shows an auth + * button, so the panel cannot rely on a workspace-level token gate. + * `updateToken` flips the submit-disabled state when the token arrives + * after the panel was built. 
* @returns {HTMLElement} */ #buildFallbackPanel() { - const { projectID, pageID } = this.state - const ready = Boolean(projectID && pageID) + const { projectID, pageID, token } = this.state + const hasPage = Boolean(projectID && pageID) + const ready = hasPage && Boolean(token) const textarea = el('textarea', { - id: 'fallback-input', rows: 10, spellcheck: false, autocomplete: 'off', + rows: 10, spellcheck: false, autocomplete: 'off', placeholder: '{ "items": [ … ] }\nor\n{ "label": "Column A", "annotations": ["…"] }\nor\n[ { "label": "…", "annotations": ["…"] }, … ]', attrs: { 'aria-label': 'JSON payload to submit to TPEN' } }) const submit = el('button', { - type: 'button', id: 'fallback-submit', + type: 'button', text: 'Submit to TPEN', disabled: !ready }) + this.#fallbackSubmit = submit const feedback = el('span', { class: 'feedback', attrs: { 'aria-live': 'polite' } }) submit.addEventListener('click', () => this.#onFallbackSubmit(textarea, submit, feedback)) const children = [ el('summary', { text: 'Paste JSON from LLM (fallback)' }), el('p', { class: 'hint', text: 'Use this when your chat LLM produced the JSON payload but could not call the TPEN API itself. The tool will submit it using the token you authorized.' }) ] - if (!ready) children.push(el('p', { class: 'hint', text: 'Needs a page context before submission is possible.' })) + if (!hasPage) children.push(el('p', { class: 'hint', text: 'Needs a page context before submission is possible.' })) children.push(textarea, el('div', { class: 'controls' }, [submit, feedback])) return el('details', { class: 'fallback' }, children) } @@ -325,10 +331,12 @@ export class UIManager { } const result = await putPage(projectID, pageID, payload, token) const saved = payload.items.length - textarea.value = result && typeof result === 'object' - ? JSON.stringify(result, null, 2) - : '' - setFeedback(`Saved ${saved} line item${saved === 1 ? '' : 's'}. 
Server response (with ids) is in the textarea.`, true) + if (result && typeof result === 'object') { + textarea.value = JSON.stringify(result, null, 2) + setFeedback(`Saved ${saved} line item${saved === 1 ? '' : 's'}. Server response (with ids) is in the textarea.`, true) + } else { + setFeedback(`Saved ${saved} line item${saved === 1 ? '' : 's'}. Server returned no body; pasted payload left in the textarea.`, true) + } return } if (payload && typeof payload === 'object' && !Array.isArray(payload) @@ -381,6 +389,7 @@ export class UIManager { return } if (this.#generateBtn) this.#generateBtn.disabled = true + if (this.#fallbackSubmit) this.#fallbackSubmit.disabled = true if (this.#workspaceBody) this.#workspaceBody.hidden = true return } @@ -389,6 +398,10 @@ export class UIManager { this.#authButton = null } if (this.#generateBtn) this.#generateBtn.disabled = false + if (this.#fallbackSubmit) { + const { projectID, pageID } = this.state + this.#fallbackSubmit.disabled = !(projectID && pageID) + } if (this.#workspaceBody) this.#workspaceBody.hidden = false } From 99623b1c625f6c426ac6dd0238fc26ab4f1861d2 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Thu, 23 Apr 2026 15:45:54 -0500 Subject: [PATCH 16/47] changes during review --- auth.js | 20 ------------------ templates/detect-and-transcribe/PROMPT.md | 2 -- templates/detect-columns-and-lines/PROMPT.md | 2 -- templates/detect-columns/PROMPT.md | 10 +++++---- templates/detect-lines/PROMPT.md | 2 -- .../detect-order-and-transcribe/PROMPT.md | 2 -- templates/inject-context.js | 21 +++++++++++-------- templates/transcribe-known-lines/PROMPT.md | 4 +--- tpen-service.js | 10 --------- ui-manager.js | 16 +++++++------- 10 files changed, 28 insertions(+), 61 deletions(-) diff --git a/auth.js b/auth.js index 81f4ec8..516e36b 100644 --- a/auth.js +++ b/auth.js @@ -99,23 +99,3 @@ export function persistToken(token) { localStorage.setItem(TOKEN_KEY, token) return token } - -/** - * Extract the agent IRI from a TPEN JWT. 
- * - * Mirrors tpen3-interfaces/components/iiif-tools/index.js:getAgentIRIFromToken. - * The agent IRI lives in a custom claim whose key ends with `/agent` - * (typically `http://store.rerum.io/agent`). - * @param {string} token - * @returns {string|null} - */ -export function getAgentIRIFromToken(token) { - try { - const decoded = decodeJwt(token) - if (!decoded || typeof decoded !== 'object') return null - const key = Object.keys(decoded).find(k => k.endsWith('/agent')) || 'http://store.rerum.io/agent' - return decoded[key] ?? null - } catch { - return null - } -} diff --git a/templates/detect-and-transcribe/PROMPT.md b/templates/detect-and-transcribe/PROMPT.md index 7bbcf46..1f227c3 100644 --- a/templates/detect-and-transcribe/PROMPT.md +++ b/templates/detect-and-transcribe/PROMPT.md @@ -67,8 +67,6 @@ Content-Type: application/json { "items": [ { - "type": "Annotation", - "@context": "http://www.w3.org/ns/anno.jsonld", "body": [{ "type": "TextualBody", "value": "", "format": "text/plain" }], "target": { "source": "{{canvasId}}", diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index a38f11c..2d2680e 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -63,8 +63,6 @@ Content-Type: application/json { "items": [ { - "type": "Annotation", - "@context": "http://www.w3.org/ns/anno.jsonld", "body": [], "target": { "source": "{{canvasId}}", diff --git a/templates/detect-columns/PROMPT.md b/templates/detect-columns/PROMPT.md index 18bcab5..c251f04 100644 --- a/templates/detect-columns/PROMPT.md +++ b/templates/detect-columns/PROMPT.md @@ -17,7 +17,7 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Existing lines -Each entry is `<annotation-uri>: <xywh>` in canvas coordinates. Use the full annotation URI verbatim when assigning lines to columns. +Each entry is `<annotation-uri> | xywh=<x,y,w,h> | body=<body-json>` in canvas coordinates.
Use the full annotation URI verbatim when assigning lines to columns, and echo each line's `body` JSON verbatim in the PUT — the services API overwrites `body` with `[]` when an item omits it. {{existingLines}} @@ -44,7 +44,7 @@ Use only tools already available in your environment. Do not install packages, l Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). 4. For each detected column, determine which of the existing line ids (from the list above) belong to it. Assign a line to the column whose canvas-space region contains the center point of the line's `xywh`. If a line's center falls outside every detected column, assign it to the nearest column by Euclidean distance from the center point to the column's region (distance `0` when the point is inside). Each line belongs to exactly one column. 5. Build a global reading-order sequence of all existing line ids: columns in reading order; within each column, lines sorted top-to-bottom by the `xywh` y-center. -6. Build the `{ "items": [...] }` payload described under TPEN API from that sequence. Each `items` entry re-uses the existing annotation URI verbatim as its `id` — the server preserves ids (and any already-attached body text) rather than minting new ones. +6. Build the `{ "items": [...] }` payload described under TPEN API from that sequence. Each `items` entry re-uses the existing annotation URI verbatim as its `id`, its `target` verbatim, and its `body` JSON verbatim from the "Existing lines" list — the services API replaces `body` with `[]` when an item omits it, so the echo is required to keep existing transcriptions. 7. Build the column payload `[{ label, annotations }, ...]` where each `annotations` array is the contiguous slice of the reading-order id sequence that belongs to that column. Choose a unique label per column (e.g., `Column A`, `Column B`) that does not clash with any label under "Existing columns on this page". 
Both the direct and fallback paths use the same ids — the existing annotation URIs listed above — so the same column payload works in either path. 8. If HTTP PUT and POST are available: PUT the page once, then POST each column once. On any non-2xx, stop and fall back for whatever has not yet persisted. Otherwise go directly to the Fallback. 9. Report counts (columns created or in payload) and which path was used. @@ -58,11 +58,12 @@ Use only tools already available in your environment. Do not install packages, l - Annotations cannot be assigned to more than one column. If a line clearly sits in an existing column, do not reassign it. - Do not POST a column with an empty `annotations` array — the server rejects it. Skip any detected column that ends up with zero assigned lines. - The PUT `items` order defines the page's reading order; column `annotations` slices must match that same order. -- The PUT must carry every existing line id exactly once. Do not drop, duplicate, or mint new ids; do not modify `body` or `target`. +- The PUT must carry every existing line id exactly once. Do not drop, duplicate, or mint new ids. +- Echo each line's existing `body` unchanged. Do not add, remove, or edit any `TextualBody` value. Echo each line's existing `target` unchanged. ## TPEN API -First, reorder the page's line list via a single PUT. The `items` array must contain every existing line — each entry carrying the existing annotation URI verbatim as `id` — in the reading-order sequence from step 5. Reuse each line's original `target` (the `xywh` selector listed under "Existing lines") unchanged. +First, reorder the page's line list via a single PUT. The `items` array must contain every existing line — each entry carrying the existing annotation URI verbatim as `id`, its `body` JSON verbatim from the "Existing lines" list, and its original `target` — in the reading-order sequence from step 5. 
``` PUT {{pageEndpoint}} @@ -75,6 +76,7 @@ Content-Type: application/json "id": "", "type": "Annotation", "@context": "http://www.w3.org/ns/anno.jsonld", + "body": , "target": { "source": "{{canvasId}}", "type": "SpecificResource", diff --git a/templates/detect-lines/PROMPT.md b/templates/detect-lines/PROMPT.md index 9df5b6e..ebe08a3 100644 --- a/templates/detect-lines/PROMPT.md +++ b/templates/detect-lines/PROMPT.md @@ -54,8 +54,6 @@ Content-Type: application/json { "items": [ { - "type": "Annotation", - "@context": "http://www.w3.org/ns/anno.jsonld", "body": [], "target": { "source": "{{canvasId}}", diff --git a/templates/detect-order-and-transcribe/PROMPT.md b/templates/detect-order-and-transcribe/PROMPT.md index 2c6a137..5c27a50 100644 --- a/templates/detect-order-and-transcribe/PROMPT.md +++ b/templates/detect-order-and-transcribe/PROMPT.md @@ -67,8 +67,6 @@ Content-Type: application/json { "items": [ { - "type": "Annotation", - "@context": "http://www.w3.org/ns/anno.jsonld", "body": [{ "type": "TextualBody", "value": "", "format": "text/plain" }], "target": { "source": "{{canvasId}}", diff --git a/templates/inject-context.js b/templates/inject-context.js index eebd863..205af71 100644 --- a/templates/inject-context.js +++ b/templates/inject-context.js @@ -41,13 +41,12 @@ function canvasDimensions(canvas) { * @returns {Record} */ export function buildTemplateContext(ctx) { - const { canvas, project, page, projectID, pageID, projectEndpoint, pageEndpoint, token } = ctx + const { canvas, page, projectID, pageID, pageEndpoint, token } = ctx const canvasId = getIRI(canvas) ?? '(unknown canvas id)' const imageUrl = extractImageUrl(canvas) ?? '(no image body found on canvas)' const { width, height } = canvasDimensions(canvas) const canvasWidth = width != null ? String(width) : '(unknown)' const canvasHeight = height != null ? String(height) : '(unknown)' - const dims = (width && height) ? 
`${width} × ${height}` : 'unknown (use the IIIF Image API info.json)' const lineCount = Array.isArray(page?.items) ? page.items.length : 0 return { projectID: projectID ?? '', @@ -56,9 +55,7 @@ export function buildTemplateContext(ctx) { imageUrl, canvasWidth, canvasHeight, - dims, lineCount: String(lineCount), - projectEndpoint: projectEndpoint ?? '(unknown project endpoint)', pageEndpoint: pageEndpoint ?? '(unknown page endpoint)', token: token ?? '' } @@ -87,10 +84,15 @@ function extractXywh(item) { /** * Render the current line annotations on a page as a markdown bullet list - * keyed by full annotation URI and xywh selector. Pre-resolving this list in - * the parent saves the LLM a GET + parse round trip. Column POSTs require the - * full URI to match `page.items[].id` server-side; PATCH-line-text consumers - * can split the URI's trailing segment themselves. + * carrying the fields needed to echo each line back in a page PUT without + * losing data. Pre-resolving this list in the parent saves the LLM a GET + + * parse round trip. Column POSTs require the full URI to match + * `page.items[].id` server-side; PATCH-line-text consumers can split the + * URI's trailing segment themselves. + * + * Body is included verbatim (as compact JSON) because the services API + * replaces `body` with `[]` if the PUT item omits it — echoing the existing + * body prevents accidental transcription wipes. * @param {any} fetchedPage the page object returned by `fetchPageResolved`. * @returns {string} */ @@ -102,7 +104,8 @@ export function formatExistingLines(fetchedPage) { return items.map(item => { const lineUri = getIRI(item) ?? '(unknown)' const xywh = extractXywh(item) ?? '(no xywh selector)' - return `- ${lineUri}: ${xywh}` + const body = JSON.stringify(item?.body ?? 
[]) + return `- ${lineUri} | xywh=${xywh} | body=${body}` }).join('\n') } diff --git a/templates/transcribe-known-lines/PROMPT.md b/templates/transcribe-known-lines/PROMPT.md index 33b3fd0..3736a06 100644 --- a/templates/transcribe-known-lines/PROMPT.md +++ b/templates/transcribe-known-lines/PROMPT.md @@ -13,7 +13,7 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Existing lines -Each entry is `: ` in canvas coordinates. The fallback payload re-uses the full annotation URI verbatim as the `id` of each item; the server preserves these ids and updates only the body text. +Each entry is ` | xywh= | body=` in canvas coordinates. The fallback payload re-uses the full annotation URI verbatim as the `id` of each item; the server preserves these ids and updates only the body text. {{existingLines}} @@ -73,8 +73,6 @@ Content-Type: application/json "items": [ { "id": "", - "type": "Annotation", - "@context": "http://www.w3.org/ns/anno.jsonld", "body": [{ "type": "TextualBody", "value": "", "format": "text/plain" }], "target": { "source": "{{canvasId}}", diff --git a/tpen-service.js b/tpen-service.js index f4539f8..71bda03 100644 --- a/tpen-service.js +++ b/tpen-service.js @@ -108,16 +108,6 @@ export function postColumn(projectID, pageID, body, token) { ) } -/** - * Build the project endpoint URL (project/index.js). Templates use this for - * best-effort GETs that verify project-level state mid-task. - * @param {string} projectID - * @returns {string} absolute URL. - */ -export function projectEndpoint(projectID) { - return `${CONFIG.servicesURL}/project/${encodeURIComponent(projectID)}` -} - /** * Build the page endpoint URL (page/index.js). Templates use this for PUT/PATCH * operations that target the page or its sub-resources (lines, columns). 
diff --git a/ui-manager.js b/ui-manager.js index 3f959d1..39d1795 100644 --- a/ui-manager.js +++ b/ui-manager.js @@ -9,7 +9,7 @@ */ import { listTemplates, renderTemplate } from './prompt-generator.js' -import { pageEndpoint, projectEndpoint, putPage, postColumn } from './tpen-service.js' +import { pageEndpoint, putPage, postColumn } from './tpen-service.js' import { trailingId } from './iiif-ids.js' /** @@ -210,10 +210,10 @@ export class UIManager { select.append(el('option', { value: t.id, text: t.label })) } - // Prompts embed the auth token; generating before consent yields an - // unusable prompt (templates render "(unable to resolve agent IRI…)"). - // Gate Generate on token presence and nudge the user toward the consent - // button in the header. + // Prompts embed the auth token in `{{token}}`; generating before + // consent yields a prompt whose Authorization header is `Bearer ` with + // nothing after it. Gate Generate on token presence and nudge the user + // toward the consent button in the header. const generateBtn = el('button', { type: 'button', id: 'generate-btn', text: 'Generate prompt', disabled: !token @@ -329,7 +329,10 @@ export class UIManager { return } } - const result = await putPage(projectID, pageID, payload, token) + // Narrow to the minimal PUT body the services API needs. + // Top-level keys beyond `items` would otherwise be applied to + // the page record by the server's property-copy loop. + const result = await putPage(projectID, pageID, { items: payload.items }, token) const saved = payload.items.length if (result && typeof result === 'object') { textarea.value = JSON.stringify(result, null, 2) @@ -442,7 +445,6 @@ export class UIManager { projectID: s.projectID, pageID: s.pageID, layerID: s.layerID, columnID: s.columnID, lineID: s.lineID, token: s.token, - projectEndpoint: s.projectID ? projectEndpoint(s.projectID) : null, pageEndpoint: (s.projectID && s.pageID) ? 
pageEndpoint(s.projectID, s.pageID) : null }) this.#fullPrompt = full From 6114cd6f0d33d1625291454fd1987f1ce86a35e9 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Thu, 23 Apr 2026 16:05:10 -0500 Subject: [PATCH 17/47] changes during review --- templates/detect-columns-and-lines/PROMPT.md | 2 +- templates/detect-columns/PROMPT.md | 18 ++++++---- templates/inject-context.js | 38 +++++++++++++++++--- templates/transcribe-known-lines/PROMPT.md | 2 +- tpen-service.js | 11 ++++-- ui-manager.js | 19 +++++++--- 6 files changed, 70 insertions(+), 20 deletions(-) diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index 2d2680e..c5933e7 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -111,4 +111,4 @@ Fallback path, report: - counts: lines in payload, columns not yet created - HTTP status and error body if a request was attempted first - final code block: the full `{ "items": [...] }` JSON for the user to paste -- next step: re-run with `detect-columns` after the items paste succeeds, to create the columns +- next step: re-run with `detect-columns` after the items paste succeeds, to create the columns. If any columns were already created before the failure (list their labels in the report), the follow-up run must avoid duplicating those labels — `detect-columns` receives the "Existing columns on this page" list and will honour it. diff --git a/templates/detect-columns/PROMPT.md b/templates/detect-columns/PROMPT.md index c251f04..061ee0c 100644 --- a/templates/detect-columns/PROMPT.md +++ b/templates/detect-columns/PROMPT.md @@ -17,7 +17,13 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Existing lines -Each entry is ` | xywh= | body=` in canvas coordinates. 
Use the full annotation URI verbatim when assigning lines to columns, and echo each line's `body` JSON verbatim in the PUT — the services API overwrites `body` with `[]` when an item omits it. +Each entry is `<uri> | xywh=<x,y,w,h> | <body form>` in canvas coordinates. Use the full annotation URI verbatim when assigning lines to columns, and reconstruct each line's `body` verbatim in the PUT — the services API overwrites `body` with `[]` when an item omits it. + +The body form is one of: + +- `body=[]` — echo as `[]`. +- `text="<string>"` — echo as `[{ "type": "TextualBody", "value": <JSON string>, "format": "text/plain" }]`. +- `body=<json>` — echo the JSON verbatim. {{existingLines}} @@ -44,7 +50,7 @@ Use only tools already available in your environment. Do not install packages, l Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). 4. For each detected column, determine which of the existing line ids (from the list above) belong to it. Assign a line to the column whose canvas-space region contains the center point of the line's `xywh`. If a line's center falls outside every detected column, assign it to the nearest column by Euclidean distance from the center point to the column's region (distance `0` when the point is inside). Each line belongs to exactly one column. 5. Build a global reading-order sequence of all existing line ids: columns in reading order; within each column, lines sorted top-to-bottom by the `xywh` y-center. -6. Build the `{ "items": [...] }` payload described under TPEN API from that sequence.
Each `items` entry re-uses the existing annotation URI verbatim as its `id`, its `target` verbatim, and its `body` reconstructed from the entry's body form (see "Existing lines") — the services API replaces `body` with `[]` when an item omits it, so the reconstruction is required to keep existing transcriptions. 7. Build the column payload `[{ label, annotations }, ...]` where each `annotations` array is the contiguous slice of the reading-order id sequence that belongs to that column. Choose a unique label per column (e.g., `Column A`, `Column B`) that does not clash with any label under "Existing columns on this page". Both the direct and fallback paths use the same ids — the existing annotation URIs listed above — so the same column payload works in either path. 8. If HTTP PUT and POST are available: PUT the page once, then POST each column once. On any non-2xx, stop and fall back for whatever has not yet persisted. Otherwise go directly to the Fallback. 9. Report counts (columns created or in payload) and which path was used. @@ -63,7 +69,7 @@ Use only tools already available in your environment. Do not install packages, l ## TPEN API -First, reorder the page's line list via a single PUT. The `items` array must contain every existing line — each entry carrying the existing annotation URI verbatim as `id`, its `body` JSON verbatim from the "Existing lines" list, and its original `target` — in the reading-order sequence from step 5. +First, reorder the page's line list via a single PUT. The `items` array must contain every existing line — each entry carrying the existing annotation URI verbatim as `id`, its `body` reconstructed from the entry's body form in "Existing lines", and its original `target` — in the reading-order sequence from step 5. 
``` PUT {{pageEndpoint}} @@ -74,9 +80,7 @@ Content-Type: application/json "items": [ { "id": "", - "type": "Annotation", - "@context": "http://www.w3.org/ns/anno.jsonld", - "body": , + "body": [], "target": { "source": "{{canvasId}}", "type": "SpecificResource", @@ -92,6 +96,8 @@ Content-Type: application/json } ``` +The example above shows an empty body. Replace `[]` with the reconstructed body for each line (from its `body=…` / `text=…` form). + Then create one POST per detected column. Each `annotations` array is a contiguous slice of the reading-order id sequence, taken verbatim from the "Existing lines" list. ``` diff --git a/templates/inject-context.js b/templates/inject-context.js index 205af71..c2be8e3 100644 --- a/templates/inject-context.js +++ b/templates/inject-context.js @@ -82,6 +82,35 @@ function extractXywh(item) { return raw ? raw.replace(/^xywh=pixel:/, 'xywh=') : null } +/** + * Summarize a line's body for the "Existing lines" listing. + * + * Three forms, chosen to keep the listing compact while still letting PUT + * consumers reconstruct an existing body verbatim (the services API replaces + * `body` with `[]` when a PUT item omits it): + * + * - `body=[]` — empty body; echo as `[]`. + * - `text="…"` — single plain-text `TextualBody`; echo as + * `[{ "type": "TextualBody", "value": , "format": "text/plain" }]`. + * The common case, so it's worth the shorter display. + * - `body=` — anything else; echo the JSON verbatim. + * @param {any} body an annotation `body` value. 
+ * @returns {string} + */ +function formatBody(body) { + if (!Array.isArray(body) || body.length === 0) return 'body=[]' + if (body.length === 1) { + const only = body[0] + const isPlainTextual = only + && typeof only === 'object' + && only.type === 'TextualBody' + && typeof only.value === 'string' + && (only.format === undefined || only.format === 'text/plain') + if (isPlainTextual) return `text=${JSON.stringify(only.value)}` + } + return `body=${JSON.stringify(body)}` +} + /** * Render the current line annotations on a page as a markdown bullet list * carrying the fields needed to echo each line back in a page PUT without @@ -90,9 +119,9 @@ function extractXywh(item) { * `page.items[].id` server-side; PATCH-line-text consumers can split the * URI's trailing segment themselves. * - * Body is included verbatim (as compact JSON) because the services API - * replaces `body` with `[]` if the PUT item omits it — echoing the existing - * body prevents accidental transcription wipes. + * Each entry exposes the body as one of three forms — `body=[]`, `text="…"`, + * or `body=` — consumed by the `detect-columns` and + * `transcribe-known-lines` prompts, which document how to reconstruct each. * @param {any} fetchedPage the page object returned by `fetchPageResolved`. * @returns {string} */ @@ -104,8 +133,7 @@ export function formatExistingLines(fetchedPage) { return items.map(item => { const lineUri = getIRI(item) ?? '(unknown)' const xywh = extractXywh(item) ?? '(no xywh selector)' - const body = JSON.stringify(item?.body ?? []) - return `- ${lineUri} | xywh=${xywh} | body=${body}` + return `- ${lineUri} | xywh=${xywh} | ${formatBody(item?.body)}` }).join('\n') } diff --git a/templates/transcribe-known-lines/PROMPT.md b/templates/transcribe-known-lines/PROMPT.md index 3736a06..65bf522 100644 --- a/templates/transcribe-known-lines/PROMPT.md +++ b/templates/transcribe-known-lines/PROMPT.md @@ -13,7 +13,7 @@ You are assisting with TPEN manuscript transcription. 
Perform the task end-to-en ## Existing lines -Each entry is `<uri> | xywh=<x,y,w,h> | body=<json>` in canvas coordinates. The fallback payload re-uses the full annotation URI verbatim as the `id` of each item; the server preserves these ids and updates only the body text. +Each entry is `<uri> | xywh=<x,y,w,h> | <body form>` in canvas coordinates. The body form is `body=[]` (empty), `text="<string>"` (single plain-text `TextualBody`), or `body=<json>` (anything else) — use it as context for what's already on the line. The fallback payload re-uses the full annotation URI verbatim as the `id` of each item; the server preserves these ids and updates only the body text. {{existingLines}} diff --git a/tpen-service.js b/tpen-service.js index 71bda03..79dceae 100644 --- a/tpen-service.js +++ b/tpen-service.js @@ -30,8 +30,10 @@ async function authedJson(path, method, body, token) { if (body !== undefined) init.body = JSON.stringify(body) const res = await fetch(`${CONFIG.servicesURL}${path}`, init) if (!res.ok) { - const detail = await res.json().catch(() => null) - const msg = detail?.message ?? res.statusText + // TPEN services always emit JSON errors (see tpen3-services + // utilities/shared.js#respondWithError and utilities/routeErrorHandler.js). + const detail = await res.json().catch(() => ({})) + const msg = detail.message ?? detail.error ?? res.statusText const err = new Error(`${res.status} ${path}: ${msg}`) err.status = res.status throw err @@ -78,6 +80,11 @@ export function fetchPageResolved(projectID, pageID, token) { * PUT a page body (`{ items: [...] }`). Used by the fallback JSON-paste flow * when the user's LLM cannot issue writes itself. Items may be new (no `id`, * or a non-http local id) or updates (item `id` is the line's full IRI). + * + * Note: items whose `body` is omitted get `body=[]` on the server — the Line + * class sets `body: this.body ?? []` before saving, which spreads over the existing + * RERUM document. Echo each existing item's body back to preserve its + * transcription.
* @param {string} projectID * @param {string} pageID * @param {{ items: Array }} body diff --git a/ui-manager.js b/ui-manager.js index 39d1795..0c34ac6 100644 --- a/ui-manager.js +++ b/ui-manager.js @@ -294,7 +294,14 @@ export class UIManager { async #onFallbackSubmit(textarea, button, feedback) { const { projectID, pageID, token } = this.state const raw = textarea.value.trim() + // `renderWorkspace` can re-run mid-submit (e.g., token changes via + // `updateToken` during an await), detaching the nodes this handler + // closed over. Guard each UI write so a detached panel doesn't get + // silent stale mutations. + const alive = () => textarea.isConnected + const writeTextarea = (val) => { if (alive()) textarea.value = val } const setFeedback = (msg, autoClear = false) => { + if (!alive()) return feedback.textContent = msg if (this.#fallbackFeedbackTimer) { clearTimeout(this.#fallbackFeedbackTimer) @@ -335,7 +342,7 @@ export class UIManager { const result = await putPage(projectID, pageID, { items: payload.items }, token) const saved = payload.items.length if (result && typeof result === 'object') { - textarea.value = JSON.stringify(result, null, 2) + writeTextarea(JSON.stringify(result, null, 2)) setFeedback(`Saved ${saved} line item${saved === 1 ? '' : 's'}. Server response (with ids) is in the textarea.`, true) } else { setFeedback(`Saved ${saved} line item${saved === 1 ? '' : 's'}. 
Server returned no body; pasted payload left in the textarea.`, true) @@ -346,7 +353,7 @@ export class UIManager { && typeof payload.label === 'string' && Array.isArray(payload.annotations)) { await postColumn(projectID, pageID, payload, token) setFeedback(`Created column "${payload.label}".`, true) - textarea.value = '' + writeTextarea('') return } if (Array.isArray(payload) && payload.every(c => @@ -355,13 +362,13 @@ export class UIManager { const col = payload[i] try { await postColumn(projectID, pageID, col, token) } catch (err) { - textarea.value = JSON.stringify(payload.slice(i), null, 2) + writeTextarea(JSON.stringify(payload.slice(i), null, 2)) setFeedback(`Created ${i} of ${payload.length} columns; failed on "${col.label}" — ${err.message}. Remaining columns kept in the textarea for retry.`) return } } setFeedback(`Created ${payload.length} column${payload.length === 1 ? '' : 's'}.`, true) - textarea.value = '' + writeTextarea('') return } setFeedback('Unrecognized payload shape — expected `{items: [...]}`, `{label, annotations}`, or an array of `{label, annotations}`.') @@ -369,7 +376,9 @@ export class UIManager { const status = err?.status ? `TPEN API ${err.status}: ` : '' setFeedback(`${status}${err?.message ?? 
'Submission failed.'}`) } finally { - button.disabled = !(this.state.projectID && this.state.pageID && this.state.token) + if (button.isConnected) { + button.disabled = !(this.state.projectID && this.state.pageID && this.state.token) + } } } From 31a298318bc12f3720cb5f90be642bb0b2adf9c6 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Thu, 23 Apr 2026 16:28:44 -0500 Subject: [PATCH 18/47] changes during review --- templates/detect-columns-and-lines/PROMPT.md | 12 +-- templates/detect-columns/PROMPT.md | 99 ++++++++++---------- templates/detect-columns/index.js | 8 +- ui-manager.js | 16 ++-- 4 files changed, 65 insertions(+), 70 deletions(-) diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index c5933e7..053c456 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -34,9 +34,9 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). -4. Build the `{ "items": [...] }` payload described under TPEN API. The `items` array MUST be in the global reading-order sequence from step 2 — this fixes the page's canonical line order. Leave `body` empty — no text yet. Track each line's column index (0-based) as you emit items so you can slice them into columns in step 5. -5. If HTTP PUT and POST are available: PUT the items once, then for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of server-returned ids for that column. Labels must be unique and must not clash with anything in "Existing columns on this page". On any non-2xx, stop and fall back for everything not yet persisted. -6. 
If HTTP PUT/POST are unavailable from the start, go directly to the Fallback — emit the items payload for the user to paste. Column creation in fallback requires a follow-up pass (see Fallback). +4. Build the `{ "items": [...] }` payload described under TPEN API. The `items` array MUST be in the global reading-order sequence from step 2 — this fixes the page's canonical line order. Leave `body` empty — no text yet. +5. If HTTP PUT and POST are available: PUT the items once, then for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The response returns `items` in submission order, so use each line's column index from step 2 to slice the returned ids. Labels must be unique and must not clash with anything in "Existing columns on this page". On any non-2xx, stop and fall back for everything not yet persisted. +6. If HTTP PUT/POST are unavailable from the start, go directly to the Fallback — emit the items payload for the user to paste. Column creation is out of scope for the fallback path. 7. Report counts (lines saved/in payload, columns created/in payload) and which path was used. ## Rules @@ -94,7 +94,7 @@ Content-Type: application/json ## Fallback -When the direct path is unavailable or returns non-2xx, emit only the `{ "items": [...] }` body from TPEN API as the final code block of your report, in the global reading-order sequence from step 2. It must be valid JSON. The user pastes it into the TPEN splitscreen tool, which PUTs it with their authorized token and fills the textarea with the server response (the persisted lines, each with its assigned id). Column creation in fallback is a separate pass — instruct the user to re-run with the `detect-columns` template, which takes the now-persisted lines and emits the `[{label, annotations}, ...]` column payload. +When the direct path is unavailable or returns non-2xx, emit only the `{ "items": [...] 
}` body from TPEN API as the final code block of your report, in the global reading-order sequence from step 2. It must be valid JSON. The user pastes it into the TPEN splitscreen tool, which PUTs it with their authorized token. Column creation is out of scope for this fallback. ## Completion @@ -108,7 +108,7 @@ Direct path, report: Fallback path, report: - path: `fallback` -- counts: lines in payload, columns not yet created +- counts: lines in payload - HTTP status and error body if a request was attempted first - final code block: the full `{ "items": [...] }` JSON for the user to paste -- next step: re-run with `detect-columns` after the items paste succeeds, to create the columns. If any columns were already created before the failure (list their labels in the report), the follow-up run must avoid duplicating those labels — `detect-columns` receives the "Existing columns on this page" list and will honour it. +- list the labels of any columns already created before the failure, so a follow-up pass can avoid duplicating them. diff --git a/templates/detect-columns/PROMPT.md b/templates/detect-columns/PROMPT.md index 061ee0c..fd7f0ab 100644 --- a/templates/detect-columns/PROMPT.md +++ b/templates/detect-columns/PROMPT.md @@ -1,6 +1,6 @@ -# Task: detect column regions on a TPEN3 page and assign existing lines to them +# Task: order existing lines on a TPEN3 page into reading order and group them into columns -You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services (direct) or emitted as fallback JSON payloads for the user to paste. +You are assisting with TPEN manuscript transcription. This task rebuilds the column layout on a page that already has line annotations. It has no fallback: on any precondition failure, image-analysis failure, or non-2xx response from a TPEN API call, stop and return a failure report. 
## Context @@ -17,7 +17,7 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Existing lines -Each entry is ` | xywh= | ` in canvas coordinates. Use the full annotation URI verbatim when assigning lines to columns, and reconstruct each line's `body` verbatim in the PUT — the services API overwrites `body` with `[]` when an item omits it. +Each entry is ` | xywh= | ` in canvas coordinates, printed in the page's current order. Use the full annotation URI verbatim when assigning lines to columns and when echoing lines in the page PUT. Compare the current order against the reading-order sequence you compute in step 5 to decide whether the PUT in step 8 is necessary. The body form is one of: @@ -29,12 +29,12 @@ The body form is one of: ## Preconditions -All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions, existing-line list) are provided above. This template operates on an existing line set: `lineCount` = `{{lineCount}}`. If `lineCount` is `0`, stop immediately and report. +All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions, existing-line list) are provided above. This task operates on an existing line set: `lineCount` = `{{lineCount}}`. If `lineCount` is `0`, stop immediately and return a failure report — this task cannot create lines. You must have: -1. Programmatic pixel access to the full-resolution image — a numeric pixel buffer you can iterate over. A prose description of the image, or any measurement taken from a rendered or previewed image, does not qualify; previews are downsampled and visually estimated bounds will be wrong. **If you cannot obtain pixel data with the capabilities already available to you, stop now and return a failure report naming the missing capability.** This precondition is hard — fallback does not rescue missing vision. -2. 
Either HTTP PUT and POST capability with `Content-Type: application/json`, or the ability to emit the payloads as fallback JSON code blocks in your report. If either verb is unavailable, skip straight to the Fallback section — do not retry. +1. Programmatic pixel access to the full-resolution image — a numeric pixel buffer you can iterate over. A prose description of the image, or any measurement taken from a rendered or previewed image, does not qualify; previews are downsampled and visually estimated bounds will be wrong. **If you cannot obtain pixel data with the capabilities already available to you, stop now and return a failure report naming the missing capability.** +2. HTTP DELETE, POST, and PUT capability with `Content-Type: application/json` (DELETE carries no body). **If any verb is unavailable, stop now and return a failure report naming the missing capability.** Use only tools already available in your environment. Do not install packages, libraries, or system utilities. @@ -50,26 +50,46 @@ Use only tools already available in your environment. Do not install packages, l Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). 4. For each detected column, determine which of the existing line ids (from the list above) belong to it. Assign a line to the column whose canvas-space region contains the center point of the line's `xywh`. If a line's center falls outside every detected column, assign it to the nearest column by Euclidean distance from the center point to the column's region (distance `0` when the point is inside). Each line belongs to exactly one column. 5. Build a global reading-order sequence of all existing line ids: columns in reading order; within each column, lines sorted top-to-bottom by the `xywh` y-center. -6. Build the `{ "items": [...] }` payload described under TPEN API from that sequence. 
Each `items` entry re-uses the existing annotation URI verbatim as its `id`, its `target` verbatim, and its `body` reconstructed from the entry's body form (see "Existing lines") — the services API replaces `body` with `[]` when an item omits it, so the reconstruction is required to keep existing transcriptions. -7. Build the column payload `[{ label, annotations }, ...]` where each `annotations` array is the contiguous slice of the reading-order id sequence that belongs to that column. Choose a unique label per column (e.g., `Column A`, `Column B`) that does not clash with any label under "Existing columns on this page". Both the direct and fallback paths use the same ids — the existing annotation URIs listed above — so the same column payload works in either path. -8. If HTTP PUT and POST are available: PUT the page once, then POST each column once. On any non-2xx, stop and fall back for whatever has not yet persisted. Otherwise go directly to the Fallback. -9. Report counts (columns created or in payload) and which path was used. +6. DELETE every existing column on the page (see TPEN API below). On any non-2xx, stop and report. Do not POST or PUT after a DELETE failure. +7. For each detected column, POST `{ label, annotations }` where `annotations` is the contiguous slice of the reading-order id sequence from step 5 that belongs to that column. Choose a unique label per column (e.g., `Column A`, `Column B`) that does not clash with any other label chosen in this run. On any non-2xx, stop and report — columns POSTed before the failure remain persisted. +8. If the reading-order sequence from step 5 differs from the order of "Existing lines", PUT the page with `items` in the new order. Each entry re-uses the existing annotation URI verbatim as its `id`, its `body` reconstructed from the entry's body form, and its `target` rebuilt from the entry's `xywh` selector. The PUT must not change any URI, or the columns POSTed in step 7 will reference dead ids. 
If the sequence is already in the current order, skip the PUT. On any non-2xx, stop and report. +9. Report: columns deleted, columns created, whether the page order was updated, and per-column line counts. ## Rules - Preserve reading order. Columns proceed as the page is read (left→right for Latin-script layouts; adjust for script tradition). - Prefer high recall: include borderline regions as columns when they contain text rather than silently dropping them. - Keep column boundaries tight enough that each line clearly belongs to one column, but generous enough to avoid clipping existing line selectors. -- Column labels are page-scoped and must be unique. Do not duplicate an existing column label. -- Annotations cannot be assigned to more than one column. If a line clearly sits in an existing column, do not reassign it. -- Do not POST a column with an empty `annotations` array — the server rejects it. Skip any detected column that ends up with zero assigned lines. -- The PUT `items` order defines the page's reading order; column `annotations` slices must match that same order. -- The PUT must carry every existing line id exactly once. Do not drop, duplicate, or mint new ids. -- Echo each line's existing `body` unchanged. Do not add, remove, or edit any `TextualBody` value. Echo each line's existing `target` unchanged. +- Column labels must be unique within this run. The DELETE in step 6 clears every existing column, so no pre-existing label can collide. +- Each existing line belongs to exactly one column. +- Do not POST a column with an empty `annotations` array — the server rejects it. If a detected column would end up with zero assigned lines, merge its assignments into the nearest populated column instead. +- Echo each line's existing `body` and `target` unchanged in the PUT. Changing either mints a new line URI on the server, which breaks the columns created in step 7. ## TPEN API -First, reorder the page's line list via a single PUT. 
The `items` array must contain every existing line — each entry carrying the existing annotation URI verbatim as `id`, its `body` reconstructed from the entry's body form in "Existing lines", and its original `target` — in the reading-order sequence from step 5. +First, delete all existing columns on the page. Expect `204 No Content` on success (including when the page had no columns): + +``` +DELETE {{pageEndpoint}}/clear-columns +Authorization: Bearer {{token}} +``` + +Then POST each new column — one request per column: + +``` +POST {{pageEndpoint}}/column +Authorization: Bearer {{token}} +Content-Type: application/json + +{ + "label": "Column A", + "annotations": ["<line-id>", "<line-id>", "<line-id>"] +} +``` + +Each `<line-id>` is the full id of an existing line listed above, used verbatim. + +Finally, if step 8 determined the reading order changed, PUT the page to rewrite its canonical line order. Each `items` entry carries the existing annotation URI verbatim as `id`, its `body` reconstructed from the entry's body form in "Existing lines", and its `target` rebuilt from the entry's `xywh` selector: ``` PUT {{pageEndpoint}} @@ -96,44 +116,19 @@ Content-Type: application/json } ``` -The example above shows an empty body. Replace `[]` with the reconstructed body for each line (from its `body=…` / `text=…` form). +## Failure -Then create one POST per detected column. Each `annotations` array is a contiguous slice of the reading-order id sequence, taken verbatim from the "Existing lines" list. +There is no fallback. If image analysis cannot be performed or any TPEN API call returns non-2xx, stop and report: -``` -POST {{pageEndpoint}}/column -Authorization: Bearer {{token}} -Content-Type: application/json - -{ - "label": "Column A", - "annotations": ["<line-id>", "<line-id>", "<line-id>"] -} -``` - -Each `<line-id>` is the full id of a line annotation listed above, used verbatim. - -## Fallback - -When the direct path is unavailable or returns non-2xx, emit two final code blocks in your report, in order: - -1. The `{ "items": [...]
}` body from TPEN API — the reading-order reorder of existing lines. -2. The `[{ "label": "…", "annotations": [ "", … ] }, …]` column array — one entry per detected column, annotations drawn verbatim from "Existing lines". - -Both must be valid JSON. The user pastes each block into the TPEN splitscreen tool; the tool PUTs the first block with their authorized token, then POSTs each column from the second block in one paste. +- the failing stage (precondition, image analysis, DELETE clear-columns, POST column, or PUT page) +- HTTP status and error body when applicable +- which operations persisted before the failure (e.g., `DELETE succeeded, POST Column A succeeded, POST Column B failed`) so the resulting page state is clear ## Completion -Direct path, report: - -- operations: `PUT page`, `POST column` (×N) -- target: {{pageEndpoint}} (page) and `{{pageEndpoint}}/column` -- count: number of columns created -- per-column line counts - -Fallback path, report: +On success, report: -- path: `fallback` -- counts: columns in payload, per-column line counts -- HTTP status and error body if a request was attempted first -- final code blocks (in order): the `{ "items": [...] }` JSON, then the `[{label, annotations}, ...]` column JSON, for the user to paste +- operations: `DELETE clear-columns`, `POST column` (×N), optionally `PUT page` +- targets: `{{pageEndpoint}}/clear-columns`, `{{pageEndpoint}}/column`, `{{pageEndpoint}}` (page) +- counts: columns deleted, columns created, per-column line counts +- whether the page order was updated diff --git a/templates/detect-columns/index.js b/templates/detect-columns/index.js index 6ef3271..e26199a 100644 --- a/templates/detect-columns/index.js +++ b/templates/detect-columns/index.js @@ -1,8 +1,8 @@ /** - * @file Template: "Detect columns → POST column". - * - * Targets workflow #2 from the absorbed cubap `_tools/COMMON_TASKS.md`: - * Column Detection. 
+ * @file Template: "Detect columns → clear-columns DELETE, then POST column per + * detected column, then PUT page to reorder lines (if needed)". Operates on + * an existing line set; no fallback — fails and reports if image analysis or + * any of the three HTTP verbs are unavailable. * * @author thehabes */ diff --git a/ui-manager.js b/ui-manager.js index 0c34ac6..a4059f9 100644 --- a/ui-manager.js +++ b/ui-manager.js @@ -247,12 +247,12 @@ export class UIManager { } /** - * Build the paste-JSON fallback panel. The submit button requires - * `projectID`, `pageID`, AND `token` — `renderWorkspace` still draws the - * body (minus generate/copy) when the token is absent and shows an auth - * button, so the panel cannot rely on a workspace-level token gate. - * `updateToken` flips the submit-disabled state when the token arrives - * after the panel was built. + * Build the paste-JSON fallback panel. Submit requires `projectID`, + * `pageID`, AND `token`. The workspace body is hidden when no token is + * held (`renderWorkspace` sets `hidden: !token`), so the disabled state + * below is belt-and-suspenders against a stale reference being clicked + * programmatically. `updateToken` still flips it when the token arrives + * after the panel was built so the pageID gate remains authoritative. * @returns {HTMLElement} */ #buildFallbackPanel() { @@ -273,8 +273,8 @@ export class UIManager { const feedback = el('span', { class: 'feedback', attrs: { 'aria-live': 'polite' } }) submit.addEventListener('click', () => this.#onFallbackSubmit(textarea, submit, feedback)) const children = [ - el('summary', { text: 'Paste JSON from LLM (fallback)' }), - el('p', { class: 'hint', text: 'Use this when your chat LLM produced the JSON payload but could not call the TPEN API itself. The tool will submit it using the token you authorized.' }) + el('summary', { text: `Couldn't Use the API? 
Paste JSON from LLM here` }), + el('p', { class: 'hint', text: 'Use this when your chat LLM produced the JSON payload but could not call the TPEN API itself. This tool will submit it using the token you authorized.' }) ] if (!hasPage) children.push(el('p', { class: 'hint', text: 'Needs a page context before submission is possible.' })) children.push(textarea, el('div', { class: 'controls' }, [submit, feedback])) From f85b13528ba3faebb9797a3106a37b7b88f2f068 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Thu, 23 Apr 2026 16:44:42 -0500 Subject: [PATCH 19/47] changes during review --- templates/detect-and-transcribe/PROMPT.md | 4 +--- templates/detect-columns-and-lines/PROMPT.md | 4 +--- templates/detect-columns/PROMPT.md | 10 ++++------ templates/detect-lines/PROMPT.md | 4 +--- templates/detect-order-and-transcribe/PROMPT.md | 4 +--- templates/inject-context.js | 12 ++++++++---- templates/transcribe-known-lines/PROMPT.md | 4 +--- 7 files changed, 17 insertions(+), 25 deletions(-) diff --git a/templates/detect-and-transcribe/PROMPT.md b/templates/detect-and-transcribe/PROMPT.md index 1f227c3..e97e53f 100644 --- a/templates/detect-and-transcribe/PROMPT.md +++ b/templates/detect-and-transcribe/PROMPT.md @@ -4,8 +4,6 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Context -- Project: {{projectID}} -- Page: {{pageID}} - Canvas: {{canvasId}} - Canvas Dimensions: {{canvasWidth}} × {{canvasHeight}} - Image: {{imageUrl}} @@ -13,7 +11,7 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Preconditions -All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: +All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: 1. Ability to fetch the image bytes (or a derivative) and identify line bounds and text from them. 
Precise pixel measurement is preferred when available; visual estimation and on-sight transcription from the fetched image are acceptable otherwise. 2. Either HTTP PUT capability with `Content-Type: application/json`, or the ability to emit the payload as a fallback JSON code block in your report. If HTTP PUT is not available, skip straight to the Fallback section — do not retry. diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index 053c456..959376c 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -4,8 +4,6 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Context -- Project: {{projectID}} -- Page: {{pageID}} - Canvas: {{canvasId}} - Canvas Dimensions: {{canvasWidth}} × {{canvasHeight}} - Image: {{imageUrl}} @@ -17,7 +15,7 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Preconditions -All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: +All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: 1. Ability to fetch the image bytes (or a derivative) and identify line and column bounds from them. Precise pixel measurement is preferred when available; visual estimation from the fetched image is acceptable otherwise. 2. Either HTTP PUT and POST capability with `Content-Type: application/json`, or the ability to emit the payloads as fallback JSON code blocks in your report. If either verb is unavailable, skip straight to the Fallback section — do not retry. diff --git a/templates/detect-columns/PROMPT.md b/templates/detect-columns/PROMPT.md index fd7f0ab..220020f 100644 --- a/templates/detect-columns/PROMPT.md +++ b/templates/detect-columns/PROMPT.md @@ -4,8 +4,6 @@ You are assisting with TPEN manuscript transcription. 
This task rebuilds the col ## Context -- Project: {{projectID}} -- Page: {{pageID}} - Canvas: {{canvasId}} - Canvas Dimensions: {{canvasWidth}} × {{canvasHeight}} - Image: {{imageUrl}} @@ -29,7 +27,7 @@ The body form is one of: ## Preconditions -All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions, existing-line list) are provided above. This task operates on an existing line set: `lineCount` = `{{lineCount}}`. If `lineCount` is `0`, stop immediately and return a failure report — this task cannot create lines. +All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions, existing-line list) are provided above. This task operates on an existing line set: `lineCount` = `{{lineCount}}`. If `lineCount` is `0`, stop immediately and return a failure report — this task cannot create lines. You must have: @@ -52,7 +50,7 @@ Use only tools already available in your environment. Do not install packages, l 5. Build a global reading-order sequence of all existing line ids: columns in reading order; within each column, lines sorted top-to-bottom by the `xywh` y-center. 6. DELETE every existing column on the page (see TPEN API below). On any non-2xx, stop and report. Do not POST or PUT after a DELETE failure. 7. For each detected column, POST `{ label, annotations }` where `annotations` is the contiguous slice of the reading-order id sequence from step 5 that belongs to that column. Choose a unique label per column (e.g., `Column A`, `Column B`) that does not clash with any other label chosen in this run. On any non-2xx, stop and report — columns POSTed before the failure remain persisted. -8. If the reading-order sequence from step 5 differs from the order of "Existing lines", PUT the page with `items` in the new order. 
Each entry re-uses the existing annotation URI verbatim as its `id`, its `body` reconstructed from the entry's body form, and its `target` rebuilt from the entry's `xywh` selector. The PUT must not change any URI, or the columns POSTed in step 7 will reference dead ids. If the sequence is already in the current order, skip the PUT. On any non-2xx, stop and report. +8. If the reading-order sequence from step 5 differs from the order of "Existing lines", PUT the page with `items` in the new order. Each entry re-uses the existing annotation URI verbatim as its `id`, its `body` reconstructed from the entry's body form, and its `target` rebuilt from the entry's `xywh` selector. The server remaps column references when URIs change, but echoing `body` and `target` verbatim avoids minting unnecessary RERUM versions. If the sequence is already in the current order, skip the PUT. On any non-2xx, stop and report. 9. Report: columns deleted, columns created, whether the page order was updated, and per-column line counts. ## Rules @@ -63,7 +61,7 @@ Use only tools already available in your environment. Do not install packages, l - Column labels must be unique within this run. The DELETE in step 6 clears every existing column, so no pre-existing label can collide. - Each existing line belongs to exactly one column. - Do not POST a column with an empty `annotations` array — the server rejects it. If a detected column would end up with zero assigned lines, merge its assignments into the nearest populated column instead. -- Echo each line's existing `body` and `target` unchanged in the PUT. Changing either mints a new line URI on the server, which breaks the columns created in step 7. +- Echo each line's existing `body` and `target` unchanged in the PUT. Changing either mints a new RERUM version of the line; the server remaps columns to the new URIs, but echoing verbatim avoids the needless version. 
## TPEN API @@ -100,7 +98,7 @@ Content-Type: application/json "items": [ { "id": "<existing-line-uri>", - "body": [], + "body": <reconstructed-body>, "target": { "source": "{{canvasId}}", "type": "SpecificResource", diff --git a/templates/detect-lines/PROMPT.md b/templates/detect-lines/PROMPT.md index ebe08a3..973aadd 100644 --- a/templates/detect-lines/PROMPT.md +++ b/templates/detect-lines/PROMPT.md @@ -4,8 +4,6 @@ You are assisting with TPEN manuscript transcription.
Perform the task end-to-en ## Context -- Project: {{projectID}} -- Page: {{pageID}} - Canvas: {{canvasId}} - Canvas Dimensions: {{canvasWidth}} × {{canvasHeight}} - Image: {{imageUrl}} @@ -13,7 +11,7 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Preconditions -All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: +All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: 1. Ability to fetch the image bytes (or a derivative) and identify line bounds and text from them. Precise pixel measurement is preferred when available; visual estimation and on-sight transcription from the fetched image are acceptable otherwise. 2. Either HTTP PUT capability with `Content-Type: application/json`, or the ability to emit the payload as a fallback JSON code block in your report. If HTTP PUT is not available, skip straight to the Fallback section — do not retry. diff --git a/templates/inject-context.js b/templates/inject-context.js index c2be8e3..9a65ca3 100644 --- a/templates/inject-context.js +++ b/templates/inject-context.js @@ -41,7 +41,7 @@ function canvasDimensions(canvas) { * @returns {Record} */ export function buildTemplateContext(ctx) { - const { canvas, page, projectID, pageID, pageEndpoint, token } = ctx + const { canvas, page, pageEndpoint, token } = ctx const canvasId = getIRI(canvas) ?? '(unknown canvas id)' const imageUrl = extractImageUrl(canvas) ?? '(no image body found on canvas)' const { width, height } = canvasDimensions(canvas) @@ -49,8 +49,6 @@ export function buildTemplateContext(ctx) { const canvasHeight = height != null ? String(height) : '(unknown)' const lineCount = Array.isArray(page?.items) ? page.items.length : 0 return { - projectID: projectID ?? '', - pageID: pageID ?? 
'', canvasId, imageUrl, canvasWidth, @@ -94,6 +92,12 @@ function extractXywh(item) { * `[{ "type": "TextualBody", "value": , "format": "text/plain" }]`. * The common case, so it's worth the shorter display. * - `body=` — anything else; echo the JSON verbatim. + * + * Existing TPEN line bodies are expected to always carry `type`, `value`, and + * `format`. The `text=` round-trip reconstruction sets `format: "text/plain"`, + * so `only.format === 'text/plain'` is a strict match — any other shape (no + * format, different format, multiple bodies, non-`TextualBody`) drops to + * `body=` to preserve fidelity on the PUT echo. * @param {any} body an annotation `body` value. * @returns {string} */ @@ -105,7 +109,7 @@ function formatBody(body) { && typeof only === 'object' && only.type === 'TextualBody' && typeof only.value === 'string' - && (only.format === undefined || only.format === 'text/plain') + && only.format === 'text/plain' if (isPlainTextual) return `text=${JSON.stringify(only.value)}` } return `body=${JSON.stringify(body)}` diff --git a/templates/transcribe-known-lines/PROMPT.md b/templates/transcribe-known-lines/PROMPT.md index 65bf522..776f0e2 100644 --- a/templates/transcribe-known-lines/PROMPT.md +++ b/templates/transcribe-known-lines/PROMPT.md @@ -4,8 +4,6 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Context -- Project: {{projectID}} -- Page: {{pageID}} - Canvas: {{canvasId}} - Canvas Dimensions: {{canvasWidth}} × {{canvasHeight}} - Image: {{imageUrl}} @@ -19,7 +17,7 @@ Each entry is ` | xywh= | ` in canvas ## Preconditions -All required inputs (`projectID`, `pageID`, `canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions, existing-line list) are provided above. This template only revises existing lines: `lineCount` = `{{lineCount}}`. If `lineCount` is `0`, stop immediately and report. 
+All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions, existing-line list) are provided above. This template only revises existing lines: `lineCount` = `{{lineCount}}`. If `lineCount` is `0`, stop immediately and report. You must have: From 68b9c40820605dcfc362ab717ffa9af17f9b8610 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Thu, 23 Apr 2026 17:32:20 -0500 Subject: [PATCH 20/47] condensed communication --- templates/detect-and-transcribe/PROMPT.md | 18 +++- templates/detect-columns-and-lines/PROMPT.md | 22 +++-- templates/detect-lines/PROMPT.md | 18 +++- .../detect-order-and-transcribe/PROMPT.md | 23 +++-- templates/transcribe-known-lines/PROMPT.md | 29 ++---- tpen-service.js | 8 +- ui-manager.js | 92 +++++++++++++++++-- 7 files changed, 157 insertions(+), 53 deletions(-) diff --git a/templates/detect-and-transcribe/PROMPT.md b/templates/detect-and-transcribe/PROMPT.md index e97e53f..25f7947 100644 --- a/templates/detect-and-transcribe/PROMPT.md +++ b/templates/detect-and-transcribe/PROMPT.md @@ -29,8 +29,8 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). 4. Run handwriting text recognition on each line's crop. Apply the recognition rules below. -5. Build the `{ "items": [...] }` payload described under TPEN API — one Annotation per line with the recognized text as the `TextualBody` value and `xywh=x,y,w,h` as the fragment selector. -6. If HTTP PUT is available, send the request once. On any non-2xx response, do not retry — fall back. If HTTP PUT is unavailable from the start, go directly to the fallback. +5. If HTTP PUT is available, build the full payload under **TPEN API** — one Annotation per line with the recognized text and `xywh=x,y,w,h` selector — and send the request once. On any non-2xx response, do not retry — fall back. 
+6. If HTTP PUT is unavailable (or step 5 fell back), emit the condensed payload under **Fallback** as the final code block. 7. Report counts (lines saved/in payload, non-empty text, uncertain) and which path was used (direct PUT or fallback). 8. Report notable ambiguities (e.g., illegible lines transcribed as empty or flagged). @@ -83,7 +83,17 @@ Content-Type: application/json ## Fallback -When the direct PUT is impossible or returns non-2xx, emit the `{ "items": [...] }` body from TPEN API as the final code block of your report. It must be valid JSON (no comments, no placeholders — substitute the real coordinates and recognized text). The user will paste it into the TPEN splitscreen tool, which submits it with their authorized token. +When the direct PUT is impossible or returns non-2xx, emit the condensed payload below as the final code block of your report. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON (no comments, no placeholders — substitute the real coordinates and recognized text). + +``` +{ + "items": [ + { "text": "", "target": "xywh=x,y,w,h" } + ] +} +``` + +One item per detected line, in reading order. `target` is the bare selector value (no `#`, no `pixel:` prefix). `text` is an empty string for fully illegible lines — do not drop the item. ## Completion @@ -100,4 +110,4 @@ Fallback path, report: - counts: lines in payload, lines with non-empty text, lines flagged uncertain - HTTP status and error body if a PUT was attempted first - notable ambiguities worth a human review -- final code block: the full `{ "items": [...] }` JSON for the user to paste +- final code block: the condensed `{ "items": [...] 
}` JSON for the user to paste diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index 959376c..c6e70f0 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -1,6 +1,6 @@ # Task: detect columns AND lines on a TPEN3 page and save both to the page -You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services (direct) or emitted as fallback JSON payloads for the user to paste. +You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services (direct) or emitted as a fallback JSON payload for the user to paste. ## Context @@ -32,9 +32,9 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). -4. Build the `{ "items": [...] }` payload described under TPEN API. The `items` array MUST be in the global reading-order sequence from step 2 — this fixes the page's canonical line order. Leave `body` empty — no text yet. -5. If HTTP PUT and POST are available: PUT the items once, then for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The response returns `items` in submission order, so use each line's column index from step 2 to slice the returned ids. Labels must be unique and must not clash with anything in "Existing columns on this page". On any non-2xx, stop and fall back for everything not yet persisted. -6. If HTTP PUT/POST are unavailable from the start, go directly to the Fallback — emit the items payload for the user to paste. 
Column creation is out of scope for the fallback path. +4. Assemble the per-line list in the global reading-order sequence from step 2 — this fixes the page's canonical line order for both paths. +5. If HTTP PUT and POST are available: build the full payload under **TPEN API** and PUT the items once, then for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The response returns `items` in submission order, so use each line's column index from step 2 to slice the returned ids. Labels must be unique and must not clash with anything in "Existing columns on this page". On any non-2xx, stop and fall back for everything not yet persisted. +6. If HTTP PUT/POST are unavailable (or step 5 fell back), emit the condensed payload under **Fallback** as the final code block. Column creation is out of scope for the fallback path. 7. Report counts (lines saved/in payload, columns created/in payload) and which path was used. ## Rules @@ -92,7 +92,17 @@ Content-Type: application/json ## Fallback -When the direct path is unavailable or returns non-2xx, emit only the `{ "items": [...] }` body from TPEN API as the final code block of your report, in the global reading-order sequence from step 2. It must be valid JSON. The user pastes it into the TPEN splitscreen tool, which PUTs it with their authorized token. Column creation is out of scope for this fallback. +When the direct path is unavailable or returns non-2xx, emit the condensed payload below as the final code block of your report, in the global reading-order sequence from step 2. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON. Column creation is out of scope for this fallback. + +``` +{ + "items": [ + { "target": "xywh=x,y,w,h" } + ] +} +``` + +One item per detected line, in the global reading-order sequence. 
`target` is the bare selector value (no `#`, no `pixel:` prefix). `body` is omitted because no text is produced by this task. ## Completion @@ -108,5 +118,5 @@ Fallback path, report: - path: `fallback` - counts: lines in payload - HTTP status and error body if a request was attempted first -- final code block: the full `{ "items": [...] }` JSON for the user to paste +- final code block: the condensed `{ "items": [...] }` JSON for the user to paste - list the labels of any columns already created before the failure, so a follow-up pass can avoid duplicating them. diff --git a/templates/detect-lines/PROMPT.md b/templates/detect-lines/PROMPT.md index 973aadd..218c62e 100644 --- a/templates/detect-lines/PROMPT.md +++ b/templates/detect-lines/PROMPT.md @@ -28,8 +28,8 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). -4. Build the `{ "items": [...] }` payload described under TPEN API. Leave `body` empty — no text yet. -5. If HTTP PUT is available, send the request once. On any non-2xx response, do not retry — fall back. If HTTP PUT is unavailable from the start, go directly to the fallback. +4. If HTTP PUT is available, build the full payload under **TPEN API** and send the request once. On any non-2xx response, do not retry — fall back. +5. If HTTP PUT is unavailable (or step 4 fell back), emit the condensed payload under **Fallback** as the final code block. 6. Report count and which path was used (direct PUT or fallback). ## Rules @@ -70,7 +70,17 @@ Content-Type: application/json ## Fallback -When the direct PUT is impossible or returns non-2xx, emit the `{ "items": [...] }` body from TPEN API as the final code block of your report. It must be valid JSON (no comments, no placeholders — substitute the real coordinates). 
The user will paste it into the TPEN splitscreen tool, which submits it with their authorized token. +When the direct PUT is impossible or returns non-2xx, emit the condensed payload below as the final code block of your report. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON (no comments, no placeholders — substitute the real coordinates). + +``` +{ + "items": [ + { "target": "xywh=x,y,w,h" } + ] +} +``` + +One item per detected line, in reading order. `target` is the bare selector value (no `#`, no `pixel:` prefix). `body` is omitted because no text is produced by this task. ## Completion @@ -85,4 +95,4 @@ Fallback path, report: - path: `fallback` - count: number of line annotations in the payload - HTTP status and error body if a PUT was attempted first -- final code block: the full `{ "items": [...] }` JSON for the user to paste +- final code block: the condensed `{ "items": [...] }` JSON for the user to paste diff --git a/templates/detect-order-and-transcribe/PROMPT.md b/templates/detect-order-and-transcribe/PROMPT.md index 70be548..6e91bab 100644 --- a/templates/detect-order-and-transcribe/PROMPT.md +++ b/templates/detect-order-and-transcribe/PROMPT.md @@ -29,10 +29,11 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). 4. Run handwriting text recognition on each line's crop. Apply the recognition rules below. -5. Build the `{ "items": [...] }` payload described under TPEN API — one Annotation per line with the recognized text as the `TextualBody` value and `xywh=x,y,w,h` as the fragment selector. The `items` array MUST be in the global reading-order sequence from step 2 — this fixes the page's canonical line order. -6. 
If HTTP PUT is available, send the request once. On any non-2xx response, do not retry — fall back. If HTTP PUT is unavailable from the start, go directly to the fallback. -7. Report counts (lines saved/in payload, non-empty text, uncertain) and which path was used (direct PUT or fallback). -8. Report notable ambiguities (e.g., illegible lines transcribed as empty or flagged) and the detected block count so reviewers know whether a multi-block layout was recognised. +5. Assemble the per-line list in the global reading-order sequence from step 2 — this fixes the page's canonical line order for both paths. +6. If HTTP PUT is available, build the full payload under **TPEN API** from that list and send the request once. On any non-2xx response, do not retry — fall back. +7. If HTTP PUT is unavailable (or step 6 fell back), emit the condensed payload under **Fallback** as the final code block. +8. Report counts (lines saved/in payload, non-empty text, uncertain) and which path was used (direct PUT or fallback). +9. Report notable ambiguities (e.g., illegible lines transcribed as empty or flagged) and the detected block count so reviewers know whether a multi-block layout was recognised. ## Rules @@ -83,7 +84,17 @@ Content-Type: application/json ## Fallback -When the direct PUT is impossible or returns non-2xx, emit the `{ "items": [...] }` body from TPEN API as the final code block of your report, in the reading-order sequence from step 2. It must be valid JSON (no comments, no placeholders — substitute the real coordinates and recognized text). The user will paste it into the TPEN splitscreen tool, which submits it with their authorized token. +When the direct PUT is impossible or returns non-2xx, emit the condensed payload below as the final code block of your report, in the reading-order sequence from step 2. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. 
It must be valid JSON (no comments, no placeholders — substitute the real coordinates and recognized text). + +``` +{ + "items": [ + { "text": "", "target": "xywh=x,y,w,h" } + ] +} +``` + +One item per detected line. `target` is the bare selector value (no `#`, no `pixel:` prefix). `text` is an empty string for fully illegible lines — do not drop the item. Item order is the page's canonical reading order; do not interleave lines from different blocks. ## Completion @@ -100,4 +111,4 @@ Fallback path, report: - counts: lines in payload, lines with non-empty text, lines flagged uncertain, text blocks detected - HTTP status and error body if a PUT was attempted first - notable ambiguities worth a human review -- final code block: the full `{ "items": [...] }` JSON for the user to paste +- final code block: the condensed `{ "items": [...] }` JSON for the user to paste diff --git a/templates/transcribe-known-lines/PROMPT.md b/templates/transcribe-known-lines/PROMPT.md index 776f0e2..e688353 100644 --- a/templates/transcribe-known-lines/PROMPT.md +++ b/templates/transcribe-known-lines/PROMPT.md @@ -11,7 +11,7 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Existing lines -Each entry is ` | xywh= | ` in canvas coordinates. The body form is `body=[]` (empty), `text=""` (single plain-text `TextualBody`), or `body=` (anything else) — use it as context for what's already on the line. The fallback payload re-uses the full annotation URI verbatim as the `id` of each item; the server preserves these ids and updates only the body text. +Each entry is ` | xywh= | ` in canvas coordinates. The body form is `body=[]` (empty), `text=""` (single plain-text `TextualBody`), or `body=` (anything else) — use it as context for what's already on the line. The fallback payload re-uses the full annotation URI verbatim as the `id` of each item; the splitscreen tool preserves the existing target server-side and updates only the body text. 
{{existingLines}} @@ -44,7 +44,7 @@ Use only tools already available in your environment. Do not install packages, l - Use explicit uncertainty markers for unclear glyphs (for example `[a?]`). Do not force certainty. - Do not invent expansions. If an abbreviation mark is present, transcribe the mark; do not silently expand. - Keep line segmentation stable — one transcription string per existing line annotation. -- If a line's crop is illegible, send an empty body (direct) or emit an empty `TextualBody` value (fallback) and report the line id as unresolved — do not fabricate text. In the fallback payload, do not drop the item. +- If a line's crop is illegible, send an empty body (direct) or emit `"text": ""` (fallback) and report the line id as unresolved — do not fabricate text. In the fallback payload, do not drop the item. ## TPEN API @@ -60,34 +60,17 @@ Content-Type: text/plain ## Fallback -The fallback tool only accepts JSON, so it uses a single page-level PUT instead of per-line PATCH. When PATCH is unavailable or every attempt returned non-2xx, emit the payload below as the final code block of your report. There must be exactly one item per entry in "Existing lines", each re-using that entry's annotation URI verbatim as its `id`, preserving its `target` (the `xywh` selector shown above) unchanged, and carrying the recognized text as the `TextualBody` value (empty string for fully illegible lines). Item order must match the order of "Existing lines" — do not reorder. It must be valid JSON (no comments, no placeholders — substitute the real URIs, xywh selectors, and recognized text). +The fallback tool only accepts JSON, so it uses a single page-level PUT instead of per-line PATCH. When PATCH is unavailable or every attempt returned non-2xx, emit the condensed payload below as the final code block of your report. The TPEN splitscreen tool expands each item into a full W3C Annotation — preserving each line's existing target on the server — before PUTting it. 
``` -PUT {{pageEndpoint}} -Authorization: Bearer -Content-Type: application/json - { "items": [ - { - "id": "", - "body": [{ "type": "TextualBody", "value": "", "format": "text/plain" }], - "target": { - "source": "{{canvasId}}", - "type": "SpecificResource", - "selector": { - "type": "FragmentSelector", - "conformsTo": "http://www.w3.org/TR/media-frags/", - "value": "xywh=x,y,w,h" - } - }, - "motivation": "transcribing" - } + { "id": "", "text": "" } ] } ``` -The user will paste this into the TPEN splitscreen tool, which submits it with their authorized token. +There must be exactly one item per entry in "Existing lines", each re-using that entry's annotation URI verbatim as its `id`. Item order must match the order of "Existing lines" — do not reorder. `text` is an empty string for fully illegible lines — do not drop the item. It must be valid JSON (no comments, no placeholders). ## Completion @@ -102,4 +85,4 @@ Fallback path, report: - path: `fallback` - counts: lines in payload, lines flagged illegible - HTTP status and error body if a PATCH was attempted first -- final code block: the full `{ "items": [...] }` JSON for the user to paste +- final code block: the condensed `{ "items": [...] }` JSON for the user to paste diff --git a/tpen-service.js b/tpen-service.js index 79dceae..00a56dd 100644 --- a/tpen-service.js +++ b/tpen-service.js @@ -17,7 +17,7 @@ import { CONFIG } from './config.js' * @param {string} token JWT. * @returns {Promise} parsed JSON body (or `null` when the response has no body). */ -async function authedJson(path, method, body, token) { +async function serviceRequest(path, method, body, token) { if (!token) throw new Error(`Missing auth token for ${path}`) const init = { method, @@ -50,7 +50,7 @@ async function authedJson(path, method, body, token) { * @returns {Promise} parsed JSON body. 
*/ function authedGet(path, token) { - return authedJson(path, 'GET', undefined, token) + return serviceRequest(path, 'GET', undefined, token) } /** @@ -92,7 +92,7 @@ export function fetchPageResolved(projectID, pageID, token) { * @returns {Promise} */ export function putPage(projectID, pageID, body, token) { - return authedJson( + return serviceRequest( `/project/${encodeURIComponent(projectID)}/page/${encodeURIComponent(pageID)}`, 'PUT', body, token ) @@ -109,7 +109,7 @@ export function putPage(projectID, pageID, body, token) { * @returns {Promise} */ export function postColumn(projectID, pageID, body, token) { - return authedJson( + return serviceRequest( `/project/${encodeURIComponent(projectID)}/page/${encodeURIComponent(pageID)}/column`, 'POST', body, token ) diff --git a/ui-manager.js b/ui-manager.js index a4059f9..a645c8f 100644 --- a/ui-manager.js +++ b/ui-manager.js @@ -10,7 +10,7 @@ import { listTemplates, renderTemplate } from './prompt-generator.js' import { pageEndpoint, putPage, postColumn } from './tpen-service.js' -import { trailingId } from './iiif-ids.js' +import { getIRI, trailingId } from './iiif-ids.js' /** * Build a DOM element. Recognizes a few special prop keys: @@ -50,6 +50,60 @@ const OPTIONAL_ID_FIELDS = [ { name: 'lineID', label: 'Line ID (optional)' } ] +/** + * Build a W3C `SpecificResource` target from a canvas IRI and an `xywh=…` + * selector value. + * @param {string} canvasId + * @param {string} xywh the bare selector value (e.g. `xywh=10,20,300,40`). + */ +function buildSpecificResourceTarget(canvasId, xywh) { + return { + source: canvasId, + type: 'SpecificResource', + selector: { + type: 'FragmentSelector', + conformsTo: 'http://www.w3.org/TR/media-frags/', + value: xywh + } + } +} + +/** + * Expand a condensed fallback item into a full W3C Annotation. 
Condensed items + * carry only the per-line differences — `target` as a bare `"xywh=x,y,w,h"` + * selector value, `text` as a plain string, optionally `id` for known-line + * updates. The fixed boilerplate (canvas source, selector type, motivation, + * body wrapper) is reapplied here so the prompt output stays small. + * + * For known-line updates the condensed shape carries only `id` and `text`; the + * existing `target` is looked up from `existingItemsById` and echoed verbatim + * so the services API does not wipe it on PUT. + * + * A no-op when `target` is already an object and `body` is already present — + * lets legacy full-shape pastes submit unchanged. + * @param {any} item raw parsed item from the fallback textarea. + * @param {string|null} canvasId the canvas IRI used as the annotation's target source. + * @param {Map} existingItemsById lookup from annotation id → resolved page item. + * @returns {object} a W3C Annotation ready for PUT. + */ +function expandFallbackItem(item, canvasId, existingItemsById) { + const out = { ...item } + if (typeof item.target === 'string') { + out.target = buildSpecificResourceTarget(canvasId, item.target) + } else if (out.target === undefined && typeof item.id === 'string') { + const existing = existingItemsById.get(item.id) + if (existing?.target !== undefined) out.target = existing.target + } + if (typeof item.text === 'string') { + out.body = item.text === '' + ? [] + : [{ type: 'TextualBody', value: item.text, format: 'text/plain' }] + delete out.text + } + if (!('motivation' in out)) out.motivation = 'transcribing' + return out +} + /** * Renders the three UI states (status, id form, workspace) into a single * root node and owns state while a workspace is displayed. 
The workspace @@ -335,12 +389,38 @@ export class UIManager { setFeedback('Each item in `items` must be an annotation object.') return } + if ('target' in item && typeof item.target !== 'string' && typeof item.target !== 'object') { + setFeedback('Each item `target` must be an `xywh=…` string or a full target object.') + return + } + if ('text' in item && typeof item.text !== 'string') { + setFeedback('Each item `text` must be a string.') + return + } + } + const canvasId = getIRI(this.state.canvas) + if (!canvasId && payload.items.some(i => typeof i.target === 'string')) { + setFeedback('Canvas context missing — reload the workspace and retry.') + return + } + // Index the resolved page's items by id so the expander can + // echo each existing line's `target` when the condensed item + // only carries `id` + `text` (known-line updates). Without the + // echo the services API would reset the line's target. + const existingItemsById = new Map() + for (const existing of this.state.page?.items ?? []) { + const eid = getIRI(existing) + if (eid) existingItemsById.set(eid, existing) } - // Narrow to the minimal PUT body the services API needs. - // Top-level keys beyond `items` would otherwise be applied to - // the page record by the server's property-copy loop. - const result = await putPage(projectID, pageID, { items: payload.items }, token) - const saved = payload.items.length + // Expand condensed items (string target, optional text) into + // full W3C Annotations. Legacy full-shape items pass through + // unchanged. Narrow to the minimal PUT body the services API + // needs — top-level keys beyond `items` would otherwise be + // applied to the page record by the server's property-copy + // loop. 
+ const items = payload.items.map(i => expandFallbackItem(i, canvasId, existingItemsById)) + const result = await putPage(projectID, pageID, { items }, token) + const saved = items.length if (result && typeof result === 'object') { writeTextarea(JSON.stringify(result, null, 2)) setFeedback(`Saved ${saved} line item${saved === 1 ? '' : 's'}. Server response (with ids) is in the textarea.`, true) From 26127d715919be363cbae7cea02ffff57ef0dcc3 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Thu, 23 Apr 2026 18:02:00 -0500 Subject: [PATCH 21/47] condensed communication --- templates/detect-columns/PROMPT.md | 2 +- templates/inject-context.js | 10 +- tpen-service.js | 17 ---- ui-manager.js | 150 +++++++++++++++-------------- 4 files changed, 85 insertions(+), 94 deletions(-) diff --git a/templates/detect-columns/PROMPT.md b/templates/detect-columns/PROMPT.md index 220020f..3a39d88 100644 --- a/templates/detect-columns/PROMPT.md +++ b/templates/detect-columns/PROMPT.md @@ -50,7 +50,7 @@ Use only tools already available in your environment. Do not install packages, l 5. Build a global reading-order sequence of all existing line ids: columns in reading order; within each column, lines sorted top-to-bottom by the `xywh` y-center. 6. DELETE every existing column on the page (see TPEN API below). On any non-2xx, stop and report. Do not POST or PUT after a DELETE failure. 7. For each detected column, POST `{ label, annotations }` where `annotations` is the contiguous slice of the reading-order id sequence from step 5 that belongs to that column. Choose a unique label per column (e.g., `Column A`, `Column B`) that does not clash with any other label chosen in this run. On any non-2xx, stop and report — columns POSTed before the failure remain persisted. -8. If the reading-order sequence from step 5 differs from the order of "Existing lines", PUT the page with `items` in the new order. 
Each entry re-uses the existing annotation URI verbatim as its `id`, its `body` reconstructed from the entry's body form, and its `target` rebuilt from the entry's `xywh` selector. The server remaps column references when URIs change, but echoing `body` and `target` verbatim avoids minting unnecessary RERUM versions. If the sequence is already in the current order, skip the PUT. On any non-2xx, stop and report. +8. Compare your step-5 reading-order sequence to "Existing lines" position-by-position — they differ if any index holds a different annotation URI. If they match exactly, skip the PUT. Otherwise, PUT the page with `items` in the step-5 order. Each entry re-uses the existing annotation URI verbatim as its `id`, its `body` reconstructed from the entry's body form, and its `target` rebuilt from the entry's `xywh` selector. The server remaps column references when URIs change, but echoing `body` and `target` verbatim avoids minting unnecessary RERUM versions. On any non-2xx, stop and report. 9. Report: columns deleted, columns created, whether the page order was updated, and per-column line counts. ## Rules diff --git a/templates/inject-context.js b/templates/inject-context.js index 9a65ca3..33573dc 100644 --- a/templates/inject-context.js +++ b/templates/inject-context.js @@ -105,8 +105,14 @@ function formatBody(body) { if (!Array.isArray(body) || body.length === 0) return 'body=[]' if (body.length === 1) { const only = body[0] - const isPlainTextual = only - && typeof only === 'object' + // Require EXACTLY {type, value, format} with the expected values so the + // `text=` → `[{type, value, format}]` round-trip is lossless. Any extra + // field (e.g. `language`, `creator`, `id`) would be silently dropped on + // the PUT echo and trigger a needless RERUM re-version. + const keys = only && typeof only === 'object' ? 
Object.keys(only) : [] + const isPlainTextual = + keys.length === 3 + && keys.every(k => k === 'type' || k === 'value' || k === 'format') && only.type === 'TextualBody' && typeof only.value === 'string' && only.format === 'text/plain' diff --git a/tpen-service.js b/tpen-service.js index 00a56dd..c1b2f6b 100644 --- a/tpen-service.js +++ b/tpen-service.js @@ -98,23 +98,6 @@ export function putPage(projectID, pageID, body, token) { ) } -/** - * POST a single column to a page. Body is `{ label, annotations }` where each - * `annotations[i]` must match an existing `page.items[*].id`; the server - * rejects duplicate labels within the page. - * @param {string} projectID - * @param {string} pageID - * @param {{ label: string, annotations: Array }} body - * @param {string} token - * @returns {Promise} - */ -export function postColumn(projectID, pageID, body, token) { - return serviceRequest( - `/project/${encodeURIComponent(projectID)}/page/${encodeURIComponent(pageID)}/column`, - 'POST', body, token - ) -} - /** * Build the page endpoint URL (page/index.js). Templates use this for PUT/PATCH * operations that target the page or its sub-resources (lines, columns). diff --git a/ui-manager.js b/ui-manager.js index a645c8f..56fb07d 100644 --- a/ui-manager.js +++ b/ui-manager.js @@ -9,7 +9,7 @@ */ import { listTemplates, renderTemplate } from './prompt-generator.js' -import { pageEndpoint, putPage, postColumn } from './tpen-service.js' +import { pageEndpoint, putPage } from './tpen-service.js' import { getIRI, trailingId } from './iiif-ids.js' /** @@ -104,6 +104,38 @@ function expandFallbackItem(item, canvasId, existingItemsById) { return out } +/** `{ items: [...] }` page-PUT shape — the only shape any prompt fallback emits. */ +function isItemsPayload(p) { + return p && typeof p === 'object' && !Array.isArray(p) && Array.isArray(p.items) +} + +/** + * Validate a pre-expansion `items` array, returning a user-facing error string + * or `null`. 
Catches the shape-erasure trap: a known-line update (string `id`) + * carrying neither `text` nor `body` would pass the expander and then be PUT + * with `body` absent, causing the services API to overwrite the existing body + * with `[]` on save (Line.js#saveLineToRerum: `body: this.body ?? []`). + * @param {Array} items + * @returns {string|null} + */ +function validateItems(items) { + for (const item of items) { + if (!item || typeof item !== 'object' || Array.isArray(item)) { + return 'Each item in `items` must be an annotation object.' + } + if ('target' in item && typeof item.target !== 'string' && typeof item.target !== 'object') { + return 'Each item `target` must be an `xywh=…` string or a full target object.' + } + if ('text' in item && typeof item.text !== 'string') { + return 'Each item `text` must be a string.' + } + if (typeof item.id === 'string' && !('text' in item) && !('body' in item)) { + return `Item for ${item.id} is missing both \`text\` and \`body\` — would erase the existing transcription.` + } + } + return null +} + /** * Renders the three UI states (status, id form, workspace) into a single * root node and owns state while a workspace is displayed. The workspace @@ -315,7 +347,7 @@ export class UIManager { const ready = hasPage && Boolean(token) const textarea = el('textarea', { rows: 10, spellcheck: false, autocomplete: 'off', - placeholder: '{ "items": [ … ] }\nor\n{ "label": "Column A", "annotations": ["…"] }\nor\n[ { "label": "…", "annotations": ["…"] }, … ]', + placeholder: '{ "items": [ { "target": "xywh=10,20,400,30" } ] }', attrs: { 'aria-label': 'JSON payload to submit to TPEN' } }) const submit = el('button', { @@ -336,11 +368,8 @@ export class UIManager { } /** - * Parse the pasted JSON, classify its shape, and dispatch the matching - * TPEN write. Shapes accepted: - * - `{ items: [...] }` → `PUT page` - * - `{ label, annotations: [...] }` → single `POST column` - * - `[ { label, annotations }, ... 
]` → iterate `POST column`, stop at first failure + * Parse the pasted JSON and submit it as a page PUT. Only one shape is + * accepted: `{ items: [...] }` — the shape every prompt fallback emits. * @param {HTMLTextAreaElement} textarea * @param {HTMLButtonElement} button * @param {HTMLElement} feedback @@ -382,76 +411,13 @@ export class UIManager { button.disabled = true setFeedback('Submitting…') + const opts = { projectID, pageID, token, setFeedback, writeTextarea } try { - if (payload && typeof payload === 'object' && !Array.isArray(payload) && Array.isArray(payload.items)) { - for (const item of payload.items) { - if (!item || typeof item !== 'object' || Array.isArray(item)) { - setFeedback('Each item in `items` must be an annotation object.') - return - } - if ('target' in item && typeof item.target !== 'string' && typeof item.target !== 'object') { - setFeedback('Each item `target` must be an `xywh=…` string or a full target object.') - return - } - if ('text' in item && typeof item.text !== 'string') { - setFeedback('Each item `text` must be a string.') - return - } - } - const canvasId = getIRI(this.state.canvas) - if (!canvasId && payload.items.some(i => typeof i.target === 'string')) { - setFeedback('Canvas context missing — reload the workspace and retry.') - return - } - // Index the resolved page's items by id so the expander can - // echo each existing line's `target` when the condensed item - // only carries `id` + `text` (known-line updates). Without the - // echo the services API would reset the line's target. - const existingItemsById = new Map() - for (const existing of this.state.page?.items ?? []) { - const eid = getIRI(existing) - if (eid) existingItemsById.set(eid, existing) - } - // Expand condensed items (string target, optional text) into - // full W3C Annotations. Legacy full-shape items pass through - // unchanged. 
Narrow to the minimal PUT body the services API - // needs — top-level keys beyond `items` would otherwise be - // applied to the page record by the server's property-copy - // loop. - const items = payload.items.map(i => expandFallbackItem(i, canvasId, existingItemsById)) - const result = await putPage(projectID, pageID, { items }, token) - const saved = items.length - if (result && typeof result === 'object') { - writeTextarea(JSON.stringify(result, null, 2)) - setFeedback(`Saved ${saved} line item${saved === 1 ? '' : 's'}. Server response (with ids) is in the textarea.`, true) - } else { - setFeedback(`Saved ${saved} line item${saved === 1 ? '' : 's'}. Server returned no body; pasted payload left in the textarea.`, true) - } + if (isItemsPayload(payload)) { + await this.#submitItems(payload.items, opts) return } - if (payload && typeof payload === 'object' && !Array.isArray(payload) - && typeof payload.label === 'string' && Array.isArray(payload.annotations)) { - await postColumn(projectID, pageID, payload, token) - setFeedback(`Created column "${payload.label}".`, true) - writeTextarea('') - return - } - if (Array.isArray(payload) && payload.every(c => - c && typeof c === 'object' && typeof c.label === 'string' && Array.isArray(c.annotations))) { - for (let i = 0; i < payload.length; i++) { - const col = payload[i] - try { await postColumn(projectID, pageID, col, token) } - catch (err) { - writeTextarea(JSON.stringify(payload.slice(i), null, 2)) - setFeedback(`Created ${i} of ${payload.length} columns; failed on "${col.label}" — ${err.message}. Remaining columns kept in the textarea for retry.`) - return - } - } - setFeedback(`Created ${payload.length} column${payload.length === 1 ? '' : 's'}.`, true) - writeTextarea('') - return - } - setFeedback('Unrecognized payload shape — expected `{items: [...]}`, `{label, annotations}`, or an array of `{label, annotations}`.') + setFeedback('Unrecognized payload shape — expected `{ "items": [...] 
}`.') } catch (err) { const status = err?.status ? `TPEN API ${err.status}: ` : '' setFeedback(`${status}${err?.message ?? 'Submission failed.'}`) @@ -462,6 +428,42 @@ export class UIManager { } } + /** + * Validate, expand, and PUT an `items` payload. Narrows the PUT body to + * just `{ items }` — top-level keys beyond `items` would otherwise be + * applied to the page record by the server's property-copy loop. + * @param {Array} items + * @param {{projectID:string,pageID:string,token:string,setFeedback:Function,writeTextarea:Function}} opts + */ + async #submitItems(items, { projectID, pageID, token, setFeedback, writeTextarea }) { + const validationError = validateItems(items) + if (validationError) { setFeedback(validationError); return } + const canvasId = getIRI(this.state.canvas) + if (!canvasId && items.some(i => typeof i.target === 'string')) { + setFeedback('Canvas context missing — reload the workspace and retry.') + return + } + // Index the resolved page's items by id so the expander can echo each + // existing line's `target` when the condensed item only carries `id` + + // `text` (known-line updates). Without the echo the services API would + // reset the line's target. + const existingItemsById = new Map() + for (const existing of this.state.page?.items ?? []) { + const eid = getIRI(existing) + if (eid) existingItemsById.set(eid, existing) + } + const expanded = items.map(i => expandFallbackItem(i, canvasId, existingItemsById)) + const result = await putPage(projectID, pageID, { items: expanded }, token) + const saved = expanded.length + const noun = `line item${saved === 1 ? '' : 's'}` + if (result && typeof result === 'object') { + writeTextarea(JSON.stringify(result, null, 2)) + setFeedback(`Saved ${saved} ${noun}. Server response (with ids) is in the textarea.`, true) + } else { + setFeedback(`Saved ${saved} ${noun}. 
Server returned no body; pasted payload left in the textarea.`, true) + } + } + /** * Update the stored token and remove the in-workspace consent button if * it's on screen. Called from `PromptsApp.acceptAuth` when the parent From 4977c7fa315eb748691ce80557ff2519524e2581 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Thu, 23 Apr 2026 18:16:27 -0500 Subject: [PATCH 22/47] condensed communication --- templates/detect-and-transcribe/PROMPT.md | 1 + templates/detect-columns-and-lines/PROMPT.md | 3 ++- templates/detect-lines/PROMPT.md | 1 + templates/detect-order-and-transcribe/PROMPT.md | 1 + templates/transcribe-known-lines/PROMPT.md | 2 +- ui-manager.js | 17 +++++++++++++---- 6 files changed, 19 insertions(+), 6 deletions(-) diff --git a/templates/detect-and-transcribe/PROMPT.md b/templates/detect-and-transcribe/PROMPT.md index 25f7947..b098faa 100644 --- a/templates/detect-and-transcribe/PROMPT.md +++ b/templates/detect-and-transcribe/PROMPT.md @@ -43,6 +43,7 @@ Use only tools already available in your environment. Do not install packages, l - Prefer tight bounds when you can measure them; best-effort bounds are acceptable. When uncertain whether a tall run is one line or several, prefer splitting over merging. - Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line. - Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected downstream. +- Zero lines detected is an unprocessable outcome. Stop and report — do not PUT, do not emit a fallback payload. An empty `items` array would erase every existing annotation on the page. 
### Recognition (HANDWRITING_TEXT_RECOGNITION) diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index c6e70f0..1ee67fe 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -18,7 +18,7 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: 1. Ability to fetch the image bytes (or a derivative) and identify line and column bounds from them. Precise pixel measurement is preferred when available; visual estimation from the fetched image is acceptable otherwise. -2. Either HTTP PUT and POST capability with `Content-Type: application/json`, or the ability to emit the payloads as fallback JSON code blocks in your report. If either verb is unavailable, skip straight to the Fallback section — do not retry. +2. Either HTTP PUT and POST capability with `Content-Type: application/json`, or the ability to emit the lines-only payload as a fallback JSON code block in your report. Column creation has no fallback — if POST is unavailable, column grouping is dropped. If PUT is unavailable, skip straight to the Fallback section — do not retry. Use only tools already available in your environment. Do not install packages, libraries, or system utilities. @@ -48,6 +48,7 @@ Use only tools already available in your environment. Do not install packages, l - Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line. - Do not POST a column with an empty `annotations` array — the server rejects it. Skip any detected column that ends up with zero assigned lines. - Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected downstream. +- Zero lines detected is an unprocessable outcome. 
Stop and report — do not PUT, do not POST a column, do not emit a fallback payload. An empty `items` array would erase every existing annotation on the page. ## TPEN API diff --git a/templates/detect-lines/PROMPT.md b/templates/detect-lines/PROMPT.md index 218c62e..f21e559 100644 --- a/templates/detect-lines/PROMPT.md +++ b/templates/detect-lines/PROMPT.md @@ -39,6 +39,7 @@ Use only tools already available in your environment. Do not install packages, l - Prefer tight bounds when you can measure them; best-effort bounds are acceptable. When uncertain whether a tall run is one line or several, prefer splitting over merging. - Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line. - Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected downstream. +- Zero lines detected is an unprocessable outcome. Stop and report — do not PUT, do not emit a fallback payload. An empty `items` array would erase every existing annotation on the page. ## TPEN API diff --git a/templates/detect-order-and-transcribe/PROMPT.md b/templates/detect-order-and-transcribe/PROMPT.md index 6e91bab..1b97f92 100644 --- a/templates/detect-order-and-transcribe/PROMPT.md +++ b/templates/detect-order-and-transcribe/PROMPT.md @@ -44,6 +44,7 @@ Use only tools already available in your environment. Do not install packages, l - Prefer tight bounds when you can measure them; best-effort bounds are acceptable. When uncertain whether a tall run is one line or several, prefer splitting over merging. - Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line. - Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected downstream. +- Zero lines detected is an unprocessable outcome. Stop and report — do not PUT, do not emit a fallback payload. 
An empty `items` array would erase every existing annotation on the page. ### Recognition (HANDWRITING_TEXT_RECOGNITION) diff --git a/templates/transcribe-known-lines/PROMPT.md b/templates/transcribe-known-lines/PROMPT.md index e688353..b363987 100644 --- a/templates/transcribe-known-lines/PROMPT.md +++ b/templates/transcribe-known-lines/PROMPT.md @@ -60,7 +60,7 @@ Content-Type: text/plain ## Fallback -The fallback tool only accepts JSON, so it uses a single page-level PUT instead of per-line PATCH. When PATCH is unavailable or every attempt returned non-2xx, emit the condensed payload below as the final code block of your report. The TPEN splitscreen tool expands each item into a full W3C Annotation — preserving each line's existing target on the server — before PUTting it. +The fallback tool only accepts JSON, so it uses a single page-level PUT instead of per-line PATCH. When PATCH is unavailable or every attempt returned non-2xx, emit the condensed payload below as the final code block of your report. The TPEN splitscreen tool re-uses each line's existing target from the hydrated page context before PUTting it — the item's `id` must match an entry in "Existing lines" above. ``` { diff --git a/ui-manager.js b/ui-manager.js index 56fb07d..14d7afe 100644 --- a/ui-manager.js +++ b/ui-manager.js @@ -111,14 +111,23 @@ function isItemsPayload(p) { /** * Validate a pre-expansion `items` array, returning a user-facing error string - * or `null`. Catches the shape-erasure trap: a known-line update (string `id`) - * carrying neither `text` nor `body` would pass the expander and then be PUT - * with `body` absent, causing the services API to overwrite the existing body - * with `[]` on save (Line.js#saveLineToRerum: `body: this.body ?? []`). + * or `null`. Catches two erasure traps: + * + * 1. 
An empty array — the services PUT handler's top-level copy loop writes + * `page.items = []` even when `itemsProvided` is false, erasing every line + * reference on the page and leaving columns pointing at stale ids. Prompts + * should stop and report "no lines" rather than emit an empty payload. + * 2. A known-line update (string `id`) carrying neither `text` nor `body` + * would pass the expander and then be PUT with `body` absent, causing the + * services API to overwrite the existing body with `[]` on save + * (Line.js#saveLineToRerum: `body: this.body ?? []`). * @param {Array} items * @returns {string|null} */ function validateItems(items) { + if (items.length === 0) { + return '`items` is empty — submitting would erase every line on the page. Regenerate the prompt response with at least one detected line or stop.' + } for (const item of items) { if (!item || typeof item !== 'object' || Array.isArray(item)) { return 'Each item in `items` must be an annotation object.' From e5da7a802ec82c7a57b6ff69c2e7fcbd9b61f0cf Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Thu, 23 Apr 2026 18:41:48 -0500 Subject: [PATCH 23/47] changes from review --- templates/detect-columns-and-lines/PROMPT.md | 4 +- templates/detect-columns-and-lines/index.js | 2 +- templates/detect-columns/index.js | 2 +- templates/inject-context.js | 14 ++-- tpen-service.js | 2 +- ui-manager.js | 72 +++++++++++++------- 6 files changed, 60 insertions(+), 36 deletions(-) diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index 1ee67fe..7f1b5d2 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -33,8 +33,8 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). 4. 
Assemble the per-line list in the global reading-order sequence from step 2 — this fixes the page's canonical line order for both paths. -5. If HTTP PUT and POST are available: build the full payload under **TPEN API** and PUT the items once, then for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The response returns `items` in submission order, so use each line's column index from step 2 to slice the returned ids. Labels must be unique and must not clash with anything in "Existing columns on this page". On any non-2xx, stop and fall back for everything not yet persisted. -6. If HTTP PUT/POST are unavailable (or step 5 fell back), emit the condensed payload under **Fallback** as the final code block. Column creation is out of scope for the fallback path. +5. If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once. If the PUT returns non-2xx, stop and fall back — lines are not persisted yet. If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The response returns `items` in submission order, so use each line's column index from step 2 to slice the returned ids. Labels must be unique and must not clash with anything in "Existing columns on this page". If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. +6. If HTTP PUT is unavailable (or the PUT in step 5 failed), emit the condensed payload under **Fallback** as the final code block. Column creation is out of scope for the fallback path. 7. Report counts (lines saved/in payload, columns created/in payload) and which path was used. 
## Rules diff --git a/templates/detect-columns-and-lines/index.js b/templates/detect-columns-and-lines/index.js index b722206..bf2c1c5 100644 --- a/templates/detect-columns-and-lines/index.js +++ b/templates/detect-columns-and-lines/index.js @@ -16,6 +16,6 @@ export const detectColumnsAndLinesTemplate = { templateUrl: new URL('./PROMPT.md', import.meta.url), buildContext: (ctx) => ({ ...buildTemplateContext(ctx), - existingColumns: formatExistingColumns(ctx.project, ctx.pageID, ctx.page) + existingColumns: formatExistingColumns(ctx.project, ctx.page) }) } diff --git a/templates/detect-columns/index.js b/templates/detect-columns/index.js index e26199a..5a14e46 100644 --- a/templates/detect-columns/index.js +++ b/templates/detect-columns/index.js @@ -16,7 +16,7 @@ export const detectColumnsTemplate = { templateUrl: new URL('./PROMPT.md', import.meta.url), buildContext: (ctx) => ({ ...buildTemplateContext(ctx), - existingColumns: formatExistingColumns(ctx.project, ctx.pageID, ctx.page), + existingColumns: formatExistingColumns(ctx.project, ctx.page), existingLines: formatExistingLines(ctx.page) }) } diff --git a/templates/inject-context.js b/templates/inject-context.js index 33573dc..0ff7f78 100644 --- a/templates/inject-context.js +++ b/templates/inject-context.js @@ -150,19 +150,19 @@ export function formatExistingLines(fetchedPage) { /** * Render the current column state for a given page as a markdown bullet list. * Used by templates that must avoid duplicate column labels. The directly - * fetched `page` is authoritative when supplied, since the project graph may - * not hydrate `layer.pages[].columns` for every page. + * fetched `page` is authoritative, since the project graph may not hydrate + * `layer.pages[].columns` for every page; the project tree is consulted as a + * fallback. * @param {any} project the TPEN project object. - * @param {string|null|undefined} pageID the short page id or full page IRI. 
- * @param {any} [fetchedPage] the page object returned by `fetchPageResolved`, preferred when available. + * @param {any} page the page object returned by `fetchPageResolved`. * @returns {string} */ -export function formatExistingColumns(project, pageID, fetchedPage = null) { - const tail = trailingId(pageID) +export function formatExistingColumns(project, page) { + const tail = trailingId(page) const projectPage = (project?.layers ?? []) .flatMap(l => l.pages ?? []) .find(pg => trailingId(pg) === tail) - const cols = fetchedPage?.columns ?? projectPage?.columns ?? [] + const cols = page?.columns ?? projectPage?.columns ?? [] if (!Array.isArray(cols) || cols.length === 0) { return '- (No existing columns on this page — labels must be unique when created.)' } diff --git a/tpen-service.js b/tpen-service.js index c1b2f6b..46cfb56 100644 --- a/tpen-service.js +++ b/tpen-service.js @@ -34,7 +34,7 @@ async function serviceRequest(path, method, body, token) { // utilities/shared.js#respondWithError and utilities/routeErrorHandler.js). const detail = await res.json().catch(() => ({})) const msg = detail.message ?? detail.error ?? res.statusText - const err = new Error(`${res.status} ${path}: ${msg}`) + const err = new Error(`${path}: ${msg}`) err.status = res.status throw err } diff --git a/ui-manager.js b/ui-manager.js index 14d7afe..d8d6473 100644 --- a/ui-manager.js +++ b/ui-manager.js @@ -69,18 +69,49 @@ function buildSpecificResourceTarget(canvasId, xywh) { } /** - * Expand a condensed fallback item into a full W3C Annotation. Condensed items - * carry only the per-line differences — `target` as a bare `"xywh=x,y,w,h"` - * selector value, `text` as a plain string, optionally `id` for known-line - * updates. The fixed boilerplate (canvas source, selector type, motivation, - * body wrapper) is reapplied here so the prompt output stays small. + * Pull the bare `xywh=…` selector value out of whatever target shape the + * fallback item carries. 
* - * For known-line updates the condensed shape carries only `id` and `text`; the - * existing `target` is looked up from `existingItemsById` and echoed verbatim - * so the services API does not wipe it on PUT. + * - Bare string (condensed shape) → the string itself. + * - Object `SpecificResource` (legacy full-shape paste) → `selector.value`. + * - Absent; known-line update (`{id, text}`) → look up the existing line's + * target on the hydrated page and extract its selector value. * - * A no-op when `target` is already an object and `body` is already present — - * lets legacy full-shape pastes submit unchanged. + * Everything else returns `null`; the caller leaves `target` off and the + * services API rejects the item with `Line data is malformed` — the same + * outcome as submitting before the refactor. + * @param {any} item + * @param {Map} existingItemsById + * @returns {string|null} + */ +function resolveXywh(item, existingItemsById) { + if (typeof item.target === 'string') return item.target + if (item.target && typeof item.target === 'object') { + return typeof item.target.selector?.value === 'string' ? item.target.selector.value : null + } + if (typeof item.id === 'string') { + const existing = existingItemsById.get(item.id) + const value = existing?.target?.selector?.value + return typeof value === 'string' ? value : null + } + return null +} + +/** + * Expand a condensed fallback item into a full W3C Annotation. Every output + * target is rebuilt fresh with `canvasId` as `source` — we don't trust any + * source that rode in on a pasted item or an echoed existing target, so the + * rebuilt annotation always points at the canvas the UI is showing. + * + * The condensed per-item shapes are (by prompt): + * + * - `{ target: "xywh=…" }` — detection only. + * - `{ target: "xywh=…", text }` — detection + transcription. + * - `{ id, text }` — known-line update; xywh is looked up from the hydrated + * page. 
+ * + * Legacy full-shape pastes pass through in all other respects — only + * `target.source` gets normalized and `motivation` is filled when missing. * @param {any} item raw parsed item from the fallback textarea. * @param {string|null} canvasId the canvas IRI used as the annotation's target source. * @param {Map} existingItemsById lookup from annotation id → resolved page item. @@ -88,12 +119,8 @@ function buildSpecificResourceTarget(canvasId, xywh) { */ function expandFallbackItem(item, canvasId, existingItemsById) { const out = { ...item } - if (typeof item.target === 'string') { - out.target = buildSpecificResourceTarget(canvasId, item.target) - } else if (out.target === undefined && typeof item.id === 'string') { - const existing = existingItemsById.get(item.id) - if (existing?.target !== undefined) out.target = existing.target - } + const xywh = resolveXywh(item, existingItemsById) + if (xywh) out.target = buildSpecificResourceTarget(canvasId, xywh) if (typeof item.text === 'string') { out.body = item.text === '' ? [] @@ -448,14 +475,14 @@ export class UIManager { const validationError = validateItems(items) if (validationError) { setFeedback(validationError); return } const canvasId = getIRI(this.state.canvas) - if (!canvasId && items.some(i => typeof i.target === 'string')) { + if (!canvasId) { setFeedback('Canvas context missing — reload the workspace and retry.') return } - // Index the resolved page's items by id so the expander can echo each - // existing line's `target` when the condensed item only carries `id` + - // `text` (known-line updates). Without the echo the services API would - // reset the line's target. + // Index the resolved page's items by id so the expander can recover + // each existing line's xywh for known-line updates (`{id, text}` only). + // The rebuilt target still uses `canvasId` as `source`; only the xywh + // selector value is pulled from the hydrated item. 
const existingItemsById = new Map() for (const existing of this.state.page?.items ?? []) { const eid = getIRI(existing) @@ -541,9 +568,6 @@ export class UIManager { try { const full = renderTemplate(select.value, { project: s.project, page: s.page, canvas: s.canvas, - layer: s.layer, column: s.column, line: s.line, - projectID: s.projectID, pageID: s.pageID, - layerID: s.layerID, columnID: s.columnID, lineID: s.lineID, token: s.token, pageEndpoint: (s.projectID && s.pageID) ? pageEndpoint(s.projectID, s.pageID) : null }) From 59d73636d1c2dfc24b14f4ffffdd1cea5d24635d Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Thu, 23 Apr 2026 18:53:52 -0500 Subject: [PATCH 24/47] condense --- templates/detect-columns-and-lines/PROMPT.md | 7 +++---- templates/detect-order-and-transcribe/PROMPT.md | 9 ++++----- templates/inject-context.js | 13 ++++++------- ui-manager.js | 7 +------ 4 files changed, 14 insertions(+), 22 deletions(-) diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index 7f1b5d2..65e6e3e 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -32,10 +32,9 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). -4. Assemble the per-line list in the global reading-order sequence from step 2 — this fixes the page's canonical line order for both paths. -5. If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once. If the PUT returns non-2xx, stop and fall back — lines are not persisted yet. If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. 
The response returns `items` in submission order, so use each line's column index from step 2 to slice the returned ids. Labels must be unique and must not clash with anything in "Existing columns on this page". If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. -6. If HTTP PUT is unavailable (or the PUT in step 5 failed), emit the condensed payload under **Fallback** as the final code block. Column creation is out of scope for the fallback path. -7. Report counts (lines saved/in payload, columns created/in payload) and which path was used. +4. If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once in the global reading-order sequence from step 2. If the PUT returns non-2xx, stop and fall back — lines are not persisted yet. If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The response returns `items` in submission order, so use each line's column index from step 2 to slice the returned ids. Labels must be unique and must not clash with anything in "Existing columns on this page". If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. +5. If HTTP PUT is unavailable (or the PUT in step 4 failed), emit the condensed payload under **Fallback** as the final code block. Column creation is out of scope for the fallback path. +6. Report counts (lines saved/in payload, columns created/in payload) and which path was used. ## Rules diff --git a/templates/detect-order-and-transcribe/PROMPT.md b/templates/detect-order-and-transcribe/PROMPT.md index 1b97f92..f7d5645 100644 --- a/templates/detect-order-and-transcribe/PROMPT.md +++ b/templates/detect-order-and-transcribe/PROMPT.md @@ -29,11 +29,10 @@ Use only tools already available in your environment. 
Do not install packages, l - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). 4. Run handwriting text recognition on each line's crop. Apply the recognition rules below. -5. Assemble the per-line list in the global reading-order sequence from step 2 — this fixes the page's canonical line order for both paths. -6. If HTTP PUT is available, build the full payload under **TPEN API** from that list and send the request once. On any non-2xx response, do not retry — fall back. -7. If HTTP PUT is unavailable (or step 6 fell back), emit the condensed payload under **Fallback** as the final code block. -8. Report counts (lines saved/in payload, non-empty text, uncertain) and which path was used (direct PUT or fallback). -9. Report notable ambiguities (e.g., illegible lines transcribed as empty or flagged) and the detected block count so reviewers know whether a multi-block layout was recognised. +5. If HTTP PUT is available, build the full payload under **TPEN API** in the global reading-order sequence from step 2 and send the request once. On any non-2xx response, do not retry — fall back. +6. If HTTP PUT is unavailable (or step 5 fell back), emit the condensed payload under **Fallback** as the final code block. +7. Report counts (lines saved/in payload, non-empty text, uncertain) and which path was used (direct PUT or fallback). +8. Report notable ambiguities (e.g., illegible lines transcribed as empty or flagged) and the detected block count so reviewers know whether a multi-block layout was recognised. ## Rules diff --git a/templates/inject-context.js b/templates/inject-context.js index 0ff7f78..75b897b 100644 --- a/templates/inject-context.js +++ b/templates/inject-context.js @@ -149,10 +149,9 @@ export function formatExistingLines(fetchedPage) { /** * Render the current column state for a given page as a markdown bullet list. 
- * Used by templates that must avoid duplicate column labels. The directly - * fetched `page` is authoritative, since the project graph may not hydrate - * `layer.pages[].columns` for every page; the project tree is consulted as a - * fallback. + * Used by templates that must avoid duplicate column labels. Columns live on + * `project.layers[].pages[]`; the `/resolved` page endpoint does not emit + * them, so the project graph is the only source. * @param {any} project the TPEN project object. * @param {any} page the page object returned by `fetchPageResolved`. * @returns {string} @@ -162,9 +161,9 @@ export function formatExistingColumns(project, page) { const projectPage = (project?.layers ?? []) .flatMap(l => l.pages ?? []) .find(pg => trailingId(pg) === tail) - const cols = page?.columns ?? projectPage?.columns ?? [] - if (!Array.isArray(cols) || cols.length === 0) { + const cols = projectPage?.columns ?? [] + if (cols.length === 0) { return '- (No existing columns on this page — labels must be unique when created.)' } - return cols.map(c => `- ${c.label ?? '(unlabeled)'}: ${(c.lines ?? c.annotations ?? []).length} line(s)`).join('\n') + return cols.map(c => `- ${c.label ?? '(unlabeled)'}`).join('\n') } diff --git a/ui-manager.js b/ui-manager.js index d8d6473..e0440ed 100644 --- a/ui-manager.js +++ b/ui-manager.js @@ -131,11 +131,6 @@ function expandFallbackItem(item, canvasId, existingItemsById) { return out } -/** `{ items: [...] }` page-PUT shape — the only shape any prompt fallback emits. */ -function isItemsPayload(p) { - return p && typeof p === 'object' && !Array.isArray(p) && Array.isArray(p.items) -} - /** * Validate a pre-expansion `items` array, returning a user-facing error string * or `null`. 
Catches two erasure traps: @@ -449,7 +444,7 @@ export class UIManager { setFeedback('Submitting…') const opts = { projectID, pageID, token, setFeedback, writeTextarea } try { - if (isItemsPayload(payload)) { + if (payload && typeof payload === 'object' && !Array.isArray(payload) && Array.isArray(payload.items)) { await this.#submitItems(payload.items, opts) return } From 37643a4c940c149f291e58e0bf174d7779e124e7 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Thu, 23 Apr 2026 19:32:25 -0500 Subject: [PATCH 25/47] changes during review --- iiif-ids.js | 32 ++++++++++++++++++++ templates/detect-columns-and-lines/PROMPT.md | 2 +- templates/inject-context.js | 27 ++--------------- templates/transcribe-known-lines/PROMPT.md | 2 +- ui-manager.js | 28 +++++++---------- 5 files changed, 48 insertions(+), 43 deletions(-) diff --git a/iiif-ids.js b/iiif-ids.js index b104a9a..48a1801 100644 --- a/iiif-ids.js +++ b/iiif-ids.js @@ -30,3 +30,35 @@ export function trailingId(value) { const parts = String(iri).split('/').filter(Boolean) return parts.pop() ?? null } + +/** + * Pull a Media Fragments `xywh=…` selector value out of any of the target + * shapes that flow through this app: + * + * - W3C `SpecificResource` object with `selector.value` (or `selector[0].value` + * when the selector is wrapped in an array). + * - Bare string target like `"#xywh=10,20,300,40"` — historical + * annotations stored this way still show up in hydrated pages. + * - Already-bare selector like `"xywh=10,20,300,40"` — the shape prompts emit + * in condensed fallback payloads. + * + * Returns the full `"xywh=…"` form (suitable for a `FragmentSelector.value`) + * or `null` if no selector is present. Strips the non-standard `pixel:` + * prefix that Annotorious produces. + * @param {any} target a target value: string, `SpecificResource`, or nullish. 
+ * @returns {string|null} + */ +export function parseXywh(target) { + if (typeof target === 'string') { + if (!target.includes('xywh=')) return null + return target.slice(target.indexOf('xywh=')).replace(/^xywh=pixel:/, 'xywh=') + } + if (target && typeof target === 'object') { + const sel = target.selector + const value = Array.isArray(sel) ? sel[0]?.value : sel?.value + if (typeof value === 'string' && value.includes('xywh=')) { + return value.slice(value.indexOf('xywh=')).replace(/^xywh=pixel:/, 'xywh=') + } + } + return null +} diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index 65e6e3e..f69f3e6 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -32,7 +32,7 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). -4. If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once in the global reading-order sequence from step 2. If the PUT returns non-2xx, stop and fall back — lines are not persisted yet. If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The response returns `items` in submission order, so use each line's column index from step 2 to slice the returned ids. Labels must be unique and must not clash with anything in "Existing columns on this page". If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. +4. If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once in the global reading-order sequence from step 2. 
If the PUT returns non-2xx, stop and fall back — lines are not persisted yet. If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The PUT response's `items` array is guaranteed to be in the same order as the submitted items, so use each line's column index from step 2 to slice the returned ids. Labels must be unique and must not clash with anything in "Existing columns on this page". If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. 5. If HTTP PUT is unavailable (or the PUT in step 4 failed), emit the condensed payload under **Fallback** as the final code block. Column creation is out of scope for the fallback path. 6. Report counts (lines saved/in payload, columns created/in payload) and which path was used. diff --git a/templates/inject-context.js b/templates/inject-context.js index 75b897b..b21ab71 100644 --- a/templates/inject-context.js +++ b/templates/inject-context.js @@ -10,7 +10,7 @@ * @author thehabes */ -import { getIRI, trailingId } from '../iiif-ids.js' +import { getIRI, parseXywh, trailingId } from '../iiif-ids.js' /** * Pull the first image body URL off a IIIF canvas, or null if none is present. @@ -59,27 +59,6 @@ export function buildTemplateContext(ctx) { } } -/** - * Extract an `xywh=x,y,w,h` fragment from a line annotation's target, accepting - * both `target.selector.value` and a plain `"source#xywh=..."` string target. - * Strips the non-standard `pixel:` prefix introduced by Annotorious — prompts - * and any annotations produced downstream must use plain integer coordinates. - * @param {any} item - * @returns {string|null} - */ -function extractXywh(item) { - const sel = item?.target?.selector - const selValue = Array.isArray(sel) ? 
sel[0]?.value : sel?.value - let raw = null - if (typeof selValue === 'string' && selValue.includes('xywh=')) { - raw = selValue.slice(selValue.indexOf('xywh=')) - } else { - const target = typeof item?.target === 'string' ? item.target : null - if (target && target.includes('#xywh=')) raw = target.slice(target.indexOf('xywh=')) - } - return raw ? raw.replace(/^xywh=pixel:/, 'xywh=') : null -} - /** * Summarize a line's body for the "Existing lines" listing. * @@ -142,8 +121,8 @@ export function formatExistingLines(fetchedPage) { } return items.map(item => { const lineUri = getIRI(item) ?? '(unknown)' - const xywh = extractXywh(item) ?? '(no xywh selector)' - return `- ${lineUri} | xywh=${xywh} | ${formatBody(item?.body)}` + const xywh = parseXywh(item?.target) ?? '(no xywh selector)' + return `- ${lineUri} | ${xywh} | ${formatBody(item?.body)}` }).join('\n') } diff --git a/templates/transcribe-known-lines/PROMPT.md b/templates/transcribe-known-lines/PROMPT.md index b363987..fab9d9f 100644 --- a/templates/transcribe-known-lines/PROMPT.md +++ b/templates/transcribe-known-lines/PROMPT.md @@ -11,7 +11,7 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Existing lines -Each entry is ` | xywh= | ` in canvas coordinates. The body form is `body=[]` (empty), `text=""` (single plain-text `TextualBody`), or `body=` (anything else) — use it as context for what's already on the line. The fallback payload re-uses the full annotation URI verbatim as the `id` of each item; the splitscreen tool preserves the existing target server-side and updates only the body text. +Each entry is ` | xywh= | ` in canvas coordinates. The body form is `body=[]` (empty), `text=""` (single plain-text `TextualBody`), or `body=` (anything else) — use it as context for what's already on the line. 
The fallback payload re-uses the full annotation URI verbatim as the `id` of each item; the splitscreen tool rebuilds each existing target from the hydrated page before PUTting it, and updates only the body text. {{existingLines}} diff --git a/ui-manager.js b/ui-manager.js index e0440ed..ec8075c 100644 --- a/ui-manager.js +++ b/ui-manager.js @@ -10,7 +10,7 @@ import { listTemplates, renderTemplate } from './prompt-generator.js' import { pageEndpoint, putPage } from './tpen-service.js' -import { getIRI, trailingId } from './iiif-ids.js' +import { getIRI, parseXywh, trailingId } from './iiif-ids.js' /** * Build a DOM element. Recognizes a few special prop keys: @@ -70,29 +70,23 @@ function buildSpecificResourceTarget(canvasId, xywh) { /** * Pull the bare `xywh=…` selector value out of whatever target shape the - * fallback item carries. + * fallback item carries. Delegates all target-shape handling to `parseXywh` + * in iiif-ids.js so the known-line-update branch (`{id, text}`, no target) + * transparently recovers xywh from existing lines whose hydrated targets are + * either `SpecificResource` objects or legacy bare `"#xywh=…"` strings. * - * - Bare string (condensed shape) → the string itself. - * - Object `SpecificResource` (legacy full-shape paste) → `selector.value`. - * - Absent; known-line update (`{id, text}`) → look up the existing line's - * target on the hydrated page and extract its selector value. - * - * Everything else returns `null`; the caller leaves `target` off and the - * services API rejects the item with `Line data is malformed` — the same - * outcome as submitting before the refactor. + * Returns `null` when no selector can be resolved; the caller leaves `target` + * off and the services API rejects the item with `Line data is malformed`. 
* @param {any} item * @param {Map} existingItemsById * @returns {string|null} */ function resolveXywh(item, existingItemsById) { - if (typeof item.target === 'string') return item.target - if (item.target && typeof item.target === 'object') { - return typeof item.target.selector?.value === 'string' ? item.target.selector.value : null - } - if (typeof item.id === 'string') { + const direct = parseXywh(item?.target) + if (direct) return direct + if (typeof item?.id === 'string') { const existing = existingItemsById.get(item.id) - const value = existing?.target?.selector?.value - return typeof value === 'string' ? value : null + return parseXywh(existing?.target) } return null } From ac4b2aad315547ec63f0eb0abe7866115ea7f52e Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Thu, 23 Apr 2026 19:42:26 -0500 Subject: [PATCH 26/47] this is not right --- tpen-service.js | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/tpen-service.js b/tpen-service.js index 46cfb56..f3991a9 100644 --- a/tpen-service.js +++ b/tpen-service.js @@ -15,11 +15,11 @@ import { CONFIG } from './config.js' * @param {string} method HTTP verb (`GET`, `PUT`, `POST`, `PATCH`). * @param {any} [body] JSON-serializable body; omitted for GET. * @param {string} token JWT. - * @returns {Promise} parsed JSON body (or `null` when the response has no body). + * @returns {Promise} parsed JSON body. 
*/ -async function serviceRequest(path, method, body, token) { +async function tpenServiceRequest(path, method, body, token) { if (!token) throw new Error(`Missing auth token for ${path}`) - const init = { + const options = { method, headers: { 'Content-Type': 'application/json', @@ -27,8 +27,8 @@ async function serviceRequest(path, method, body, token) { }, signal: AbortSignal.timeout(15000) } - if (body !== undefined) init.body = JSON.stringify(body) - const res = await fetch(`${CONFIG.servicesURL}${path}`, init) + if (body !== undefined) options.body = JSON.stringify(options.body) + const res = await fetch(`${CONFIG.servicesURL}${path}`, options) if (!res.ok) { // TPEN services always emit JSON errors (see tpen3-services // utilities/shared.js#respondWithError and utilities/routeErrorHandler.js). @@ -38,9 +38,7 @@ async function serviceRequest(path, method, body, token) { err.status = res.status throw err } - const text = await res.text() - if (!text) return null - return JSON.parse(text) + return res.json() } /** @@ -50,7 +48,7 @@ async function serviceRequest(path, method, body, token) { * @returns {Promise} parsed JSON body. 
*/ function authedGet(path, token) { - return serviceRequest(path, 'GET', undefined, token) + return tpenServiceRequest(path, 'GET', undefined, token) } /** @@ -92,7 +90,7 @@ export function fetchPageResolved(projectID, pageID, token) { * @returns {Promise} */ export function putPage(projectID, pageID, body, token) { - return serviceRequest( + return tpenServiceRequest( `/project/${encodeURIComponent(projectID)}/page/${encodeURIComponent(pageID)}`, 'PUT', body, token ) From 3863853c893cdb31d21d3c2ed0b572a14797ccc8 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Thu, 23 Apr 2026 19:46:08 -0500 Subject: [PATCH 27/47] this is not right --- tpen-service.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpen-service.js b/tpen-service.js index f3991a9..ce64f95 100644 --- a/tpen-service.js +++ b/tpen-service.js @@ -27,7 +27,7 @@ async function tpenServiceRequest(path, method, body, token) { }, signal: AbortSignal.timeout(15000) } - if (body !== undefined) options.body = JSON.stringify(options.body) + if (body !== undefined) body = JSON.stringify(body) const res = await fetch(`${CONFIG.servicesURL}${path}`, options) if (!res.ok) { // TPEN services always emit JSON errors (see tpen3-services From b7ec4dfd596a96688bf8df3f46d612a0a1086fb1 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Thu, 23 Apr 2026 20:03:12 -0500 Subject: [PATCH 28/47] this is right --- tpen-service.js | 2 +- ui-manager.js | 27 ++++++++++++++++----------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/tpen-service.js b/tpen-service.js index ce64f95..27d17a0 100644 --- a/tpen-service.js +++ b/tpen-service.js @@ -27,7 +27,7 @@ async function tpenServiceRequest(path, method, body, token) { }, signal: AbortSignal.timeout(15000) } - if (body !== undefined) body = JSON.stringify(body) + if (body !== undefined) options.body = JSON.stringify(body) const res = await fetch(`${CONFIG.servicesURL}${path}`, options) if (!res.ok) { // TPEN services always emit JSON 
errors (see tpen3-services diff --git a/ui-manager.js b/ui-manager.js index ec8075c..2ad408c 100644 --- a/ui-manager.js +++ b/ui-manager.js @@ -55,6 +55,7 @@ const OPTIONAL_ID_FIELDS = [ * selector value. * @param {string} canvasId * @param {string} xywh the bare selector value (e.g. `xywh=10,20,300,40`). + * @returns {{source: string, type: string, selector: {type: string, conformsTo: string, value: string}}} */ function buildSpecificResourceTarget(canvasId, xywh) { return { @@ -71,9 +72,14 @@ function buildSpecificResourceTarget(canvasId, xywh) { /** * Pull the bare `xywh=…` selector value out of whatever target shape the * fallback item carries. Delegates all target-shape handling to `parseXywh` - * in iiif-ids.js so the known-line-update branch (`{id, text}`, no target) - * transparently recovers xywh from existing lines whose hydrated targets are - * either `SpecificResource` objects or legacy bare `"#xywh=…"` strings. + * in iiif-ids.js so both `SpecificResource` objects and legacy bare + * `"#xywh=…"` strings round-trip correctly. + * + * Known-line updates (item `id` matches an existing line) ignore any + * `target` the LLM included and re-use the existing line's selector — the + * fallback flow is documented as text-only in `transcribe-known-lines`, so + * trusting an LLM-supplied target would silently clobber bounds when the + * model echoes a stale or wrong selector. * * Returns `null` when no selector can be resolved; the caller leaves `target` * off and the services API rejects the item with `Line data is malformed`. 
@@ -82,13 +88,10 @@ function buildSpecificResourceTarget(canvasId, xywh) { * @returns {string|null} */ function resolveXywh(item, existingItemsById) { - const direct = parseXywh(item?.target) - if (direct) return direct - if (typeof item?.id === 'string') { - const existing = existingItemsById.get(item.id) - return parseXywh(existing?.target) + if (typeof item?.id === 'string' && existingItemsById.has(item.id)) { + return parseXywh(existingItemsById.get(item.id)?.target) } - return null + return parseXywh(item?.target) } /** @@ -148,8 +151,10 @@ function validateItems(items) { if (!item || typeof item !== 'object' || Array.isArray(item)) { return 'Each item in `items` must be an annotation object.' } - if ('target' in item && typeof item.target !== 'string' && typeof item.target !== 'object') { - return 'Each item `target` must be an `xywh=…` string or a full target object.' + if ('target' in item) { + const t = item.target + const ok = typeof t === 'string' || (t !== null && typeof t === 'object' && !Array.isArray(t)) + if (!ok) return 'Each item `target` must be an `xywh=…` string or a full target object.' } if ('text' in item && typeof item.text !== 'string') { return 'Each item `text` must be a string.' From b72f85cddb1dc55f4d374b07177dd69c0efe66cd Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Thu, 23 Apr 2026 20:18:05 -0500 Subject: [PATCH 29/47] changes during review --- templates/detect-columns/PROMPT.md | 9 +--- templates/detect-columns/index.js | 3 +- templates/transcribe-known-lines/PROMPT.md | 5 +- tpen-service.js | 5 +- ui-manager.js | 59 ++++++++++++++-------- 5 files changed, 49 insertions(+), 32 deletions(-) diff --git a/templates/detect-columns/PROMPT.md b/templates/detect-columns/PROMPT.md index 3a39d88..61f282a 100644 --- a/templates/detect-columns/PROMPT.md +++ b/templates/detect-columns/PROMPT.md @@ -9,10 +9,6 @@ You are assisting with TPEN manuscript transcription. 
This task rebuilds the col - Image: {{imageUrl}} - Page endpoint: {{pageEndpoint}} -## Existing columns on this page - -{{existingColumns}} - ## Existing lines Each entry is ` | xywh= | ` in canvas coordinates, printed in the page's current order. Use the full annotation URI verbatim when assigning lines to columns and when echoing lines in the page PUT. Compare the current order against the reading-order sequence you compute in step 5 to decide whether the PUT in step 8 is necessary. @@ -50,7 +46,7 @@ Use only tools already available in your environment. Do not install packages, l 5. Build a global reading-order sequence of all existing line ids: columns in reading order; within each column, lines sorted top-to-bottom by the `xywh` y-center. 6. DELETE every existing column on the page (see TPEN API below). On any non-2xx, stop and report. Do not POST or PUT after a DELETE failure. 7. For each detected column, POST `{ label, annotations }` where `annotations` is the contiguous slice of the reading-order id sequence from step 5 that belongs to that column. Choose a unique label per column (e.g., `Column A`, `Column B`) that does not clash with any other label chosen in this run. On any non-2xx, stop and report — columns POSTed before the failure remain persisted. -8. Compare your step-5 reading-order sequence to "Existing lines" position-by-position — they differ if any index holds a different annotation URI. If they match exactly, skip the PUT. Otherwise, PUT the page with `items` in the step-5 order. Each entry re-uses the existing annotation URI verbatim as its `id`, its `body` reconstructed from the entry's body form, and its `target` rebuilt from the entry's `xywh` selector. The server remaps column references when URIs change, but echoing `body` and `target` verbatim avoids minting unnecessary RERUM versions. On any non-2xx, stop and report. +8. Compare the step-5 sequence against the "Existing lines" order index-by-index. If they are identical, skip the PUT. 
Otherwise, PUT the page with `items` in the step-5 order. Each entry re-uses the existing annotation URI verbatim as its `id`, its `body` reconstructed from the entry's body form, and its `target` rebuilt from the entry's `xywh` selector. The server remaps column references when URIs change, but echoing `body` and `target` verbatim avoids minting unnecessary RERUM versions. On any non-2xx, stop and report. 9. Report: columns deleted, columns created, whether the page order was updated, and per-column line counts. ## Rules @@ -107,8 +103,7 @@ Content-Type: application/json "conformsTo": "http://www.w3.org/TR/media-frags/", "value": "xywh=x,y,w,h" } - }, - "motivation": "transcribing" + } } ] } diff --git a/templates/detect-columns/index.js b/templates/detect-columns/index.js index 5a14e46..0eb33b6 100644 --- a/templates/detect-columns/index.js +++ b/templates/detect-columns/index.js @@ -7,7 +7,7 @@ * @author thehabes */ -import { buildTemplateContext, formatExistingColumns, formatExistingLines } from '../inject-context.js' +import { buildTemplateContext, formatExistingLines } from '../inject-context.js' /** @type {import('../../prompt-generator.js').PromptTemplate} */ export const detectColumnsTemplate = { @@ -16,7 +16,6 @@ export const detectColumnsTemplate = { templateUrl: new URL('./PROMPT.md', import.meta.url), buildContext: (ctx) => ({ ...buildTemplateContext(ctx), - existingColumns: formatExistingColumns(ctx.project, ctx.page), existingLines: formatExistingLines(ctx.page) }) } diff --git a/templates/transcribe-known-lines/PROMPT.md b/templates/transcribe-known-lines/PROMPT.md index fab9d9f..0d0c9dc 100644 --- a/templates/transcribe-known-lines/PROMPT.md +++ b/templates/transcribe-known-lines/PROMPT.md @@ -35,8 +35,9 @@ Use only tools already available in your environment. Do not install packages, l - `pixel_h = round(canvas_h * img_h / {{canvasHeight}})` Crop each line region and verify it visibly contains a single line of inked text. 2. 
Run handwriting text recognition over each crop. Apply the recognition rules below. -3. If HTTP PATCH is available, PATCH the text to each line's line-text endpoint — one PATCH per line in the "Existing lines" list. On any non-2xx, record the status and continue with the remaining lines. If HTTP PATCH is unavailable from the start, go directly to the fallback. -4. Report counts (lines updated, lines flagged illegible, lines failed) and which path was used. +3. If HTTP PATCH is available, PATCH the text to each line's line-text endpoint — one PATCH per line in the "Existing lines" list. On any non-2xx, record the status and continue with the remaining lines. If every PATCH returned non-2xx, treat PATCH as unavailable and proceed to step 4. If HTTP PATCH is unavailable from the start, skip directly to step 4. +4. If you reached this step because PATCH was unavailable or every attempt failed, emit the condensed payload under **Fallback** as the final code block. +5. Report counts (lines updated, lines flagged illegible, lines failed) and which path was used. ## Rules diff --git a/tpen-service.js b/tpen-service.js index 27d17a0..2640f43 100644 --- a/tpen-service.js +++ b/tpen-service.js @@ -34,7 +34,10 @@ async function tpenServiceRequest(path, method, body, token) { // utilities/shared.js#respondWithError and utilities/routeErrorHandler.js). const detail = await res.json().catch(() => ({})) const msg = detail.message ?? detail.error ?? res.statusText - const err = new Error(`${path}: ${msg}`) + // Prefix with the status so callers that surface `err.message` raw + // (e.g., main.js#loadContext) still show it; the numeric status is + // also preserved on `err.status` for programmatic handling. 
+ const err = new Error(`${res.status} ${path}: ${msg}`) err.status = res.status throw err } diff --git a/ui-manager.js b/ui-manager.js index 2ad408c..20f81ac 100644 --- a/ui-manager.js +++ b/ui-manager.js @@ -136,10 +136,14 @@ function expandFallbackItem(item, canvasId, existingItemsById) { * `page.items = []` even when `itemsProvided` is false, erasing every line * reference on the page and leaving columns pointing at stale ids. Prompts * should stop and report "no lines" rather than emit an empty payload. - * 2. A known-line update (string `id`) carrying neither `text` nor `body` - * would pass the expander and then be PUT with `body` absent, causing the - * services API to overwrite the existing body with `[]` on save - * (Line.js#saveLineToRerum: `body: this.body ?? []`). + * 2. A known-line update (string `id`) without usable transcription content + * would be PUT with `body` absent or empty, causing the services API to + * overwrite the existing body with `[]` on save + * (Line.js#saveLineToRerum: `body: this.body ?? []`). `'body' in item` is + * not enough — `body: null`, `body: ""`, `body: {}` all collapse to `[]` + * via the `??` fallback. Require either a `text` string or a non-empty + * `body` array; reject any other `body` shape outright so a buggy paste + * can't slip through and silently truncate a line. * @param {Array} items * @returns {string|null} */ @@ -159,8 +163,15 @@ function validateItems(items) { if ('text' in item && typeof item.text !== 'string') { return 'Each item `text` must be a string.' } - if (typeof item.id === 'string' && !('text' in item) && !('body' in item)) { - return `Item for ${item.id} is missing both \`text\` and \`body\` — would erase the existing transcription.` + if ('body' in item && item.body !== undefined && !Array.isArray(item.body)) { + return 'Each item `body` must be an array of body entries.' 
+ } + if (typeof item.id === 'string') { + const hasText = typeof item.text === 'string' + const hasBody = Array.isArray(item.body) && item.body.length > 0 + if (!hasText && !hasBody) { + return `Item for ${item.id} is missing transcription content (\`text\` string or non-empty \`body\` array) — would erase the existing transcription.` + } } } return null @@ -185,8 +196,6 @@ export class UIManager { #workspaceBody = null /** Pending timer for clearing the Copy feedback message. */ #feedbackTimer = null - /** Pending timer for clearing the fallback panel feedback message. */ - #fallbackFeedbackTimer = null /** Fallback-panel submit button; toggled by `updateToken`. */ #fallbackSubmit = null @@ -369,6 +378,11 @@ export class UIManager { * below is belt-and-suspenders against a stale reference being clicked * programmatically. `updateToken` still flips it when the token arrives * after the panel was built so the pageID gate remains authoritative. + * + * The auto-clear timer for the feedback span lives in this closure, not + * on the instance — `renderWorkspace` rebuilds the panel on every render, + * and an instance-level timer reference would let an old panel's pending + * timer null out a new panel's timer slot. * @returns {HTMLElement} */ #buildFallbackPanel() { @@ -387,7 +401,12 @@ export class UIManager { }) this.#fallbackSubmit = submit const feedback = el('span', { class: 'feedback', attrs: { 'aria-live': 'polite' } }) - submit.addEventListener('click', () => this.#onFallbackSubmit(textarea, submit, feedback)) + let feedbackTimer = null + submit.addEventListener('click', () => this.#onFallbackSubmit({ + textarea, button: submit, feedback, + getTimer: () => feedbackTimer, + setTimer: (t) => { feedbackTimer = t } + })) const children = [ el('summary', { text: `Couldn't Use the API? Paste JSON from LLM here` }), el('p', { class: 'hint', text: 'Use this when your chat LLM produced the JSON payload but could not call the TPEN API itself. 
This tool will submit it using the token you authorized.' }) @@ -400,11 +419,9 @@ export class UIManager { /** * Parse the pasted JSON and submit it as a page PUT. Only one shape is * accepted: `{ items: [...] }` — the shape every prompt fallback emits. - * @param {HTMLTextAreaElement} textarea - * @param {HTMLButtonElement} button - * @param {HTMLElement} feedback + * @param {{textarea: HTMLTextAreaElement, button: HTMLButtonElement, feedback: HTMLElement, getTimer: () => any, setTimer: (t: any) => void}} ctx */ - async #onFallbackSubmit(textarea, button, feedback) { + async #onFallbackSubmit({ textarea, button, feedback, getTimer, setTimer }) { const { projectID, pageID, token } = this.state const raw = textarea.value.trim() // `renderWorkspace` can re-run mid-submit (e.g., token changes via @@ -416,15 +433,18 @@ export class UIManager { const setFeedback = (msg, autoClear = false) => { if (!alive()) return feedback.textContent = msg - if (this.#fallbackFeedbackTimer) { - clearTimeout(this.#fallbackFeedbackTimer) - this.#fallbackFeedbackTimer = null + const existing = getTimer() + if (existing) { + clearTimeout(existing) + setTimer(null) } if (autoClear) { - this.#fallbackFeedbackTimer = setTimeout(() => { + const t = setTimeout(() => { + if (getTimer() !== t) return feedback.textContent = '' - this.#fallbackFeedbackTimer = null + setTimer(null) }, 3000) + setTimer(t) } } if (!projectID || !pageID || !token) { @@ -449,8 +469,7 @@ export class UIManager { } setFeedback('Unrecognized payload shape — expected `{ "items": [...] }`.') } catch (err) { - const status = err?.status ? `TPEN API ${err.status}: ` : '' - setFeedback(`${status}${err?.message ?? 'Submission failed.'}`) + setFeedback(err?.message ?? 
'Submission failed.') } finally { if (button.isConnected) { button.disabled = !(this.state.projectID && this.state.pageID && this.state.token) From 8682b35299d67c24df5d39ff4e033b406dc50530 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Thu, 23 Apr 2026 20:47:04 -0500 Subject: [PATCH 30/47] small hack to get the transcription interface to refresh when the fallback succeeds. --- ui-manager.js | 42 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/ui-manager.js b/ui-manager.js index 20f81ac..e5ec560 100644 --- a/ui-manager.js +++ b/ui-manager.js @@ -503,14 +503,29 @@ export class UIManager { } const expanded = items.map(i => expandFallbackItem(i, canvasId, existingItemsById)) const result = await putPage(projectID, pageID, { items: expanded }, token) + // Drop the saved page into local state so the next Generate's + // "Existing lines" listing reflects what was just persisted. + this.state.page = result + writeTextarea(JSON.stringify(result, null, 2)) const saved = expanded.length const noun = `line item${saved === 1 ? '' : 's'}` - if (result && typeof result === 'object') { - writeTextarea(JSON.stringify(result, null, 2)) - setFeedback(`Saved ${saved} ${noun}. Server response (with ids) is in the textarea.`, true) - } else { - setFeedback(`Saved ${saved} ${noun}. Server returned no body; pasted payload left in the textarea.`, true) + // Mint `/transcribe?projectID=…&pageID=…` from the parent + // origin (taken from `document.referrer`, which survives the default + // `strict-origin-when-cross-origin` policy) and the workspace state, + // then top-navigate there to refresh the transcription column. + // Writing `top.location.href` is allowed cross-origin under user + // activation (the Submit click); when it works the iframe is torn + // down. When no origin is resolvable (sandboxed iframe with + // `allow-top-navigation` withheld, or strict `no-referrer` policy), + // fall back to a manual-refresh hint. 
The proper postMessage-based + // fix lives in TPEN-interfaces#528. + const reloadUrl = mintTranscriptionUrl(projectID, pageID) + if (reloadUrl) { + setFeedback(`Saved ${saved} ${noun}. Refreshing the transcription page…`) + window.top.location.href = reloadUrl + return } + setFeedback(`Saved ${saved} ${noun}. Refresh the transcription page to see the new lines in the column.`, true) } /** @@ -658,3 +673,20 @@ function truncateToken(token) { return `${token.slice(0, 10)}…${token.slice(-10)}` } +/** + * Fallback reload target when the parent didn't forward `parentUrl` via + * `TPEN_CONTEXT`. Minted from the parent origin (taken from + * `document.referrer`, which survives the default cross-origin + * `strict-origin-when-cross-origin` policy) and the tpen3-interfaces + * transcription permalink shape (`/transcribe?projectID=…&pageID=…`). + * @param {string} projectID + * @param {string} pageID + * @returns {string|null} the minted URL, or null when no origin is available. + */ +function mintTranscriptionUrl(projectID, pageID) { + let origin = null + try { origin = new URL(document.referrer).origin } catch {} + if (!origin) return null + return `${origin}/transcribe?projectID=${encodeURIComponent(projectID)}&pageID=${encodeURIComponent(pageID)}` +} + From b3074fc06bf727574186d35255184d8f7aee0861 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Thu, 23 Apr 2026 21:06:06 -0500 Subject: [PATCH 31/47] Changes during review --- templates/detect-columns/PROMPT.md | 4 +-- tpen-service.js | 6 ++++- ui-manager.js | 40 +++++++++++++++++++++--------- 3 files changed, 35 insertions(+), 15 deletions(-) diff --git a/templates/detect-columns/PROMPT.md b/templates/detect-columns/PROMPT.md index 61f282a..36562bb 100644 --- a/templates/detect-columns/PROMPT.md +++ b/templates/detect-columns/PROMPT.md @@ -72,7 +72,7 @@ Then POST each new column — one request per column: ``` POST {{pageEndpoint}}/column -Authorization: Bearer {{token}} +Authorization: Bearer Content-Type: 
application/json { @@ -87,7 +87,7 @@ Finally, if step 8 determined the reading order changed, PUT the page to rewrite ``` PUT {{pageEndpoint}} -Authorization: Bearer {{token}} +Authorization: Bearer Content-Type: application/json { diff --git a/tpen-service.js b/tpen-service.js index 2640f43..60dd123 100644 --- a/tpen-service.js +++ b/tpen-service.js @@ -18,7 +18,11 @@ import { CONFIG } from './config.js' * @returns {Promise} parsed JSON body. */ async function tpenServiceRequest(path, method, body, token) { - if (!token) throw new Error(`Missing auth token for ${path}`) + if (!token) { + const err = new Error(`Missing auth token for ${path}`) + err.status = 401 + throw err + } const options = { method, headers: { diff --git a/ui-manager.js b/ui-manager.js index e5ec560..dbf45cd 100644 --- a/ui-manager.js +++ b/ui-manager.js @@ -335,13 +335,15 @@ export class UIManager { select.append(el('option', { value: t.id, text: t.label })) } - // Prompts embed the auth token in `{{token}}`; generating before - // consent yields a prompt whose Authorization header is `Bearer ` with - // nothing after it. Gate Generate on token presence and nudge the user - // toward the consent button in the header. + // Prompts embed the auth token in `{{token}}` and the page endpoint + // in `{{pageEndpoint}}`. Generating without either yields a prompt + // whose Authorization header is `Bearer ` (no token) or whose target + // URL is `(unknown page endpoint)`. Gate Generate on both, and nudge + // the user toward whatever's missing. 
+ const canGenerate = Boolean(token && pageID) const generateBtn = el('button', { type: 'button', id: 'generate-btn', text: 'Generate prompt', - disabled: !token + disabled: !canGenerate }) this.#generateBtn = generateBtn const output = el('textarea', { @@ -359,13 +361,19 @@ export class UIManager { generateBtn ] - const body = el('div', { class: 'workspace-body', hidden: !token }, [ - el('div', { class: 'controls' }, generateControls), + const bodyChildren = [ + el('div', { class: 'controls' }, generateControls) + ] + if (!pageID) { + bodyChildren.push(el('p', { class: 'hint', text: 'Needs a page context before a prompt can be generated.' })) + } + bodyChildren.push( el('label', { class: 'output-label', htmlFor: 'output', text: 'Generated prompt' }), output, el('div', { class: 'controls' }, [copyBtn, feedback]), this.#buildFallbackPanel() - ]) + ) + const body = el('div', { class: 'workspace-body', hidden: !token }, bodyChildren) this.#workspaceBody = body this.#replace(el('section', { class: 'card' }, [header, body])) @@ -522,8 +530,16 @@ export class UIManager { const reloadUrl = mintTranscriptionUrl(projectID, pageID) if (reloadUrl) { setFeedback(`Saved ${saved} ${noun}. Refreshing the transcription page…`) - window.top.location.href = reloadUrl - return + // The PUT already succeeded; if the navigation throws (sandbox + // without `allow-top-navigation`, or top is cross-origin and + // the click's user activation has been consumed by the await + // chain above), don't let it surface as a submission failure. + try { + window.top.location.href = reloadUrl + return + } catch (err) { + console.warn('top.location navigation blocked', err) + } } setFeedback(`Saved ${saved} ${noun}. 
Refresh the transcription page to see the new lines in the column.`, true) } @@ -555,9 +571,9 @@ export class UIManager { this.#authButton.remove() this.#authButton = null } - if (this.#generateBtn) this.#generateBtn.disabled = false + const { projectID, pageID } = this.state + if (this.#generateBtn) this.#generateBtn.disabled = !pageID if (this.#fallbackSubmit) { - const { projectID, pageID } = this.state this.#fallbackSubmit.disabled = !(projectID && pageID) } if (this.#workspaceBody) this.#workspaceBody.hidden = false From 48eb2d360834397fe016f593811e9ed10512afdc Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Thu, 23 Apr 2026 21:30:08 -0500 Subject: [PATCH 32/47] lock it in --- ui-manager.js | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/ui-manager.js b/ui-manager.js index dbf45cd..4b9aeab 100644 --- a/ui-manager.js +++ b/ui-manager.js @@ -155,9 +155,17 @@ function validateItems(items) { if (!item || typeof item !== 'object' || Array.isArray(item)) { return 'Each item in `items` must be an annotation object.' } - if ('target' in item) { + const hasId = typeof item.id === 'string' + const hasTargetField = 'target' in item && item.target != null + // Without an id we can't look up an existing target; without a target we + // can't build one. Either path resolves a selector — neither makes the + // server throw "Line data is malformed" with a generic 500. + if (!hasId && !hasTargetField) { + return 'Each item must include `target` (xywh selector) or an `id` matching an existing line.' + } + if (hasTargetField) { const t = item.target - const ok = typeof t === 'string' || (t !== null && typeof t === 'object' && !Array.isArray(t)) + const ok = typeof t === 'string' || (typeof t === 'object' && !Array.isArray(t)) if (!ok) return 'Each item `target` must be an `xywh=…` string or a full target object.' 
} if ('text' in item && typeof item.text !== 'string') { @@ -166,7 +174,7 @@ function validateItems(items) { if ('body' in item && item.body !== undefined && !Array.isArray(item.body)) { return 'Each item `body` must be an array of body entries.' } - if (typeof item.id === 'string') { + if (hasId) { const hasText = typeof item.text === 'string' const hasBody = Array.isArray(item.body) && item.body.length > 0 if (!hasText && !hasBody) { From c6a80858473851a469ce9137c0886170bba5afe8 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Fri, 24 Apr 2026 09:05:35 -0500 Subject: [PATCH 33/47] Changes while testing --- templates/detect-and-transcribe/PROMPT.md | 2 +- templates/detect-columns-and-lines/PROMPT.md | 6 +++--- templates/detect-columns/PROMPT.md | 2 +- templates/detect-lines/PROMPT.md | 2 +- templates/detect-order-and-transcribe/PROMPT.md | 2 +- templates/inject-context.js | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/templates/detect-and-transcribe/PROMPT.md b/templates/detect-and-transcribe/PROMPT.md index b098faa..655a5dd 100644 --- a/templates/detect-and-transcribe/PROMPT.md +++ b/templates/detect-and-transcribe/PROMPT.md @@ -27,7 +27,7 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` - Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). + Then clamp `x,y,w,h` so that `0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`. 4. Run handwriting text recognition on each line's crop. Apply the recognition rules below. 5. If HTTP PUT is available, build the full payload under **TPEN API** — one Annotation per line with the recognized text and `xywh=x,y,w,h` selector — and send the request once. On any non-2xx response, do not retry — fall back. 6. 
If HTTP PUT is unavailable (or step 5 fell back), emit the condensed payload under **Fallback** as the final code block. diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index f69f3e6..086607d 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -18,7 +18,7 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: 1. Ability to fetch the image bytes (or a derivative) and identify line and column bounds from them. Precise pixel measurement is preferred when available; visual estimation from the fetched image is acceptable otherwise. -2. Either HTTP PUT and POST capability with `Content-Type: application/json`, or the ability to emit the lines-only payload as a fallback JSON code block in your report. Column creation has no fallback — if POST is unavailable, column grouping is dropped. If PUT is unavailable, skip straight to the Fallback section — do not retry. +2. Either HTTP PUT and POST capability with `Content-Type: application/json`, or the ability to emit the lines-only payload as a fallback JSON code block in your report. If either verb is unavailable, skip straight to the Fallback section — do not retry. Column creation has no fallback; it is dropped when the fallback path is taken. Use only tools already available in your environment. Do not install packages, libraries, or system utilities. @@ -31,9 +31,9 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` - Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). 
+ Then clamp `x,y,w,h` so that `0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`. 4. If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once in the global reading-order sequence from step 2. If the PUT returns non-2xx, stop and fall back — lines are not persisted yet. If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The PUT response's `items` array is guaranteed to be in the same order as the submitted items, so use each line's column index from step 2 to slice the returned ids. Labels must be unique and must not clash with anything in "Existing columns on this page". If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. -5. If HTTP PUT is unavailable (or the PUT in step 4 failed), emit the condensed payload under **Fallback** as the final code block. Column creation is out of scope for the fallback path. +5. If HTTP PUT or POST is unavailable (or the PUT in step 4 failed), emit the condensed payload under **Fallback** as the final code block. Column creation is out of scope for the fallback path. 6. Report counts (lines saved/in payload, columns created/in payload) and which path was used. ## Rules diff --git a/templates/detect-columns/PROMPT.md b/templates/detect-columns/PROMPT.md index 36562bb..3a4f6bc 100644 --- a/templates/detect-columns/PROMPT.md +++ b/templates/detect-columns/PROMPT.md @@ -41,7 +41,7 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` - Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). 
+ Then clamp `x,y,w,h` so that `0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`. 4. For each detected column, determine which of the existing line ids (from the list above) belong to it. Assign a line to the column whose canvas-space region contains the center point of the line's `xywh`. If a line's center falls outside every detected column, assign it to the nearest column by Euclidean distance from the center point to the column's region (distance `0` when the point is inside). Each line belongs to exactly one column. 5. Build a global reading-order sequence of all existing line ids: columns in reading order; within each column, lines sorted top-to-bottom by the `xywh` y-center. 6. DELETE every existing column on the page (see TPEN API below). On any non-2xx, stop and report. Do not POST or PUT after a DELETE failure. diff --git a/templates/detect-lines/PROMPT.md b/templates/detect-lines/PROMPT.md index f21e559..e4c1a15 100644 --- a/templates/detect-lines/PROMPT.md +++ b/templates/detect-lines/PROMPT.md @@ -27,7 +27,7 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` - Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). + Then clamp `x,y,w,h` so that `0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`. 4. If HTTP PUT is available, build the full payload under **TPEN API** and send the request once. On any non-2xx response, do not retry — fall back. 5. If HTTP PUT is unavailable (or step 4 fell back), emit the condensed payload under **Fallback** as the final code block. 6. Report count and which path was used (direct PUT or fallback). 
diff --git a/templates/detect-order-and-transcribe/PROMPT.md b/templates/detect-order-and-transcribe/PROMPT.md index f7d5645..c832baa 100644 --- a/templates/detect-order-and-transcribe/PROMPT.md +++ b/templates/detect-order-and-transcribe/PROMPT.md @@ -27,7 +27,7 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` - Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). + Then clamp `x,y,w,h` so that `0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`. 4. Run handwriting text recognition on each line's crop. Apply the recognition rules below. 5. If HTTP PUT is available, build the full payload under **TPEN API** in the global reading-order sequence from step 2 and send the request once. On any non-2xx response, do not retry — fall back. 6. If HTTP PUT is unavailable (or step 5 fell back), emit the condensed payload under **Fallback** as the final code block. diff --git a/templates/inject-context.js b/templates/inject-context.js index b21ab71..3934669 100644 --- a/templates/inject-context.js +++ b/templates/inject-context.js @@ -142,7 +142,7 @@ export function formatExistingColumns(project, page) { .find(pg => trailingId(pg) === tail) const cols = projectPage?.columns ?? [] if (cols.length === 0) { - return '- (No existing columns on this page — labels must be unique when created.)' + return '- (No existing columns on this page)' } return cols.map(c => `- ${c.label ?? 
'(unlabeled)'}`).join('\n') } From c8e8ab91077085b5a8d1bab7881b033fee6ff54d Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Fri, 24 Apr 2026 09:10:14 -0500 Subject: [PATCH 34/47] Simplify a bit --- ASSISTANTS.md | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ASSISTANTS.md b/ASSISTANTS.md index dafdda5..c42c2c1 100644 --- a/ASSISTANTS.md +++ b/ASSISTANTS.md @@ -6,7 +6,7 @@ You are a small GitHub Pages app that functions as a TPEN AI Prompt Generator. ## TPEN Interfaces -You are a TPEN Interfaces component with an accompanying interface for UI. Specifically, you are a splitscreen tool for the transcription interface. +You are a tool imported into a TPEN Interfaces component. That component is your parent. > The TPEN Interfaces code can be found at https://github.com/CenterForDigitalHumanities/TPEN-interfaces. diff --git a/README.md b/README.md index 40806a3..ddde66f 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,3 @@ # TPEN-Prompts -A small GitHub Pages app that composes well-formatted LLM prompts carrying TPEN3 project context. It emits prompt text only — it does **not** call any LLM. Offered as a splitscreen tool on the TPEN3 transcription interface. +A small GitHub Pages app that composes well-formatted LLM prompts carrying TPEN3 project context. It emits prompt text only — it does **not** call any LLM. 
From 3f720737d6991905e3da1c8cfc3fad7c240c65f2 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Fri, 24 Apr 2026 09:17:31 -0500 Subject: [PATCH 35/47] Changes from review --- templates/detect-columns-and-lines/PROMPT.md | 1 - templates/detect-columns/PROMPT.md | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index 086607d..2370f9a 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -119,4 +119,3 @@ Fallback path, report: - counts: lines in payload - HTTP status and error body if a request was attempted first - final code block: the condensed `{ "items": [...] }` JSON for the user to paste -- list the labels of any columns already created before the failure, so a follow-up pass can avoid duplicating them. diff --git a/templates/detect-columns/PROMPT.md b/templates/detect-columns/PROMPT.md index 3a4f6bc..186c9d1 100644 --- a/templates/detect-columns/PROMPT.md +++ b/templates/detect-columns/PROMPT.md @@ -16,7 +16,7 @@ Each entry is ` | xywh= | ` in canvas The body form is one of: - `body=[]` — echo as `[]`. -- `text=""` — echo as `[{ "type": "TextualBody", "value": , "format": "text/plain" }]`. +- `text="…"` — the value after `text=`, including its surrounding double quotes, is a JSON string literal (quotes and escapes already encoded). Echo as `[{ "type": "TextualBody", "value": <that literal>, "format": "text/plain" }]` — paste it straight into the `value` slot, do not re-quote or re-escape. - `body=` — echo the JSON verbatim. {{existingLines}} @@ -45,7 +45,7 @@ Use only tools already available in your environment. Do not install packages, l 4. For each detected column, determine which of the existing line ids (from the list above) belong to it. Assign a line to the column whose canvas-space region contains the center point of the line's `xywh`.
If a line's center falls outside every detected column, assign it to the nearest column by Euclidean distance from the center point to the column's region (distance `0` when the point is inside). Each line belongs to exactly one column. 5. Build a global reading-order sequence of all existing line ids: columns in reading order; within each column, lines sorted top-to-bottom by the `xywh` y-center. 6. DELETE every existing column on the page (see TPEN API below). On any non-2xx, stop and report. Do not POST or PUT after a DELETE failure. -7. For each detected column, POST `{ label, annotations }` where `annotations` is the contiguous slice of the reading-order id sequence from step 5 that belongs to that column. Choose a unique label per column (e.g., `Column A`, `Column B`) that does not clash with any other label chosen in this run. On any non-2xx, stop and report — columns POSTed before the failure remain persisted. +7. For each detected column, POST `{ label, annotations }` where `annotations` is the contiguous slice of the reading-order id sequence from step 5 that belongs to that column. Choose a unique label per column (e.g., `Column A`, `Column B`) that does not clash with any other label chosen in this run. On any non-2xx, stop and report — columns POSTed before the failure remain persisted. Run step 7 before step 8 so the PUT's column-remap path isn't exercised; lines echoed verbatim in step 8 do not mint a new RERUM version, so column membership stays stable. 8. Compare the step-5 sequence against the "Existing lines" order index-by-index. If they are identical, skip the PUT. Otherwise, PUT the page with `items` in the step-5 order. Each entry re-uses the existing annotation URI verbatim as its `id`, its `body` reconstructed from the entry's body form, and its `target` rebuilt from the entry's `xywh` selector. The server remaps column references when URIs change, but echoing `body` and `target` verbatim avoids minting unnecessary RERUM versions. 
On any non-2xx, stop and report. 9. Report: columns deleted, columns created, whether the page order was updated, and per-column line counts. From fa25231ca54ae5972dc1c895e32500d46dad7977 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Fri, 24 Apr 2026 09:32:05 -0500 Subject: [PATCH 36/47] fail faster --- templates/detect-and-transcribe/PROMPT.md | 4 +++- templates/detect-columns-and-lines/PROMPT.md | 6 ++++-- templates/detect-columns/PROMPT.md | 2 +- templates/detect-lines/PROMPT.md | 4 +++- templates/detect-order-and-transcribe/PROMPT.md | 4 +++- templates/transcribe-known-lines/PROMPT.md | 6 +++--- 6 files changed, 17 insertions(+), 9 deletions(-) diff --git a/templates/detect-and-transcribe/PROMPT.md b/templates/detect-and-transcribe/PROMPT.md index 655a5dd..92da6e2 100644 --- a/templates/detect-and-transcribe/PROMPT.md +++ b/templates/detect-and-transcribe/PROMPT.md @@ -11,7 +11,9 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Preconditions -All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: +All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. This template only creates new lines: `lineCount` = `{{lineCount}}`. If `lineCount` is not `0`, stop immediately and report — existing line data must not be modified. + +You must have: 1. Ability to fetch the image bytes (or a derivative) and identify line bounds and text from them. Precise pixel measurement is preferred when available; visual estimation and on-sight transcription from the fetched image are acceptable otherwise. 2. Either HTTP PUT capability with `Content-Type: application/json`, or the ability to emit the payload as a fallback JSON code block in your report. If HTTP PUT is not available, skip straight to the Fallback section — do not retry. 
diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index 2370f9a..af93779 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -15,7 +15,9 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Preconditions -All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: +All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. This template only creates new lines: `lineCount` = `{{lineCount}}`. If `lineCount` is not `0`, stop immediately and report — existing line data must not be modified. + +You must have: 1. Ability to fetch the image bytes (or a derivative) and identify line and column bounds from them. Precise pixel measurement is preferred when available; visual estimation from the fetched image is acceptable otherwise. 2. Either HTTP PUT and POST capability with `Content-Type: application/json`, or the ability to emit the lines-only payload as a fallback JSON code block in your report. If either verb is unavailable, skip straight to the Fallback section — do not retry. Column creation has no fallback; it is dropped when the fallback path is taken. @@ -33,7 +35,7 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp `x,y,w,h` so that `0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`. 4. If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once in the global reading-order sequence from step 2. If the PUT returns non-2xx, stop and fall back — lines are not persisted yet. If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. 
The PUT response's `items` array is guaranteed to be in the same order as the submitted items, so use each line's column index from step 2 to slice the returned ids. Labels must be unique and must not clash with anything in "Existing columns on this page". If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. -5. If HTTP PUT or POST is unavailable (or the PUT in step 4 failed), emit the condensed payload under **Fallback** as the final code block. Column creation is out of scope for the fallback path. +5. If the PUT in step 4 failed, emit the condensed payload under **Fallback** as the final code block. Column creation is out of scope for the fallback path. 6. Report counts (lines saved/in payload, columns created/in payload) and which path was used. ## Rules diff --git a/templates/detect-columns/PROMPT.md b/templates/detect-columns/PROMPT.md index 186c9d1..92f427d 100644 --- a/templates/detect-columns/PROMPT.md +++ b/templates/detect-columns/PROMPT.md @@ -23,7 +23,7 @@ The body form is one of: ## Preconditions -All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions, existing-line list) are provided above. This task operates on an existing line set: `lineCount` = `{{lineCount}}`. If `lineCount` is `0`, stop immediately and return a failure report — this task cannot create lines. +All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions, existing-line list) are provided above. This template only groups existing lines: `lineCount` = `{{lineCount}}`. If `lineCount` is `0`, stop immediately and report — this prompt must not create lines. You must have: diff --git a/templates/detect-lines/PROMPT.md b/templates/detect-lines/PROMPT.md index e4c1a15..5f28ac1 100644 --- a/templates/detect-lines/PROMPT.md +++ b/templates/detect-lines/PROMPT.md @@ -11,7 +11,9 @@ You are assisting with TPEN manuscript transcription. 
Perform the task end-to-en ## Preconditions -All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: +All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. This template only creates new lines: `lineCount` = `{{lineCount}}`. If `lineCount` is not `0`, stop immediately and report — existing line data must not be modified. + +You must have: 1. Ability to fetch the image bytes (or a derivative) and identify line bounds from them. Precise pixel measurement is preferred when available; visual estimation from the fetched image is acceptable otherwise. 2. Either HTTP PUT capability with `Content-Type: application/json`, or the ability to emit the payload as a fallback JSON code block in your report. If HTTP PUT is not available, skip straight to the Fallback section — do not retry. diff --git a/templates/detect-order-and-transcribe/PROMPT.md b/templates/detect-order-and-transcribe/PROMPT.md index c832baa..41acad9 100644 --- a/templates/detect-order-and-transcribe/PROMPT.md +++ b/templates/detect-order-and-transcribe/PROMPT.md @@ -11,7 +11,9 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Preconditions -All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: +All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. This template only creates new lines: `lineCount` = `{{lineCount}}`. If `lineCount` is not `0`, stop immediately and report — existing line data must not be modified. + +You must have: 1. Ability to fetch the image bytes (or a derivative) and identify line bounds and text from them. Precise pixel measurement is preferred when available; visual estimation and on-sight transcription from the fetched image are acceptable otherwise. 2. 
Either HTTP PUT capability with `Content-Type: application/json`, or the ability to emit the payload as a fallback JSON code block in your report. If HTTP PUT is not available, skip straight to the Fallback section — do not retry. diff --git a/templates/transcribe-known-lines/PROMPT.md b/templates/transcribe-known-lines/PROMPT.md index 0d0c9dc..34658fc 100644 --- a/templates/transcribe-known-lines/PROMPT.md +++ b/templates/transcribe-known-lines/PROMPT.md @@ -17,7 +17,7 @@ Each entry is ` | xywh= | ` in canvas ## Preconditions -All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions, existing-line list) are provided above. This template only revises existing lines: `lineCount` = `{{lineCount}}`. If `lineCount` is `0`, stop immediately and report. +All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions, existing-line list) are provided above. This template only revises existing lines: `lineCount` = `{{lineCount}}`. If `lineCount` is `0`, stop immediately and report — this prompt must not create lines. You must have: @@ -35,8 +35,8 @@ Use only tools already available in your environment. Do not install packages, l - `pixel_h = round(canvas_h * img_h / {{canvasHeight}})` Crop each line region and verify it visibly contains a single line of inked text. 2. Run handwriting text recognition over each crop. Apply the recognition rules below. -3. If HTTP PATCH is available, PATCH the text to each line's line-text endpoint — one PATCH per line in the "Existing lines" list. On any non-2xx, record the status and continue with the remaining lines. If every PATCH returned non-2xx, treat PATCH as unavailable and proceed to step 4. If HTTP PATCH is unavailable from the start, skip directly to step 4. -4. If you reached this step because PATCH was unavailable or every attempt failed, emit the condensed payload under **Fallback** as the final code block. +3. 
If HTTP PATCH is available, PATCH the text to each line's line-text endpoint — one PATCH per line in the "Existing lines" list. On any non-2xx, record the status and continue with the remaining lines. If every PATCH returned non-2xx, stop and report the per-line statuses — do not emit a fallback payload; the same token and content would be re-submitted through it. +4. If HTTP PATCH is unavailable from the start, emit the condensed payload under **Fallback** as the final code block. 5. Report counts (lines updated, lines flagged illegible, lines failed) and which path was used. ## Rules From b77c4e74c90a6e0bd305322dbc307e21a49cf5f7 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Mon, 27 Apr 2026 12:56:37 -0500 Subject: [PATCH 37/47] changes during review and sync --- templates/detect-and-transcribe/PROMPT.md | 2 +- templates/detect-columns-and-lines/PROMPT.md | 2 +- .../detect-order-and-transcribe/PROMPT.md | 58 +++++++++++++------ .../detect-order-and-transcribe/index.js | 15 +++-- 4 files changed, 51 insertions(+), 26 deletions(-) diff --git a/templates/detect-and-transcribe/PROMPT.md b/templates/detect-and-transcribe/PROMPT.md index 92da6e2..65fe1cb 100644 --- a/templates/detect-and-transcribe/PROMPT.md +++ b/templates/detect-and-transcribe/PROMPT.md @@ -31,7 +31,7 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp `x,y,w,h` so that `0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`. 4. Run handwriting text recognition on each line's crop. Apply the recognition rules below. -5. If HTTP PUT is available, build the full payload under **TPEN API** — one Annotation per line with the recognized text and `xywh=x,y,w,h` selector — and send the request once. On any non-2xx response, do not retry — fall back. +5. If HTTP PUT is available, build the full payload under **TPEN API** and send the request once. 
On any non-2xx response, do not retry — fall back. 6. If HTTP PUT is unavailable (or step 5 fell back), emit the condensed payload under **Fallback** as the final code block. 7. Report counts (lines saved/in payload, non-empty text, uncertain) and which path was used (direct PUT or fallback). 8. Report notable ambiguities (e.g., illegible lines transcribed as empty or flagged). diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index af93779..79d8768 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -36,7 +36,7 @@ Use only tools already available in your environment. Do not install packages, l Then clamp `x,y,w,h` so that `0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`. 4. If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once in the global reading-order sequence from step 2. If the PUT returns non-2xx, stop and fall back — lines are not persisted yet. If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The PUT response's `items` array is guaranteed to be in the same order as the submitted items, so use each line's column index from step 2 to slice the returned ids. Labels must be unique and must not clash with anything in "Existing columns on this page". If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. 5. If the PUT in step 4 failed, emit the condensed payload under **Fallback** as the final code block. Column creation is out of scope for the fallback path. -6. Report counts (lines saved/in payload, columns created/in payload) and which path was used. +6. Report counts (lines saved/in payload, columns created/in payload) and which path was used (direct or fallback). 
## Rules diff --git a/templates/detect-order-and-transcribe/PROMPT.md b/templates/detect-order-and-transcribe/PROMPT.md index 41acad9..88be4f9 100644 --- a/templates/detect-order-and-transcribe/PROMPT.md +++ b/templates/detect-order-and-transcribe/PROMPT.md @@ -1,4 +1,4 @@ -# Task: detect, order, and transcribe every text line on a TPEN3 page end-to-end +# Task: detect columns, order lines, and transcribe every text line on a TPEN3 page end-to-end You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services (direct) or emitted as a fallback JSON payload for the user to paste. @@ -9,21 +9,25 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en - Image: {{imageUrl}} - Page endpoint: {{pageEndpoint}} +## Existing columns on this page + +{{existingColumns}} + ## Preconditions All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. This template only creates new lines: `lineCount` = `{{lineCount}}`. If `lineCount` is not `0`, stop immediately and report — existing line data must not be modified. You must have: -1. Ability to fetch the image bytes (or a derivative) and identify line bounds and text from them. Precise pixel measurement is preferred when available; visual estimation and on-sight transcription from the fetched image are acceptable otherwise. -2. Either HTTP PUT capability with `Content-Type: application/json`, or the ability to emit the payload as a fallback JSON code block in your report. If HTTP PUT is not available, skip straight to the Fallback section — do not retry. +1. Ability to fetch the image bytes (or a derivative) and identify column and line bounds plus text from them. Precise pixel measurement is preferred when available; visual estimation and on-sight transcription from the fetched image are acceptable otherwise. +2. 
Either HTTP PUT and POST capability with `Content-Type: application/json`, or the ability to emit the lines-only payload as a fallback JSON code block in your report. If either verb is unavailable, skip straight to the Fallback section — do not retry. Column creation has no fallback; it is dropped when the fallback path is taken. Use only tools already available in your environment. Do not install packages, libraries, or system utilities. ## Steps 1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, fetch a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. If you have precise pixel tooling and want tighter bounds or a clearer crop for transcription, request a region server-side as `{base}/x,y,w,h/max/0/default.jpg` and add the crop's `x,y` origin back before applying the canvas conversion below. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. -2. Identify the page's layout. If the page has multiple text blocks side-by-side, determine their reading order (left→right for Latin-script layouts; adjust for script tradition). Within each block, detect lines top-to-bottom. Then flatten into a single global reading-order sequence across blocks (block-major: every line in the first block, then the second, etc.). Single-block pages collapse to one top-to-bottom sequence. This task does not create TPEN columns. +2. Detect main text column regions in reading order first, then detect the lines inside each column (reading order preserved within each column). If the page visibly has a single text block, create one column containing every detected line — do not subdivide. Track each line's column index (an integer, 0-based) as you detect it. 
Then flatten into a single global reading-order sequence across columns (column-major: every line in the first column, then the second, etc., adjusted for script tradition). 3. For every line, measure a bounding box in image-pixel space and convert to integer canvas coordinates using: - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` @@ -31,21 +35,25 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp `x,y,w,h` so that `0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`. 4. Run handwriting text recognition on each line's crop. Apply the recognition rules below. -5. If HTTP PUT is available, build the full payload under **TPEN API** in the global reading-order sequence from step 2 and send the request once. On any non-2xx response, do not retry — fall back. -6. If HTTP PUT is unavailable (or step 5 fell back), emit the condensed payload under **Fallback** as the final code block. -7. Report counts (lines saved/in payload, non-empty text, uncertain) and which path was used (direct PUT or fallback). -8. Report notable ambiguities (e.g., illegible lines transcribed as empty or flagged) and the detected block count so reviewers know whether a multi-block layout was recognised. +5. If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once in the global reading-order sequence from step 2. If the PUT returns non-2xx, stop and fall back — lines are not persisted yet. If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The PUT response's `items` array is guaranteed to be in the same order as the submitted items, so use each line's column index from step 2 to slice the returned ids. 
Labels must be unique and must not clash with anything in "Existing columns on this page". If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. +6. If the PUT in step 5 failed, emit the condensed payload under **Fallback** as the final code block. Column creation is out of scope for the fallback path. +7. Report counts (lines saved/in payload, non-empty text, uncertain, columns created/in payload) and which path was used (direct or fallback). +8. Report notable ambiguities (e.g., illegible lines transcribed as empty or flagged). ## Rules ### Detection (IMAGE_ANALYSIS) - Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. -- The PUT `items` order is the page's canonical reading order; do not interleave lines from different blocks. +- Column labels are page-scoped and must be unique. Do not duplicate an existing column label. +- Each line annotation belongs to at most one column. +- Preserve reading order across columns and within each column. +- Line geometry is the primary accuracy target. Column grouping is secondary — for a single-column page, one column containing every line is correct. - Prefer tight bounds when you can measure them; best-effort bounds are acceptable. When uncertain whether a tall run is one line or several, prefer splitting over merging. - Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line. +- Do not POST a column with an empty `annotations` array — the server rejects it. Skip any detected column that ends up with zero assigned lines. - Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected downstream. -- Zero lines detected is an unprocessable outcome. Stop and report — do not PUT, do not emit a fallback payload. An empty `items` array would erase every existing annotation on the page. 
+- Zero lines detected is an unprocessable outcome. Stop and report — do not PUT, do not POST a column, do not emit a fallback payload. An empty `items` array would erase every existing annotation on the page. ### Recognition (HANDWRITING_TEXT_RECOGNITION) @@ -58,7 +66,7 @@ Use only tools already available in your environment. Do not install packages, l ## TPEN API -Save every detected line with its transcription in a single PUT. The `items` array must contain one annotation per detected line, in the reading-order sequence from step 2; replace `x,y,w,h` with the integer canvas coordinates computed in step 3, and `` with the recognized text (empty string for fully illegible lines). +Save every detected line with its transcription in a single PUT. The `items` array must contain one annotation per detected line, in the global reading-order sequence from step 2; replace `x,y,w,h` with the integer canvas coordinates computed in step 3, and the `TextualBody` `value` placeholder with the recognized text (empty string for fully illegible lines). ``` PUT {{pageEndpoint}} @@ -84,9 +92,22 @@ Content-Type: application/json } ``` +Then POST each column (reuse the same Bearer token as the PUT above): + +``` +POST {{pageEndpoint}}/column +Authorization: Bearer <token from the PUT above> +Content-Type: application/json + +{ + "label": "Column A", + "annotations": ["<line-id-1>", "<line-id-2>"] +} +``` + ## Fallback -When the direct PUT is impossible or returns non-2xx, emit the condensed payload below as the final code block of your report, in the reading-order sequence from step 2. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON (no comments, no placeholders — substitute the real coordinates and recognized text). +When the direct path is unavailable or the PUT returns non-2xx, emit the condensed payload below as the final code block of your report, in the global reading-order sequence from step 2.
The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON (no comments, no placeholders — substitute the real coordinates and recognized text). Column creation is out of scope for this fallback. ``` { @@ -96,21 +117,22 @@ When the direct PUT is impossible or returns non-2xx, emit the condensed payload } ``` -One item per detected line. `target` is the bare selector value (no `#`, no `pixel:` prefix). `text` is an empty string for fully illegible lines — do not drop the item. Item order is the page's canonical reading order; do not interleave lines from different blocks. +One item per detected line, in the global reading-order sequence. `target` is the bare selector value (no `#`, no `pixel:` prefix). `text` is an empty string for fully illegible lines — do not drop the item. ## Completion -Direct PUT path, report: +Direct path, report: -- operation: `PUT page` -- target: {{pageEndpoint}} -- counts: lines saved, lines with non-empty text, lines flagged uncertain, text blocks detected +- operations: `PUT page`, `POST column` (×N) +- target: {{pageEndpoint}} (page) and {{pageEndpoint}}/column +- counts: lines saved, lines with non-empty text, lines flagged uncertain, columns created +- whether lines were saved even if a column POST failed (partial success is acceptable — describe what persists) - notable ambiguities worth a human review Fallback path, report: - path: `fallback` -- counts: lines in payload, lines with non-empty text, lines flagged uncertain, text blocks detected +- counts: lines in payload, lines with non-empty text, lines flagged uncertain - HTTP status and error body if a PUT was attempted first - notable ambiguities worth a human review - final code block: the condensed `{ "items": [...] 
}` JSON for the user to paste diff --git a/templates/detect-order-and-transcribe/index.js b/templates/detect-order-and-transcribe/index.js index 255c5cb..8bff6ce 100644 --- a/templates/detect-order-and-transcribe/index.js +++ b/templates/detect-order-and-transcribe/index.js @@ -1,19 +1,22 @@ /** - * @file Template: "Detect lines + order + transcribe → PUT page". + * @file Template: "Detect columns + lines + transcribe → PUT page, POST columns". * - * Combines the multi-block reading-order detection from + * Combines the multi-block reading-order detection and column creation from * detect-columns-and-lines with the handwriting recognition from - * detect-and-transcribe, without creating column annotations. + * detect-and-transcribe. * * @author thehabes */ -import { buildTemplateContext } from '../inject-context.js' +import { buildTemplateContext, formatExistingColumns } from '../inject-context.js' /** @type {import('../../prompt-generator.js').PromptTemplate} */ export const detectOrderAndTranscribeTemplate = { id: 'detect-order-and-transcribe', - label: 'Line Detection + Ordering + Transcription', + label: 'Line Detection + Column Grouping + Transcription', templateUrl: new URL('./PROMPT.md', import.meta.url), - buildContext: buildTemplateContext + buildContext: (ctx) => ({ + ...buildTemplateContext(ctx), + existingColumns: formatExistingColumns(ctx.project, ctx.page) + }) } From abc20373c5076856d8272cb4df5d3b9ec2235b5f Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Mon, 27 Apr 2026 12:57:07 -0500 Subject: [PATCH 38/47] changes during review and sync --- templates/detect-columns-and-lines/PROMPT.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index 79d8768..604d6d1 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -53,7 +53,7 @@ Use only tools already available in your environment.
Do not install packages, l ## TPEN API -Save all lines via a single PUT. The `items` array must contain one annotation per detected line; replace `x,y,w,h` with the integer canvas coordinates computed in step 3. +Save all detected lines via a single PUT. The `items` array must contain one annotation per detected line, in the global reading-order sequence from step 2; replace `x,y,w,h` with the integer canvas coordinates computed in step 3. ``` PUT {{pageEndpoint}} @@ -119,5 +119,5 @@ Fallback path, report: - path: `fallback` - counts: lines in payload -- HTTP status and error body if a request was attempted first +- HTTP status and error body if a PUT was attempted first - final code block: the condensed `{ "items": [...] }` JSON for the user to paste From c62296f994c94a02664576cac545f79621c99924 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Mon, 27 Apr 2026 14:59:26 -0500 Subject: [PATCH 39/47] changes from testing --- templates/detect-and-transcribe/PROMPT.md | 2 +- templates/detect-columns-and-lines/PROMPT.md | 2 +- templates/detect-lines/PROMPT.md | 2 +- templates/detect-order-and-transcribe/PROMPT.md | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/templates/detect-and-transcribe/PROMPT.md b/templates/detect-and-transcribe/PROMPT.md index 65fe1cb..ee15ebe 100644 --- a/templates/detect-and-transcribe/PROMPT.md +++ b/templates/detect-and-transcribe/PROMPT.md @@ -23,7 +23,7 @@ Use only tools already available in your environment. Do not install packages, l ## Steps 1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, fetch a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. 
If you have precise pixel tooling and want tighter bounds or a clearer crop for transcription, request a region server-side as `{base}/x,y,w,h/max/0/default.jpg` and add the crop's `x,y` origin back before applying the canvas conversion below. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. -2. Detect text lines across the whole page in reading order. This task does not create TPEN columns. +2. Detect text lines across the whole page in reading order. This task does not create TPEN columns. Then do exactly one self-review pass to tweak line placement — catch missed lines, merge over-splits, split over-merges, tighten loose bounds. One pass only, then move on. 3. For every line, measure a bounding box in image-pixel space and convert to integer canvas coordinates using: - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index 604d6d1..214370b 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -27,7 +27,7 @@ Use only tools already available in your environment. Do not install packages, l ## Steps 1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, fetch a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. If you have precise pixel tooling and want tighter bounds on a region, request it server-side as `{base}/x,y,w,h/max/0/default.jpg` and add the crop's `x,y` origin back before applying the canvas conversion below. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. -2. 
Detect main text column regions in reading order first, then detect the lines inside each column (reading order preserved within each column). If the page visibly has a single text block, create one column containing every detected line — do not subdivide. Track each line's column index (an integer, 0-based) as you detect it. Then flatten into a single global reading-order sequence across columns (column-major: every line in the first column, then the second, etc., adjusted for script tradition). +2. Detect main text column regions in reading order first, then detect the lines inside each column (reading order preserved within each column). If the page visibly has a single text block, create one column containing every detected line — do not subdivide. Track each line's column index (an integer, 0-based) as you detect it. Then flatten into a single global reading-order sequence across columns (column-major: every line in the first column, then the second, etc., adjusted for script tradition). Then do exactly one self-review pass to tweak line placement — catch missed lines, merge over-splits, split over-merges, tighten loose bounds. One pass only, then move on. 3. For every line, measure a bounding box in image-pixel space and convert to integer canvas coordinates using: - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` diff --git a/templates/detect-lines/PROMPT.md b/templates/detect-lines/PROMPT.md index 5f28ac1..c4a18a6 100644 --- a/templates/detect-lines/PROMPT.md +++ b/templates/detect-lines/PROMPT.md @@ -23,7 +23,7 @@ Use only tools already available in your environment. Do not install packages, l ## Steps 1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. 
For the page-overview pass, fetch a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. If you have precise pixel tooling and want tighter bounds on a region, request it server-side as `{base}/x,y,w,h/max/0/default.jpg` and add the crop's `x,y` origin back before applying the canvas conversion below. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. -2. Detect text lines across the whole page in reading order. This task does not create TPEN columns. +2. Detect text lines across the whole page in reading order. This task does not create TPEN columns. Then do exactly one self-review pass to tweak line placement — catch missed lines, merge over-splits, split over-merges, tighten loose bounds. One pass only, then move on. 3. For every line, measure a bounding box in image-pixel space and convert to integer canvas coordinates using: - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` diff --git a/templates/detect-order-and-transcribe/PROMPT.md b/templates/detect-order-and-transcribe/PROMPT.md index 88be4f9..d3ecbd3 100644 --- a/templates/detect-order-and-transcribe/PROMPT.md +++ b/templates/detect-order-and-transcribe/PROMPT.md @@ -27,7 +27,7 @@ Use only tools already available in your environment. Do not install packages, l ## Steps 1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, fetch a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. 
If you have precise pixel tooling and want tighter bounds or a clearer crop for transcription, request a region server-side as `{base}/x,y,w,h/max/0/default.jpg` and add the crop's `x,y` origin back before applying the canvas conversion below. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. -2. Detect main text column regions in reading order first, then detect the lines inside each column (reading order preserved within each column). If the page visibly has a single text block, create one column containing every detected line — do not subdivide. Track each line's column index (an integer, 0-based) as you detect it. Then flatten into a single global reading-order sequence across columns (column-major: every line in the first column, then the second, etc., adjusted for script tradition). +2. Detect main text column regions in reading order first, then detect the lines inside each column (reading order preserved within each column). If the page visibly has a single text block, create one column containing every detected line — do not subdivide. Track each line's column index (an integer, 0-based) as you detect it. Then flatten into a single global reading-order sequence across columns (column-major: every line in the first column, then the second, etc., adjusted for script tradition). Then do exactly one self-review pass to tweak line placement — catch missed lines, merge over-splits, split over-merges, tighten loose bounds. One pass only, then move on. 3. For every line, measure a bounding box in image-pixel space and convert to integer canvas coordinates using: - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` From 10976cc86bdd79a4bdd2792d43752b55ab745120 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Mon, 27 Apr 2026 20:48:03 -0500 Subject: [PATCH 40/47] Relax the origin gate in the child. 
--- config.js | 10 +++++----- message-handler.js | 25 ++++++++++--------------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/config.js b/config.js index ff9da4b..3aaebea 100644 --- a/config.js +++ b/config.js @@ -22,25 +22,25 @@ const ACTIVE_ENV = ( ?? 'prod' ) -/** @type {Record} */ +/** @type {Record} */ const ENVIRONMENTS = { local: { servicesURL: 'http://localhost:3012', - TPEN3URL: 'http://localhost:4000' + TPENINTERFACESURL: 'http://localhost:4000' }, dev: { servicesURL: 'https://dev.api.t-pen.org', - TPEN3URL: 'http://localhost:4000' + TPENINTERFACESURL: 'http://localhost:4000' }, prod: { servicesURL: 'https://api.t-pen.org', - TPEN3URL: 'https://app.t-pen.org' + TPENINTERFACESURL: 'https://app.t-pen.org' } } /** * Active config for this page load, flattened for convenient destructuring. - * @type {{ env: string, servicesURL: string, TPEN3URL: string }} + * @type {{ env: string, servicesURL: string, TPENINTERFACESURL: string }} */ export const CONFIG = { env: ACTIVE_ENV, diff --git a/message-handler.js b/message-handler.js index f81ecf1..b29aa45 100644 --- a/message-handler.js +++ b/message-handler.js @@ -7,19 +7,15 @@ * clicking the consent button sends `REQUEST_TPEN_ID_TOKEN` upstream, and * the parent replies with `TPEN_ID_TOKEN`. * + * Replies are aimed at `parentOrigin`, captured from the first inbound + * message; before any inbound arrives, `CONFIG.TPENINTERFACESURL` is used + * as the default target. + * * @author thehabes */ import { CONFIG } from './config.js' -// Accept messages only from known TPEN3 origins and the current origin (for -// same-origin dev harnesses). Anything else could inject auth tokens or drive -// the tool's state, so drop silently. -const ALLOWED_ORIGINS = new Set([ - CONFIG.TPEN3URL, - location.origin -]) - /** * Listens on `window` for postMessage traffic from the TPEN3 parent and routes * recognized message types to the `PromptsApp`. 
@@ -30,17 +26,16 @@ export class MessageHandler { */ constructor(app) { this.app = app - /** Origin of the first trusted inbound message; used as targetOrigin for replies. */ + /** Origin of the first inbound message; used as targetOrigin for replies. */ this.parentOrigin = null window.addEventListener('message', (event) => this.handle(event)) } /** - * Route an incoming postMessage. Origin-gated; unknown types are ignored. + * Route an incoming postMessage. Unknown types are ignored. * @param {MessageEvent} event */ handle(event) { - if (!ALLOWED_ORIGINS.has(event.origin)) return this.parentOrigin ??= event.origin const data = event.data if (!data?.type) return @@ -61,15 +56,15 @@ export class MessageHandler { /** * Post a message to the parent frame. Replies target the origin of the - * first trusted inbound message; before any inbound arrives we fall back - * to `CONFIG.TPEN3URL` (the expected production parent). No-op when the - * parent is the page itself. + * first inbound message; before any inbound arrives we fall back to + * `CONFIG.TPENINTERFACESURL` (the expected production parent). No-op + * when the parent is the page itself. * @param {object} message * @returns {boolean} true when a parent frame exists and the post was dispatched. */ #postToParent(message) { if (window.parent === window) return false - const targetOrigin = this.parentOrigin ?? CONFIG.TPEN3URL + const targetOrigin = this.parentOrigin ?? 
CONFIG.TPENINTERFACESURL window.parent.postMessage(message, targetOrigin) return true } From e19e4fa1b7fa96236b33fdbf66ae50dbc48ce61b Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Mon, 27 Apr 2026 21:17:20 -0500 Subject: [PATCH 41/47] changes from review --- templates/detect-and-transcribe/PROMPT.md | 7 +++---- templates/detect-columns-and-lines/PROMPT.md | 7 +++---- templates/detect-columns/PROMPT.md | 8 +------- templates/detect-lines/PROMPT.md | 7 +++---- templates/detect-order-and-transcribe/PROMPT.md | 7 +++---- templates/transcribe-known-lines/PROMPT.md | 5 ++--- ui-manager.js | 2 +- 7 files changed, 16 insertions(+), 27 deletions(-) diff --git a/templates/detect-and-transcribe/PROMPT.md b/templates/detect-and-transcribe/PROMPT.md index ee15ebe..e1a357d 100644 --- a/templates/detect-and-transcribe/PROMPT.md +++ b/templates/detect-and-transcribe/PROMPT.md @@ -31,8 +31,8 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp `x,y,w,h` so that `0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`. 4. Run handwriting text recognition on each line's crop. Apply the recognition rules below. -5. If HTTP PUT is available, build the full payload under **TPEN API** and send the request once. On any non-2xx response, do not retry — fall back. -6. If HTTP PUT is unavailable (or step 5 fell back), emit the condensed payload under **Fallback** as the final code block. +5. If HTTP PUT is available, build the full payload under **TPEN API** and send the request once. On any non-2xx response, stop and report the status and error body — do not emit a fallback payload; the same token and content would be re-submitted through it. +6. If HTTP PUT is unavailable from the start, emit the condensed payload under **Fallback** as the final code block — do not also attempt PUT. 7. 
Report counts (lines saved/in payload, non-empty text, uncertain) and which path was used (direct PUT or fallback). 8. Report notable ambiguities (e.g., illegible lines transcribed as empty or flagged). @@ -86,7 +86,7 @@ Content-Type: application/json ## Fallback -When the direct PUT is impossible or returns non-2xx, emit the condensed payload below as the final code block of your report. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON (no comments, no placeholders — substitute the real coordinates and recognized text). +When HTTP PUT is unavailable from the start, emit the condensed payload below as the final code block of your report. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON (no comments, no placeholders — substitute the real coordinates and recognized text). ``` { @@ -111,6 +111,5 @@ Fallback path, report: - path: `fallback` - counts: lines in payload, lines with non-empty text, lines flagged uncertain -- HTTP status and error body if a PUT was attempted first - notable ambiguities worth a human review - final code block: the condensed `{ "items": [...] }` JSON for the user to paste diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index 214370b..e46c2fc 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -34,8 +34,8 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp `x,y,w,h` so that `0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`. -4. 
If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once in the global reading-order sequence from step 2. If the PUT returns non-2xx, stop and fall back — lines are not persisted yet. If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The PUT response's `items` array is guaranteed to be in the same order as the submitted items, so use each line's column index from step 2 to slice the returned ids. Labels must be unique and must not clash with anything in "Existing columns on this page". If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. -5. If the PUT in step 4 failed, emit the condensed payload under **Fallback** as the final code block. Column creation is out of scope for the fallback path. +4. If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once in the global reading-order sequence from step 2. If the PUT returns non-2xx, stop and report the status and error body — do not emit a fallback payload; the same token and content would be re-submitted through it. If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The PUT response's `items` array is guaranteed to be in the same order as the submitted items, so use each line's column index from step 2 to slice the returned ids. Labels must be unique and must not clash with anything in "Existing columns on this page". If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. +5. If HTTP PUT or POST is unavailable from the start, emit the condensed payload under **Fallback** as the final code block — do not also attempt PUT. Column creation is out of scope for the fallback path. 6. 
Report counts (lines saved/in payload, columns created/in payload) and which path was used (direct or fallback). ## Rules @@ -94,7 +94,7 @@ Content-Type: application/json ## Fallback -When the direct path is unavailable or returns non-2xx, emit the condensed payload below as the final code block of your report, in the global reading-order sequence from step 2. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON. Column creation is out of scope for this fallback. +When HTTP PUT or POST is unavailable from the start, emit the condensed payload below as the final code block of your report, in the global reading-order sequence from step 2. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON. Column creation is out of scope for this fallback. ``` { @@ -119,5 +119,4 @@ Fallback path, report: - path: `fallback` - counts: lines in payload -- HTTP status and error body if a PUT was attempted first - final code block: the condensed `{ "items": [...] }` JSON for the user to paste diff --git a/templates/detect-columns/PROMPT.md b/templates/detect-columns/PROMPT.md index 92f427d..c242177 100644 --- a/templates/detect-columns/PROMPT.md +++ b/templates/detect-columns/PROMPT.md @@ -11,13 +11,7 @@ You are assisting with TPEN manuscript transcription. This task rebuilds the col ## Existing lines -Each entry is ` | xywh= | ` in canvas coordinates, printed in the page's current order. Use the full annotation URI verbatim when assigning lines to columns and when echoing lines in the page PUT. Compare the current order against the reading-order sequence you compute in step 5 to decide whether the PUT in step 8 is necessary. - -The body form is one of: - -- `body=[]` — echo as `[]`. 
-- `text=""` — `` is a JSON string literal (quotes and escapes already encoded). Echo as `[{ "type": "TextualBody", "value": , "format": "text/plain" }]` — paste it straight into the `value` slot, do not re-quote or re-escape. -- `body=` — echo the JSON verbatim. +Each entry is ` | xywh= | ` in canvas coordinates, in the page's current order. Use the URI verbatim when assigning lines to columns and when echoing lines in the page PUT. Echo each body form verbatim: `body=[]` → `[]`; `text=""` → `[{ "type": "TextualBody", "value": , "format": "text/plain" }]` (paste `` as-is — already a JSON string literal, do not re-quote or re-escape); `body=` → the JSON verbatim. Compare the current order against the reading-order sequence from step 5 to decide whether the PUT in step 8 is necessary. {{existingLines}} diff --git a/templates/detect-lines/PROMPT.md b/templates/detect-lines/PROMPT.md index c4a18a6..312c727 100644 --- a/templates/detect-lines/PROMPT.md +++ b/templates/detect-lines/PROMPT.md @@ -30,8 +30,8 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp `x,y,w,h` so that `0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`. -4. If HTTP PUT is available, build the full payload under **TPEN API** and send the request once. On any non-2xx response, do not retry — fall back. -5. If HTTP PUT is unavailable (or step 4 fell back), emit the condensed payload under **Fallback** as the final code block. +4. If HTTP PUT is available, build the full payload under **TPEN API** and send the request once. On any non-2xx response, stop and report the status and error body — do not emit a fallback payload; the same token and content would be re-submitted through it. +5. If HTTP PUT is unavailable from the start, emit the condensed payload under **Fallback** as the final code block — do not also attempt PUT. 6. 
Report count and which path was used (direct PUT or fallback). ## Rules @@ -73,7 +73,7 @@ Content-Type: application/json ## Fallback -When the direct PUT is impossible or returns non-2xx, emit the condensed payload below as the final code block of your report. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON (no comments, no placeholders — substitute the real coordinates). +When HTTP PUT is unavailable from the start, emit the condensed payload below as the final code block of your report. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON (no comments, no placeholders — substitute the real coordinates). ``` { @@ -97,5 +97,4 @@ Fallback path, report: - path: `fallback` - count: number of line annotations in the payload -- HTTP status and error body if a PUT was attempted first - final code block: the condensed `{ "items": [...] }` JSON for the user to paste diff --git a/templates/detect-order-and-transcribe/PROMPT.md b/templates/detect-order-and-transcribe/PROMPT.md index d3ecbd3..584dd86 100644 --- a/templates/detect-order-and-transcribe/PROMPT.md +++ b/templates/detect-order-and-transcribe/PROMPT.md @@ -35,8 +35,8 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp `x,y,w,h` so that `0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`. 4. Run handwriting text recognition on each line's crop. Apply the recognition rules below. -5. If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once in the global reading-order sequence from step 2. If the PUT returns non-2xx, stop and fall back — lines are not persisted yet. 
If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The PUT response's `items` array is guaranteed to be in the same order as the submitted items, so use each line's column index from step 2 to slice the returned ids. Labels must be unique and must not clash with anything in "Existing columns on this page". If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. -6. If the PUT in step 5 failed, emit the condensed payload under **Fallback** as the final code block. Column creation is out of scope for the fallback path. +5. If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once in the global reading-order sequence from step 2. If the PUT returns non-2xx, stop and report the status and error body — do not emit a fallback payload; the same token and content would be re-submitted through it. If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The PUT response's `items` array is guaranteed to be in the same order as the submitted items, so use each line's column index from step 2 to slice the returned ids. Labels must be unique and must not clash with anything in "Existing columns on this page". If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. +6. If HTTP PUT or POST is unavailable from the start, emit the condensed payload under **Fallback** as the final code block — do not also attempt PUT. Column creation is out of scope for the fallback path. 7. Report counts (lines saved/in payload, non-empty text, uncertain, columns created/in payload) and which path was used (direct or fallback). 8. Report notable ambiguities (e.g., illegible lines transcribed as empty or flagged). 
@@ -107,7 +107,7 @@ Content-Type: application/json ## Fallback -When the direct path is unavailable or returns non-2xx, emit the condensed payload below as the final code block of your report, in the global reading-order sequence from step 2. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON (no comments, no placeholders — substitute the real coordinates and recognized text). Column creation is out of scope for this fallback. +When HTTP PUT or POST is unavailable from the start, emit the condensed payload below as the final code block of your report, in the global reading-order sequence from step 2. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON (no comments, no placeholders — substitute the real coordinates and recognized text). Column creation is out of scope for this fallback. ``` { @@ -133,6 +133,5 @@ Fallback path, report: - path: `fallback` - counts: lines in payload, lines with non-empty text, lines flagged uncertain -- HTTP status and error body if a PUT was attempted first - notable ambiguities worth a human review - final code block: the condensed `{ "items": [...] }` JSON for the user to paste diff --git a/templates/transcribe-known-lines/PROMPT.md b/templates/transcribe-known-lines/PROMPT.md index 34658fc..ef30d2d 100644 --- a/templates/transcribe-known-lines/PROMPT.md +++ b/templates/transcribe-known-lines/PROMPT.md @@ -36,7 +36,7 @@ Use only tools already available in your environment. Do not install packages, l Crop each line region and verify it visibly contains a single line of inked text. 2. Run handwriting text recognition over each crop. Apply the recognition rules below. 3. 
If HTTP PATCH is available, PATCH the text to each line's line-text endpoint — one PATCH per line in the "Existing lines" list. On any non-2xx, record the status and continue with the remaining lines. If every PATCH returned non-2xx, stop and report the per-line statuses — do not emit a fallback payload; the same token and content would be re-submitted through it. -4. If HTTP PATCH is unavailable from the start, emit the condensed payload under **Fallback** as the final code block. +4. If HTTP PATCH is unavailable from the start, emit the condensed payload under **Fallback** as the final code block — do not also attempt PATCH. 5. Report counts (lines updated, lines flagged illegible, lines failed) and which path was used. ## Rules @@ -61,7 +61,7 @@ Content-Type: text/plain ## Fallback -The fallback tool only accepts JSON, so it uses a single page-level PUT instead of per-line PATCH. When PATCH is unavailable or every attempt returned non-2xx, emit the condensed payload below as the final code block of your report. The TPEN splitscreen tool re-uses each line's existing target from the hydrated page context before PUTting it — the item's `id` must match an entry in "Existing lines" above. +The fallback tool only accepts JSON, so it uses a single page-level PUT instead of per-line PATCH. When PATCH is unavailable from the start, emit the condensed payload below as the final code block of your report. The TPEN splitscreen tool re-uses each line's existing target from the hydrated page context before PUTting it — the item's `id` must match an entry in "Existing lines" above. ``` { @@ -85,5 +85,4 @@ Fallback path, report: - path: `fallback` - counts: lines in payload, lines flagged illegible -- HTTP status and error body if a PATCH was attempted first - final code block: the condensed `{ "items": [...] 
}` JSON for the user to paste diff --git a/ui-manager.js b/ui-manager.js index 4b9aeab..dc9c620 100644 --- a/ui-manager.js +++ b/ui-manager.js @@ -320,7 +320,7 @@ export class UIManager { const warning = el('div', { class: 'warning', attrs: { role: 'note' } }, [ el('strong', { text: 'Security: ' }), - el('span', { text: `The generated prompt carries your TPEN session token so an agentic LLM can manipulate your TPEN data on your behalf. Clicking 'Copy' writes the full token to your clipboard. Only paste it into LLM environments you trust.` }) + el('span', { text: `The generated prompt carries your TPEN session token so an agentic LLM can manipulate your TPEN data on your behalf. Clicking 'Copy' writes the full token to your clipboard. Only paste it into LLM environments you trust. If the token leaks, log out of TPEN to invalidate it.` }) ]) this.#authButton = null From 8ebe2f8832e8e5a949d7e721bf482795d487d68f Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Mon, 27 Apr 2026 21:38:10 -0500 Subject: [PATCH 42/47] Do not need to worry about existing columns here anymore --- templates/detect-columns-and-lines/PROMPT.md | 8 ++---- templates/detect-columns-and-lines/index.js | 7 ++--- .../detect-order-and-transcribe/PROMPT.md | 8 ++---- .../detect-order-and-transcribe/index.js | 7 ++--- templates/inject-context.js | 27 +++---------------- 5 files changed, 11 insertions(+), 46 deletions(-) diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index e46c2fc..f95b67c 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -9,10 +9,6 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en - Image: {{imageUrl}} - Page endpoint: {{pageEndpoint}} -## Existing columns on this page - -{{existingColumns}} - ## Preconditions All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. 
This template only creates new lines: `lineCount` = `{{lineCount}}`. If `lineCount` is not `0`, stop immediately and report — existing line data must not be modified. @@ -34,14 +30,14 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp `x,y,w,h` so that `0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`. -4. If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once in the global reading-order sequence from step 2. If the PUT returns non-2xx, stop and report the status and error body — do not emit a fallback payload; the same token and content would be re-submitted through it. If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The PUT response's `items` array is guaranteed to be in the same order as the submitted items, so use each line's column index from step 2 to slice the returned ids. Labels must be unique and must not clash with anything in "Existing columns on this page". If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. +4. If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once in the global reading-order sequence from step 2. If the PUT returns non-2xx, stop and report the status and error body — do not emit a fallback payload; the same token and content would be re-submitted through it. If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The PUT response's `items` array is guaranteed to be in the same order as the submitted items, so use each line's column index from step 2 to slice the returned ids. 
Labels must be unique within this run. If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. 5. If HTTP PUT or POST is unavailable from the start, emit the condensed payload under **Fallback** as the final code block — do not also attempt PUT. Column creation is out of scope for the fallback path. 6. Report counts (lines saved/in payload, columns created/in payload) and which path was used (direct or fallback). ## Rules - Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. -- Column labels are page-scoped and must be unique. Do not duplicate an existing column label. +- Column labels must be unique within this run. - Each line annotation belongs to at most one column. - Preserve reading order across columns and within each column. - Line geometry is the primary accuracy target. Column grouping is secondary — for a single-column page, one column containing every line is correct. 
diff --git a/templates/detect-columns-and-lines/index.js b/templates/detect-columns-and-lines/index.js index bf2c1c5..7693d6b 100644 --- a/templates/detect-columns-and-lines/index.js +++ b/templates/detect-columns-and-lines/index.js @@ -7,15 +7,12 @@ * @author thehabes */ -import { buildTemplateContext, formatExistingColumns } from '../inject-context.js' +import { buildTemplateContext } from '../inject-context.js' /** @type {import('../../prompt-generator.js').PromptTemplate} */ export const detectColumnsAndLinesTemplate = { id: 'detect-columns-and-lines', label: 'Line Detection + Column Grouping', templateUrl: new URL('./PROMPT.md', import.meta.url), - buildContext: (ctx) => ({ - ...buildTemplateContext(ctx), - existingColumns: formatExistingColumns(ctx.project, ctx.page) - }) + buildContext: buildTemplateContext } diff --git a/templates/detect-order-and-transcribe/PROMPT.md b/templates/detect-order-and-transcribe/PROMPT.md index 584dd86..d3e43e9 100644 --- a/templates/detect-order-and-transcribe/PROMPT.md +++ b/templates/detect-order-and-transcribe/PROMPT.md @@ -9,10 +9,6 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en - Image: {{imageUrl}} - Page endpoint: {{pageEndpoint}} -## Existing columns on this page - -{{existingColumns}} - ## Preconditions All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. This template only creates new lines: `lineCount` = `{{lineCount}}`. If `lineCount` is not `0`, stop immediately and report — existing line data must not be modified. @@ -35,7 +31,7 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp `x,y,w,h` so that `0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`. 4. Run handwriting text recognition on each line's crop. Apply the recognition rules below. -5. 
If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once in the global reading-order sequence from step 2. If the PUT returns non-2xx, stop and report the status and error body — do not emit a fallback payload; the same token and content would be re-submitted through it. If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The PUT response's `items` array is guaranteed to be in the same order as the submitted items, so use each line's column index from step 2 to slice the returned ids. Labels must be unique and must not clash with anything in "Existing columns on this page". If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. +5. If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once in the global reading-order sequence from step 2. If the PUT returns non-2xx, stop and report the status and error body — do not emit a fallback payload; the same token and content would be re-submitted through it. If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The PUT response's `items` array is guaranteed to be in the same order as the submitted items, so use each line's column index from step 2 to slice the returned ids. Labels must be unique within this run. If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. 6. If HTTP PUT or POST is unavailable from the start, emit the condensed payload under **Fallback** as the final code block — do not also attempt PUT. Column creation is out of scope for the fallback path. 7. 
Report counts (lines saved/in payload, non-empty text, uncertain, columns created/in payload) and which path was used (direct or fallback). 8. Report notable ambiguities (e.g., illegible lines transcribed as empty or flagged). @@ -45,7 +41,7 @@ Use only tools already available in your environment. Do not install packages, l ### Detection (IMAGE_ANALYSIS) - Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. -- Column labels are page-scoped and must be unique. Do not duplicate an existing column label. +- Column labels must be unique within this run. - Each line annotation belongs to at most one column. - Preserve reading order across columns and within each column. - Line geometry is the primary accuracy target. Column grouping is secondary — for a single-column page, one column containing every line is correct. diff --git a/templates/detect-order-and-transcribe/index.js b/templates/detect-order-and-transcribe/index.js index 8bff6ce..d59fe2b 100644 --- a/templates/detect-order-and-transcribe/index.js +++ b/templates/detect-order-and-transcribe/index.js @@ -8,15 +8,12 @@ * @author thehabes */ -import { buildTemplateContext, formatExistingColumns } from '../inject-context.js' +import { buildTemplateContext } from '../inject-context.js' /** @type {import('../../prompt-generator.js').PromptTemplate} */ export const detectOrderAndTranscribeTemplate = { id: 'detect-order-and-transcribe', label: 'Line Detection + Column Grouping + Transcription', templateUrl: new URL('./PROMPT.md', import.meta.url), - buildContext: (ctx) => ({ - ...buildTemplateContext(ctx), - existingColumns: formatExistingColumns(ctx.project, ctx.page) - }) + buildContext: buildTemplateContext } diff --git a/templates/inject-context.js b/templates/inject-context.js index 3934669..afeca68 100644 --- a/templates/inject-context.js +++ b/templates/inject-context.js @@ -4,13 +4,13 @@ * Every template consumes a superset of flat `{{name}}` 
variables produced by * `buildTemplateContext`. Individual templates only reference the subset they * need in their PROMPT.md body — unused keys simply don't render. Templates - * that need richer context (e.g. existing column listings) spread this result - * and layer their own keys on top. + * that need richer context (e.g. an `existingLines` listing) spread this + * result and layer their own keys on top. * * @author thehabes */ -import { getIRI, parseXywh, trailingId } from '../iiif-ids.js' +import { getIRI, parseXywh } from '../iiif-ids.js' /** * Pull the first image body URL off a IIIF canvas, or null if none is present. @@ -125,24 +125,3 @@ export function formatExistingLines(fetchedPage) { return `- ${lineUri} | ${xywh} | ${formatBody(item?.body)}` }).join('\n') } - -/** - * Render the current column state for a given page as a markdown bullet list. - * Used by templates that must avoid duplicate column labels. Columns live on - * `project.layers[].pages[]`; the `/resolved` page endpoint does not emit - * them, so the project graph is the only source. - * @param {any} project the TPEN project object. - * @param {any} page the page object returned by `fetchPageResolved`. - * @returns {string} - */ -export function formatExistingColumns(project, page) { - const tail = trailingId(page) - const projectPage = (project?.layers ?? []) - .flatMap(l => l.pages ?? []) - .find(pg => trailingId(pg) === tail) - const cols = projectPage?.columns ?? [] - if (cols.length === 0) { - return '- (No existing columns on this page)' - } - return cols.map(c => `- ${c.label ?? '(unlabeled)'}`).join('\n') -} From 75f3d20f790bc00d7737e3809d41b5b81925cddf Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Tue, 28 Apr 2026 09:26:07 -0500 Subject: [PATCH 43/47] Small touches from manual readthrough of .md and produced prompt. 
--- templates/detect-and-transcribe/PROMPT.md | 8 ++++---- templates/detect-columns-and-lines/PROMPT.md | 6 +++--- templates/detect-columns/PROMPT.md | 8 ++++---- templates/detect-lines/PROMPT.md | 4 ++-- templates/detect-order-and-transcribe/PROMPT.md | 10 +++++----- templates/detect-order-and-transcribe/index.js | 2 +- templates/transcribe-known-lines/PROMPT.md | 6 +++--- 7 files changed, 22 insertions(+), 22 deletions(-) diff --git a/templates/detect-and-transcribe/PROMPT.md b/templates/detect-and-transcribe/PROMPT.md index e1a357d..84f5311 100644 --- a/templates/detect-and-transcribe/PROMPT.md +++ b/templates/detect-and-transcribe/PROMPT.md @@ -30,7 +30,7 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp `x,y,w,h` so that `0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`. -4. Run handwriting text recognition on each line's crop. Apply the recognition rules below. +4. Run text recognition (print or handwriting) on each line's crop. Apply the recognition rules below. 5. If HTTP PUT is available, build the full payload under **TPEN API** and send the request once. On any non-2xx response, stop and report the status and error body — do not emit a fallback payload; the same token and content would be re-submitted through it. 6. If HTTP PUT is unavailable from the start, emit the condensed payload under **Fallback** as the final code block — do not also attempt PUT. 7. Report counts (lines saved/in payload, non-empty text, uncertain) and which path was used (direct PUT or fallback). @@ -40,14 +40,14 @@ Use only tools already available in your environment. Do not install packages, l ### Detection (IMAGE_ANALYSIS) -- Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. +- Bounds MUST be saved as integer coordinates in canvas space. 
No percents. No `percent:` or `pixel:` prefix on the selector value. - Preserve reading order across the whole page. - Prefer tight bounds when you can measure them; best-effort bounds are acceptable. When uncertain whether a tall run is one line or several, prefer splitting over merging. - Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line. -- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected downstream. +- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected by humans downstream. - Zero lines detected is an unprocessable outcome. Stop and report — do not PUT, do not emit a fallback payload. An empty `items` array would erase every existing annotation on the page. -### Recognition (HANDWRITING_TEXT_RECOGNITION) +### Recognition (TEXT_RECOGNITION) - Prioritize diplomatic transcription over normalization. - Preserve orthography and punctuation as observed. diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index f95b67c..64a41cd 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -31,12 +31,12 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp `x,y,w,h` so that `0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`. 4. If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once in the global reading-order sequence from step 2. If the PUT returns non-2xx, stop and report the status and error body — do not emit a fallback payload; the same token and content would be re-submitted through it. 
If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The PUT response's `items` array is guaranteed to be in the same order as the submitted items, so use each line's column index from step 2 to slice the returned ids. Labels must be unique within this run. If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. -5. If HTTP PUT or POST is unavailable from the start, emit the condensed payload under **Fallback** as the final code block — do not also attempt PUT. Column creation is out of scope for the fallback path. +5. If HTTP PUT or POST is unavailable from the start, emit the condensed payload under **Fallback** as the final code block — do not also attempt PUT. 6. Report counts (lines saved/in payload, columns created/in payload) and which path was used (direct or fallback). ## Rules -- Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. +- Bounds MUST be saved as integer coordinates in canvas space. No percents. No `percent:` or `pixel:` prefix on the selector value. - Column labels must be unique within this run. - Each line annotation belongs to at most one column. - Preserve reading order across columns and within each column. @@ -44,7 +44,7 @@ Use only tools already available in your environment. Do not install packages, l - Prefer tight bounds when you can measure them; best-effort bounds are acceptable. When uncertain whether a tall run is one line or several, prefer splitting over merging. - Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line. - Do not POST a column with an empty `annotations` array — the server rejects it. Skip any detected column that ends up with zero assigned lines. 
-- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected downstream. +- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected by humans downstream. - Zero lines detected is an unprocessable outcome. Stop and report — do not PUT, do not POST a column, do not emit a fallback payload. An empty `items` array would erase every existing annotation on the page. ## TPEN API diff --git a/templates/detect-columns/PROMPT.md b/templates/detect-columns/PROMPT.md index c242177..7529e5f 100644 --- a/templates/detect-columns/PROMPT.md +++ b/templates/detect-columns/PROMPT.md @@ -39,8 +39,8 @@ Use only tools already available in your environment. Do not install packages, l 4. For each detected column, determine which of the existing line ids (from the list above) belong to it. Assign a line to the column whose canvas-space region contains the center point of the line's `xywh`. If a line's center falls outside every detected column, assign it to the nearest column by Euclidean distance from the center point to the column's region (distance `0` when the point is inside). Each line belongs to exactly one column. 5. Build a global reading-order sequence of all existing line ids: columns in reading order; within each column, lines sorted top-to-bottom by the `xywh` y-center. 6. DELETE every existing column on the page (see TPEN API below). On any non-2xx, stop and report. Do not POST or PUT after a DELETE failure. -7. For each detected column, POST `{ label, annotations }` where `annotations` is the contiguous slice of the reading-order id sequence from step 5 that belongs to that column. Choose a unique label per column (e.g., `Column A`, `Column B`) that does not clash with any other label chosen in this run. On any non-2xx, stop and report — columns POSTed before the failure remain persisted. 
Run step 7 before step 8 so the PUT's column-remap path isn't exercised; lines echoed verbatim in step 8 do not mint a new RERUM version, so column membership stays stable. -8. Compare the step-5 sequence against the "Existing lines" order index-by-index. If they are identical, skip the PUT. Otherwise, PUT the page with `items` in the step-5 order. Each entry re-uses the existing annotation URI verbatim as its `id`, its `body` reconstructed from the entry's body form, and its `target` rebuilt from the entry's `xywh` selector. The server remaps column references when URIs change, but echoing `body` and `target` verbatim avoids minting unnecessary RERUM versions. On any non-2xx, stop and report. +7. For each detected column, POST `{ label, annotations }` where `annotations` is the contiguous slice of the reading-order id sequence from step 5 that belongs to that column. Choose a unique label per column (e.g., `Column A`, `Column B`) that does not clash with any other label chosen in this run. On any non-2xx, stop and report — columns POSTed before the failure remain persisted. Run step 7 before step 8. +8. Compare the step-5 sequence against the "Existing lines" order index-by-index. If they are identical, skip the PUT. Otherwise, PUT the page with `items` in the step-5 order. Each entry re-uses the existing annotation URI verbatim as its `id`, its `body` reconstructed from the entry's body form, and its `target` rebuilt from the entry's `xywh` selector. On any non-2xx, stop and report. 9. Report: columns deleted, columns created, whether the page order was updated, and per-column line counts. ## Rules @@ -50,12 +50,12 @@ Use only tools already available in your environment. Do not install packages, l - Keep column boundaries tight enough that each line clearly belongs to one column, but generous enough to avoid clipping existing line selectors. - Column labels must be unique within this run. 
The DELETE in step 6 clears every existing column, so no pre-existing label can collide. - Each existing line belongs to exactly one column. -- Do not POST a column with an empty `annotations` array — the server rejects it. If a detected column would end up with zero assigned lines, merge its assignments into the nearest populated column instead. +- Do not POST a column with an empty `annotations` array — the server rejects it. Skip any detected column that ends up with zero assigned lines. - Echo each line's existing `body` and `target` unchanged in the PUT. Changing either mints a new RERUM version of the line; the server remaps columns to the new URIs, but echoing verbatim avoids the needless version. ## TPEN API -First, delete all existing columns on the page. Expect `204 No Content` on success (including when the page had no columns): +First, delete all existing columns on the page: ``` DELETE {{pageEndpoint}}/clear-columns diff --git a/templates/detect-lines/PROMPT.md b/templates/detect-lines/PROMPT.md index 312c727..a7ce916 100644 --- a/templates/detect-lines/PROMPT.md +++ b/templates/detect-lines/PROMPT.md @@ -36,11 +36,11 @@ Use only tools already available in your environment. Do not install packages, l ## Rules -- Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. +- Bounds MUST be saved as integer coordinates in canvas space. No percents. No `percent:` or `pixel:` prefix on the selector value. - Preserve reading order across the whole page. - Prefer tight bounds when you can measure them; best-effort bounds are acceptable. When uncertain whether a tall run is one line or several, prefer splitting over merging. - Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line. -- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected downstream. 
+- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected by humans downstream. - Zero lines detected is an unprocessable outcome. Stop and report — do not PUT, do not emit a fallback payload. An empty `items` array would erase every existing annotation on the page. ## TPEN API diff --git a/templates/detect-order-and-transcribe/PROMPT.md b/templates/detect-order-and-transcribe/PROMPT.md index d3e43e9..1cb436f 100644 --- a/templates/detect-order-and-transcribe/PROMPT.md +++ b/templates/detect-order-and-transcribe/PROMPT.md @@ -30,9 +30,9 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` Then clamp `x,y,w,h` so that `0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`. -4. Run handwriting text recognition on each line's crop. Apply the recognition rules below. +4. Run text recognition (print or handwriting) on each line's crop. Apply the recognition rules below. 5. If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once in the global reading-order sequence from step 2. If the PUT returns non-2xx, stop and report the status and error body — do not emit a fallback payload; the same token and content would be re-submitted through it. If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The PUT response's `items` array is guaranteed to be in the same order as the submitted items, so use each line's column index from step 2 to slice the returned ids. Labels must be unique within this run. If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. -6. 
If HTTP PUT or POST is unavailable from the start, emit the condensed payload under **Fallback** as the final code block — do not also attempt PUT. Column creation is out of scope for the fallback path. +6. If HTTP PUT or POST is unavailable from the start, emit the condensed payload under **Fallback** as the final code block — do not also attempt PUT. 7. Report counts (lines saved/in payload, non-empty text, uncertain, columns created/in payload) and which path was used (direct or fallback). 8. Report notable ambiguities (e.g., illegible lines transcribed as empty or flagged). @@ -40,7 +40,7 @@ Use only tools already available in your environment. Do not install packages, l ### Detection (IMAGE_ANALYSIS) -- Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. +- Bounds MUST be saved as integer coordinates in canvas space. No percents. No `percent:` or `pixel:` prefix on the selector value. - Column labels must be unique within this run. - Each line annotation belongs to at most one column. - Preserve reading order across columns and within each column. @@ -48,10 +48,10 @@ Use only tools already available in your environment. Do not install packages, l - Prefer tight bounds when you can measure them; best-effort bounds are acceptable. When uncertain whether a tall run is one line or several, prefer splitting over merging. - Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line. - Do not POST a column with an empty `annotations` array — the server rejects it. Skip any detected column that ends up with zero assigned lines. -- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected downstream. +- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected by humans downstream. 
- Zero lines detected is an unprocessable outcome. Stop and report — do not PUT, do not POST a column, do not emit a fallback payload. An empty `items` array would erase every existing annotation on the page. -### Recognition (HANDWRITING_TEXT_RECOGNITION) +### Recognition (TEXT_RECOGNITION) - Prioritize diplomatic transcription over normalization. - Preserve orthography and punctuation as observed. diff --git a/templates/detect-order-and-transcribe/index.js b/templates/detect-order-and-transcribe/index.js index d59fe2b..911f7cc 100644 --- a/templates/detect-order-and-transcribe/index.js +++ b/templates/detect-order-and-transcribe/index.js @@ -2,7 +2,7 @@ * @file Template: "Detect columns + lines + transcribe → PUT page, POST columns". * * Combines the multi-block reading-order and column creation from - * detect-columns-and-lines with the handwriting recognition from + * detect-columns-and-lines with the text recognition from * detect-and-transcribe. * * @author thehabes diff --git a/templates/transcribe-known-lines/PROMPT.md b/templates/transcribe-known-lines/PROMPT.md index ef30d2d..6b3d861 100644 --- a/templates/transcribe-known-lines/PROMPT.md +++ b/templates/transcribe-known-lines/PROMPT.md @@ -34,8 +34,8 @@ Use only tools already available in your environment. Do not install packages, l - `pixel_w = round(canvas_w * img_w / {{canvasWidth}})` - `pixel_h = round(canvas_h * img_h / {{canvasHeight}})` Crop each line region and verify it visibly contains a single line of inked text. -2. Run handwriting text recognition over each crop. Apply the recognition rules below. -3. If HTTP PATCH is available, PATCH the text to each line's line-text endpoint — one PATCH per line in the "Existing lines" list. On any non-2xx, record the status and continue with the remaining lines. If every PATCH returned non-2xx, stop and report the per-line statuses — do not emit a fallback payload; the same token and content would be re-submitted through it. +2. 
Run text recognition (print or handwriting) over each crop. Apply the recognition rules below. +3. If HTTP PATCH is available, PATCH the text to each line's line-text endpoint — one PATCH per line in the "Existing lines" list. `` is the trailing path segment of the annotation URI listed above (the last `/`-separated segment). On any non-2xx, record the status and continue with the remaining lines. If every PATCH returned non-2xx, stop and report the per-line statuses — do not emit a fallback payload; the same token and content would be re-submitted through it. 4. If HTTP PATCH is unavailable from the start, emit the condensed payload under **Fallback** as the final code block — do not also attempt PATCH. 5. Report counts (lines updated, lines flagged illegible, lines failed) and which path was used. @@ -49,7 +49,7 @@ Use only tools already available in your environment. Do not install packages, l ## TPEN API -Update one line's text via PATCH with a plain-text body. `` is the trailing path segment of the annotation URI listed above (the last `/`-separated segment). +Update one line's text via PATCH with a plain-text body. ``` PATCH {{pageEndpoint}}/line//text From b6f19d443cd6001cc9694268c6ea3025c9c5b873 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Tue, 28 Apr 2026 11:14:49 -0500 Subject: [PATCH 44/47] Changes during review. Going to test each one now. --- config.js | 10 +++++----- message-handler.js | 8 ++++---- templates/detect-and-transcribe/PROMPT.md | 2 +- templates/detect-columns-and-lines/PROMPT.md | 4 ++-- templates/detect-lines/PROMPT.md | 2 +- templates/detect-order-and-transcribe/PROMPT.md | 4 ++-- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/config.js b/config.js index 3aaebea..519ed67 100644 --- a/config.js +++ b/config.js @@ -22,25 +22,25 @@ const ACTIVE_ENV = ( ?? 
'prod' ) -/** @type {Record} */ +/** @type {Record} */ const ENVIRONMENTS = { local: { servicesURL: 'http://localhost:3012', - TPENINTERFACESURL: 'http://localhost:4000' + interfacesURL: 'http://localhost:4000' }, dev: { servicesURL: 'https://dev.api.t-pen.org', - TPENINTERFACESURL: 'http://localhost:4000' + interfacesURL: 'http://localhost:4000' }, prod: { servicesURL: 'https://api.t-pen.org', - TPENINTERFACESURL: 'https://app.t-pen.org' + interfacesURL: 'https://app.t-pen.org' } } /** * Active config for this page load, flattened for convenient destructuring. - * @type {{ env: string, servicesURL: string, TPENINTERFACESURL: string }} + * @type {{ env: string, servicesURL: string, interfacesURL: string }} */ export const CONFIG = { env: ACTIVE_ENV, diff --git a/message-handler.js b/message-handler.js index b29aa45..dba3891 100644 --- a/message-handler.js +++ b/message-handler.js @@ -8,7 +8,7 @@ * the parent replies with `TPEN_ID_TOKEN`. * * Replies are aimed at `parentOrigin`, captured from the first inbound - * message; before any inbound arrives, `CONFIG.TPENINTERFACESURL` is used + * message; before any inbound arrives, `CONFIG.interfacesURL` is used * as the default target. * * @author thehabes @@ -57,14 +57,14 @@ export class MessageHandler { /** * Post a message to the parent frame. Replies target the origin of the * first inbound message; before any inbound arrives we fall back to - * `CONFIG.TPENINTERFACESURL` (the expected production parent). No-op - * when the parent is the page itself. + * `CONFIG.interfacesURL` (the expected production parent). No-op when + * the parent is the page itself. * @param {object} message * @returns {boolean} true when a parent frame exists and the post was dispatched. */ #postToParent(message) { if (window.parent === window) return false - const targetOrigin = this.parentOrigin ?? CONFIG.TPENINTERFACESURL + const targetOrigin = this.parentOrigin ?? 
CONFIG.interfacesURL window.parent.postMessage(message, targetOrigin) return true } diff --git a/templates/detect-and-transcribe/PROMPT.md b/templates/detect-and-transcribe/PROMPT.md index 84f5311..642520b 100644 --- a/templates/detect-and-transcribe/PROMPT.md +++ b/templates/detect-and-transcribe/PROMPT.md @@ -40,7 +40,7 @@ Use only tools already available in your environment. Do not install packages, l ### Detection (IMAGE_ANALYSIS) -- Bounds MUST be saved as integer coordinates in canvas space. No percents. No `percent:` or `pixel:` prefix on the selector value. +- Bounds MUST be saved as integer coordinates in canvas space. No percentage-based selectors. No `percent:` or `pixel:` prefix on the selector value. - Preserve reading order across the whole page. - Prefer tight bounds when you can measure them; best-effort bounds are acceptable. When uncertain whether a tall run is one line or several, prefer splitting over merging. - Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line. diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index 64a41cd..fbb7d22 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -36,8 +36,8 @@ Use only tools already available in your environment. Do not install packages, l ## Rules -- Bounds MUST be saved as integer coordinates in canvas space. No percents. No `percent:` or `pixel:` prefix on the selector value. -- Column labels must be unique within this run. +- Bounds MUST be saved as integer coordinates in canvas space. No percentage-based selectors. No `percent:` or `pixel:` prefix on the selector value. +- Column labels must be unique within this run. The server rejects labels matching any pre-existing column. - Each line annotation belongs to at most one column. - Preserve reading order across columns and within each column. - Line geometry is the primary accuracy target. 
Column grouping is secondary — for a single-column page, one column containing every line is correct. diff --git a/templates/detect-lines/PROMPT.md b/templates/detect-lines/PROMPT.md index a7ce916..aea9d7c 100644 --- a/templates/detect-lines/PROMPT.md +++ b/templates/detect-lines/PROMPT.md @@ -36,7 +36,7 @@ Use only tools already available in your environment. Do not install packages, l ## Rules -- Bounds MUST be saved as integer coordinates in canvas space. No percents. No `percent:` or `pixel:` prefix on the selector value. +- Bounds MUST be saved as integer coordinates in canvas space. No percentage-based selectors. No `percent:` or `pixel:` prefix on the selector value. - Preserve reading order across the whole page. - Prefer tight bounds when you can measure them; best-effort bounds are acceptable. When uncertain whether a tall run is one line or several, prefer splitting over merging. - Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line. diff --git a/templates/detect-order-and-transcribe/PROMPT.md b/templates/detect-order-and-transcribe/PROMPT.md index 1cb436f..6fd7293 100644 --- a/templates/detect-order-and-transcribe/PROMPT.md +++ b/templates/detect-order-and-transcribe/PROMPT.md @@ -40,8 +40,8 @@ Use only tools already available in your environment. Do not install packages, l ### Detection (IMAGE_ANALYSIS) -- Bounds MUST be saved as integer coordinates in canvas space. No percents. No `percent:` or `pixel:` prefix on the selector value. -- Column labels must be unique within this run. +- Bounds MUST be saved as integer coordinates in canvas space. No percentage-based selectors. No `percent:` or `pixel:` prefix on the selector value. +- Column labels must be unique within this run. The server rejects labels matching any pre-existing column. - Each line annotation belongs to at most one column. - Preserve reading order across columns and within each column. - Line geometry is the primary accuracy target. 
Column grouping is secondary — for a single-column page, one column containing every line is correct. From a615664da515a6b4e8fedfce2d12ce84f0264af8 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Tue, 28 Apr 2026 12:33:53 -0500 Subject: [PATCH 45/47] changes during testing --- templates/detect-and-transcribe/PROMPT.md | 3 +- templates/detect-columns/PROMPT.md | 3 +- .../detect-order-and-transcribe/PROMPT.md | 3 +- templates/inject-context.js | 48 +++++++++++------- templates/transcribe-known-lines/PROMPT.md | 50 ++++++++++++------- templates/transcribe-known-lines/index.js | 2 +- 6 files changed, 66 insertions(+), 43 deletions(-) diff --git a/templates/detect-and-transcribe/PROMPT.md b/templates/detect-and-transcribe/PROMPT.md index 642520b..02de0df 100644 --- a/templates/detect-and-transcribe/PROMPT.md +++ b/templates/detect-and-transcribe/PROMPT.md @@ -33,8 +33,7 @@ Use only tools already available in your environment. Do not install packages, l 4. Run text recognition (print or handwriting) on each line's crop. Apply the recognition rules below. 5. If HTTP PUT is available, build the full payload under **TPEN API** and send the request once. On any non-2xx response, stop and report the status and error body — do not emit a fallback payload; the same token and content would be re-submitted through it. 6. If HTTP PUT is unavailable from the start, emit the condensed payload under **Fallback** as the final code block — do not also attempt PUT. -7. Report counts (lines saved/in payload, non-empty text, uncertain) and which path was used (direct PUT or fallback). -8. Report notable ambiguities (e.g., illegible lines transcribed as empty or flagged). +7. Report counts (lines saved/in payload, non-empty text, uncertain), which path was used (direct PUT or fallback), and notable ambiguities (e.g., illegible lines transcribed as empty or flagged). 
## Rules diff --git a/templates/detect-columns/PROMPT.md b/templates/detect-columns/PROMPT.md index 7529e5f..1fc4377 100644 --- a/templates/detect-columns/PROMPT.md +++ b/templates/detect-columns/PROMPT.md @@ -40,7 +40,7 @@ Use only tools already available in your environment. Do not install packages, l 5. Build a global reading-order sequence of all existing line ids: columns in reading order; within each column, lines sorted top-to-bottom by the `xywh` y-center. 6. DELETE every existing column on the page (see TPEN API below). On any non-2xx, stop and report. Do not POST or PUT after a DELETE failure. 7. For each detected column, POST `{ label, annotations }` where `annotations` is the contiguous slice of the reading-order id sequence from step 5 that belongs to that column. Choose a unique label per column (e.g., `Column A`, `Column B`) that does not clash with any other label chosen in this run. On any non-2xx, stop and report — columns POSTed before the failure remain persisted. Run step 7 before step 8. -8. Compare the step-5 sequence against the "Existing lines" order index-by-index. If they are identical, skip the PUT. Otherwise, PUT the page with `items` in the step-5 order. Each entry re-uses the existing annotation URI verbatim as its `id`, its `body` reconstructed from the entry's body form, and its `target` rebuilt from the entry's `xywh` selector. On any non-2xx, stop and report. +8. Compare the step-5 sequence against the "Existing lines" order index-by-index. If they are identical, skip the PUT. Otherwise, PUT the page with `items` in the step-5 order. Each entry re-uses the existing annotation URI verbatim as its `id`, its `body` reconstructed from the entry's body form, and its `target` rebuilt from the entry's `xywh` selector — echoing verbatim avoids minting a needless new RERUM version of the line. On any non-2xx, stop and report. 9. Report: columns deleted, columns created, whether the page order was updated, and per-column line counts. 
## Rules @@ -51,7 +51,6 @@ Use only tools already available in your environment. Do not install packages, l - Column labels must be unique within this run. The DELETE in step 6 clears every existing column, so no pre-existing label can collide. - Each existing line belongs to exactly one column. - Do not POST a column with an empty `annotations` array — the server rejects it. Skip any detected column that ends up with zero assigned lines. -- Echo each line's existing `body` and `target` unchanged in the PUT. Changing either mints a new RERUM version of the line; the server remaps columns to the new URIs, but echoing verbatim avoids the needless version. ## TPEN API diff --git a/templates/detect-order-and-transcribe/PROMPT.md b/templates/detect-order-and-transcribe/PROMPT.md index 6fd7293..e4901e7 100644 --- a/templates/detect-order-and-transcribe/PROMPT.md +++ b/templates/detect-order-and-transcribe/PROMPT.md @@ -33,8 +33,7 @@ Use only tools already available in your environment. Do not install packages, l 4. Run text recognition (print or handwriting) on each line's crop. Apply the recognition rules below. 5. If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once in the global reading-order sequence from step 2. If the PUT returns non-2xx, stop and report the status and error body — do not emit a fallback payload; the same token and content would be re-submitted through it. If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The PUT response's `items` array is guaranteed to be in the same order as the submitted items, so use each line's column index from step 2 to slice the returned ids. Labels must be unique within this run. If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. 6. 
If HTTP PUT or POST is unavailable from the start, emit the condensed payload under **Fallback** as the final code block — do not also attempt PUT. -7. Report counts (lines saved/in payload, non-empty text, uncertain, columns created/in payload) and which path was used (direct or fallback). -8. Report notable ambiguities (e.g., illegible lines transcribed as empty or flagged). +7. Report counts (lines saved/in payload, non-empty text, uncertain, columns created/in payload), which path was used (direct or fallback), and notable ambiguities (e.g., illegible lines transcribed as empty or flagged). ## Rules diff --git a/templates/inject-context.js b/templates/inject-context.js index afeca68..391461d 100644 --- a/templates/inject-context.js +++ b/templates/inject-context.js @@ -72,31 +72,41 @@ export function buildTemplateContext(ctx) { * The common case, so it's worth the shorter display. * - `body=` — anything else; echo the JSON verbatim. * - * Existing TPEN line bodies are expected to always carry `type`, `value`, and - * `format`. The `text=` round-trip reconstruction sets `format: "text/plain"`, - * so `only.format === 'text/plain'` is a strict match — any other shape (no - * format, different format, multiple bodies, non-`TextualBody`) drops to - * `body=` to preserve fidelity on the PUT echo. + * Bodies arrive in several shapes: empty (`null`/`undefined`/`""`/`[]`), an + * array of body entries, or a single body object (not wrapped). The unwrapped + * shape comes from `Line.updateText` after a PATCH: it sets + * `this.body = { type, value, format, language }` directly, so PATCHed lines + * round-trip through RERUM as `{type, value, format}` and would otherwise be + * misread as empty. + * + * The `text=` round-trip reconstruction sets `format: "text/plain"`, so + * `format === 'text/plain'` is a strict match — any other shape (no format, + * different format, multiple bodies, non-`TextualBody`) drops to `body=` + * to preserve fidelity on the PUT echo. 
* @param {any} body an annotation `body` value. * @returns {string} */ function formatBody(body) { - if (!Array.isArray(body) || body.length === 0) return 'body=[]' - if (body.length === 1) { - const only = body[0] - // Require EXACTLY {type, value, format} with the expected values so the - // `text=` → `[{type, value, format}]` round-trip is lossless. Any extra - // field (e.g. `language`, `creator`, `id`) would be silently dropped on - // the PUT echo and trigger a needless RERUM re-version. - const keys = only && typeof only === 'object' ? Object.keys(only) : [] - const isPlainTextual = - keys.length === 3 + if (body === null || body === undefined || body === '') return 'body=[]' + // Require EXACTLY {type, value, format} with the expected values so the + // `text=` → `[{type, value, format}]` round-trip is lossless. Any extra + // field (e.g. `language`, `creator`, `id`) would be silently dropped on + // the PUT echo and trigger a needless RERUM re-version. + const isPlainTextual = (entry) => { + const keys = entry && typeof entry === 'object' && !Array.isArray(entry) + ? 
Object.keys(entry) : [] + return keys.length === 3 && keys.every(k => k === 'type' || k === 'value' || k === 'format') - && only.type === 'TextualBody' - && typeof only.value === 'string' - && only.format === 'text/plain' - if (isPlainTextual) return `text=${JSON.stringify(only.value)}` + && entry.type === 'TextualBody' + && typeof entry.value === 'string' + && entry.format === 'text/plain' + } + if (Array.isArray(body)) { + if (body.length === 0) return 'body=[]' + if (body.length === 1 && isPlainTextual(body[0])) return `text=${JSON.stringify(body[0].value)}` + return `body=${JSON.stringify(body)}` } + if (isPlainTextual(body)) return `text=${JSON.stringify(body.value)}` return `body=${JSON.stringify(body)}` } diff --git a/templates/transcribe-known-lines/PROMPT.md b/templates/transcribe-known-lines/PROMPT.md index 6b3d861..c5bb2cf 100644 --- a/templates/transcribe-known-lines/PROMPT.md +++ b/templates/transcribe-known-lines/PROMPT.md @@ -11,7 +11,7 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Existing lines -Each entry is ` | xywh= | ` in canvas coordinates. The body form is `body=[]` (empty), `text=""` (single plain-text `TextualBody`), or `body=` (anything else) — use it as context for what's already on the line. The fallback payload re-uses the full annotation URI verbatim as the `id` of each item; the splitscreen tool rebuilds each existing target from the hydrated page before PUTting it, and updates only the body text. +Each entry is ` | xywh= | ` in canvas coordinates. The body form (`body=[]`, `text=""`, or `body=`) is the line's current transcription — see "Rules" for when to keep it vs. replace it. The direct PUT and the fallback both re-use each entry's URI verbatim as the item `id`; the direct PUT additionally rebuilds `target` from the entry's `xywh` selector (see "TPEN API" below). The new transcription replaces the prior body in both paths. 
{{existingLines}} @@ -22,7 +22,7 @@ All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dim You must have: 1. Vision capability: fetch each line's region as image bytes (e.g. via a IIIF region URL) and read the inked glyphs directly. A fetcher that returns only a prose description of the image does not qualify, and any preview rendered back into chat is downsampled — do not transcribe from a preview. **If you cannot read image bytes directly with the capabilities already available to you, stop now and return a failure report naming the missing capability.** This precondition is hard — fallback does not rescue missing vision. -2. Either HTTP PATCH capability (with `Content-Type: text/plain`), or the ability to emit a fallback JSON code block in your report. If HTTP PATCH is not available, skip straight to the Fallback section — do not retry. +2. Either HTTP PUT capability (with `Content-Type: application/json`), or the ability to emit a fallback JSON code block in your report. If HTTP PUT is not available, skip straight to the Fallback section — do not retry. Use only tools already available in your environment. Do not install packages, libraries, or system utilities. @@ -35,33 +35,48 @@ Use only tools already available in your environment. Do not install packages, l - `pixel_h = round(canvas_h * img_h / {{canvasHeight}})` Crop each line region and verify it visibly contains a single line of inked text. 2. Run text recognition (print or handwriting) over each crop. Apply the recognition rules below. -3. If HTTP PATCH is available, PATCH the text to each line's line-text endpoint — one PATCH per line in the "Existing lines" list. `` is the trailing path segment of the annotation URI listed above (the last `/`-separated segment). On any non-2xx, record the status and continue with the remaining lines. 
If every PATCH returned non-2xx, stop and report the per-line statuses — do not emit a fallback payload; the same token and content would be re-submitted through it. -4. If HTTP PATCH is unavailable from the start, emit the condensed payload under **Fallback** as the final code block — do not also attempt PATCH. -5. Report counts (lines updated, lines flagged illegible, lines failed) and which path was used. +3. If HTTP PUT is available, build a single page PUT body whose `items` array contains one entry per existing line, in the same order as the "Existing lines" list. Each item is shaped as in "TPEN API" below; set `body` per the confidence ladder in "Rules". Send one PUT to `{{pageEndpoint}}`. On non-2xx, stop and report the status — do not emit a fallback payload; the same token and content would be re-submitted through it. +4. If HTTP PUT is unavailable from the start, emit the condensed payload under **Fallback** as the final code block — do not also attempt PUT. +5. Report counts (lines submitted, lines flagged illegible) and which path was used. ## Rules - Prioritize diplomatic transcription over normalization. Preserve orthography and punctuation as observed. - Use explicit uncertainty markers for unclear glyphs (for example `[a?]`). Do not force certainty. - Do not invent expansions. If an abbreviation mark is present, transcribe the mark; do not silently expand. -- Keep line segmentation stable — one transcription string per existing line annotation. -- If a line's crop is illegible, send an empty body (direct) or emit `"text": ""` (fallback) and report the line id as unresolved — do not fabricate text. In the fallback payload, do not drop the item. +- Confidence ladder per line: confident reading → existing text from "Existing lines" (echo the prior `text=` or `body=` value verbatim) → `body: []` (direct) / `"text": ""` (fallback), only when the line was already empty (`body=[]`). Do not fabricate text. 
Report any line that fell back to existing text or to empty. Do not drop the item in either path: the direct PUT treats omitted line ids as deletions and updates columns to remove them. ## TPEN API -Update one line's text via PATCH with a plain-text body. +Update every line in a single page PUT. Each `items` entry re-uses an existing annotation URI verbatim as `id`, rebuilds `target` from that line's `xywh` selector, and sets `body` per the confidence ladder in "Rules": ``` -PATCH {{pageEndpoint}}/line//text +PUT {{pageEndpoint}} Authorization: Bearer {{token}} -Content-Type: text/plain +Content-Type: application/json - +{ + "items": [ + { + "id": "", + "body": [{ "type": "TextualBody", "value": "", "format": "text/plain" }], + "target": { + "source": "{{canvasId}}", + "type": "SpecificResource", + "selector": { + "type": "FragmentSelector", + "conformsTo": "http://www.w3.org/TR/media-frags/", + "value": "xywh=x,y,w,h" + } + } + } + ] +} ``` ## Fallback -The fallback tool only accepts JSON, so it uses a single page-level PUT instead of per-line PATCH. When PATCH is unavailable from the start, emit the condensed payload below as the final code block of your report. The TPEN splitscreen tool re-uses each line's existing target from the hydrated page context before PUTting it — the item's `id` must match an entry in "Existing lines" above. +The fallback tool only accepts a condensed payload — re-using URIs but not full targets. When PUT is unavailable from the start, emit the payload below as the final code block of your report. The TPEN splitscreen tool re-uses each line's existing target from the hydrated page context before PUTting it — the item's `id` must match an entry in "Existing lines" above. ``` { @@ -71,15 +86,16 @@ The fallback tool only accepts JSON, so it uses a single page-level PUT instead } ``` -There must be exactly one item per entry in "Existing lines", each re-using that entry's annotation URI verbatim as its `id`. 
Item order must match the order of "Existing lines" — do not reorder. `text` is an empty string for fully illegible lines — do not drop the item. It must be valid JSON (no comments, no placeholders). +There must be exactly one item per entry in "Existing lines". Item order must match the order of "Existing lines" — do not reorder. Set each `text` per the confidence ladder in "Rules". It must be valid JSON (no comments, no placeholders). ## Completion -Direct PATCH path, report: +Direct PUT path, report: -- operation: `PATCH line text` -- target: {{pageEndpoint}}/line//text per line -- counts: lines updated, lines flagged illegible, lines failed (with HTTP status per failure) +- operation: `PUT page` +- target: {{pageEndpoint}} +- counts: lines submitted, lines flagged illegible +- HTTP status of the PUT Fallback path, report: diff --git a/templates/transcribe-known-lines/index.js b/templates/transcribe-known-lines/index.js index b72b0a3..03be90f 100644 --- a/templates/transcribe-known-lines/index.js +++ b/templates/transcribe-known-lines/index.js @@ -1,5 +1,5 @@ /** - * @file Template: "Transcribe existing lines → PATCH line text". + * @file Template: "Transcribe existing lines → page PUT". * * Targets workflow #1 from the absorbed cubap `_tools/COMMON_TASKS.md`: * Text Recognition Within Known Bounds. From a6b4d2e52ffcfdbb97362a6da67101cc542c06d2 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Tue, 28 Apr 2026 12:52:54 -0500 Subject: [PATCH 46/47] Testing was good. Prepare for merge and a stable main. 
--- index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.html b/index.html index c6866b8..c302369 100644 --- a/index.html +++ b/index.html @@ -5,7 +5,7 @@ TPEN-Prompts - + From 09bfa2bdba77e18d0cf10c085ea9ce9618f5a556 Mon Sep 17 00:00:00 2001 From: Bryan Haberberger Date: Tue, 28 Apr 2026 13:10:42 -0500 Subject: [PATCH 47/47] small change for security message --- templates/transcribe-known-lines/PROMPT.md | 7 ++++++- ui-manager.js | 20 +++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/templates/transcribe-known-lines/PROMPT.md b/templates/transcribe-known-lines/PROMPT.md index c5bb2cf..3641277 100644 --- a/templates/transcribe-known-lines/PROMPT.md +++ b/templates/transcribe-known-lines/PROMPT.md @@ -44,7 +44,12 @@ Use only tools already available in your environment. Do not install packages, l - Prioritize diplomatic transcription over normalization. Preserve orthography and punctuation as observed. - Use explicit uncertainty markers for unclear glyphs (for example `[a?]`). Do not force certainty. - Do not invent expansions. If an abbreviation mark is present, transcribe the mark; do not silently expand. -- Confidence ladder per line: confident reading → existing text from "Existing lines" (echo the prior `text=` or `body=` value verbatim) → `body: []` (direct) / `"text": ""` (fallback), only when the line was already empty (`body=[]`). Do not fabricate text. Report any line that fell back to existing text or to empty. Do not drop the item in either path: the direct PUT treats omitted line ids as deletions and updates columns to remove them. +- Confidence ladder per line: + 1. Confident reading can overwrite existing line text. + 2. Unconfident reading uses "Existing lines" (echo the prior `text=` or `body=` value verbatim). + 3. `body: []` (direct) / `"text": ""` (fallback), only if the line was already empty (`body=[]`). Do not fabricate text. 
Report any line that fell back to existing text or to empty. + +> Do not drop the item in either path: the direct PUT treats omitted line ids as deletions and updates columns to remove them. ## TPEN API diff --git a/ui-manager.js b/ui-manager.js index dc9c620..1d50b95 100644 --- a/ui-manager.js +++ b/ui-manager.js @@ -206,6 +206,8 @@ export class UIManager { #feedbackTimer = null /** Fallback-panel submit button; toggled by `updateToken`. */ #fallbackSubmit = null + /** Security warning text node; rewritten by `updateToken` when consent arrives. */ + #warningText = null /** * @param {string} [rootId='app'] id of the element to render into. @@ -318,9 +320,11 @@ export class UIManager { if (k === 'Line') this.#lineMetaValue = dd } + const warningSpan = el('span', { text: warningText(token) }) + this.#warningText = warningSpan const warning = el('div', { class: 'warning', attrs: { role: 'note' } }, [ el('strong', { text: 'Security: ' }), - el('span', { text: `The generated prompt carries your TPEN session token so an agentic LLM can manipulate your TPEN data on your behalf. Clicking 'Copy' writes the full token to your clipboard. Only paste it into LLM environments you trust. If the token leaks, log out of TPEN to invalidate it.` }) + warningSpan ]) this.#authButton = null @@ -579,6 +583,7 @@ export class UIManager { this.#authButton.remove() this.#authButton = null } + if (this.#warningText) this.#warningText.textContent = warningText(token) const { projectID, pageID } = this.state if (this.#generateBtn) this.#generateBtn.disabled = !pageID if (this.#fallbackSubmit) { @@ -697,6 +702,19 @@ function truncateToken(token) { return `${token.slice(0, 10)}…${token.slice(-10)}` } +/** + * Pick the security-warning body for the given token state. Pre-consent the + * warning explains what consent will mean; once a token is held it shrinks to + * the operative reminders. 
+ * @param {string|null|undefined} token + * @returns {string} + */ +function warningText(token) { + return token + ? `Only paste the prompt into LLM environments you trust. You can log out of TPEN to invalidate a leaked token.` + : `The generated prompt carries your TPEN session token so an agentic LLM can manipulate your TPEN data on your behalf. Clicking 'Copy' writes the full token to your clipboard. Only paste the prompt into LLM environments you trust. You can log out of TPEN to invalidate a leaked token.` +} + /** * Fallback reload target when the parent didn't forward `parentUrl` via * `TPEN_CONTEXT`. Minted from the parent origin (taken from