diff --git a/ASSISTANTS.md b/ASSISTANTS.md index dafdda5..c42c2c1 100644 --- a/ASSISTANTS.md +++ b/ASSISTANTS.md @@ -6,7 +6,7 @@ You are a small GitHub Pages app that functions as a TPEN AI Prompt Generator. ## TPEN Interfaces -You are a TPEN Interfaces component with an accompanying interface for UI. Specifically, you are a splitscreen tool for the transcription interface. +You are a tool imported into a TPEN Interfaces component. That component is your parent. > The TPEN Interfaces code can be found at https://github.com/CenterForDigitalHumanities/TPEN-interfaces. diff --git a/README.md b/README.md index 40806a3..ddde66f 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,3 @@ # TPEN-Prompts -A small GitHub Pages app that composes well-formatted LLM prompts carrying TPEN3 project context. It emits prompt text only — it does **not** call any LLM. Offered as a splitscreen tool on the TPEN3 transcription interface. +A small GitHub Pages app that composes well-formatted LLM prompts carrying TPEN3 project context. It emits prompt text only — it does **not** call any LLM. diff --git a/config.js b/config.js index ff9da4b..519ed67 100644 --- a/config.js +++ b/config.js @@ -22,25 +22,25 @@ const ACTIVE_ENV = ( ?? 'prod' ) -/** @type {Record<string, { servicesURL: string, TPEN3URL: string }>} */ +/** @type {Record<string, { servicesURL: string, interfacesURL: string }>} */ const ENVIRONMENTS = { local: { servicesURL: 'http://localhost:3012', - TPEN3URL: 'http://localhost:4000' + interfacesURL: 'http://localhost:4000' }, dev: { servicesURL: 'https://dev.api.t-pen.org', - TPEN3URL: 'http://localhost:4000' + interfacesURL: 'http://localhost:4000' }, prod: { servicesURL: 'https://api.t-pen.org', - TPEN3URL: 'https://app.t-pen.org' + interfacesURL: 'https://app.t-pen.org' } } /** * Active config for this page load, flattened for convenient destructuring. 
- * @type {{ env: string, servicesURL: string, TPEN3URL: string }} + * @type {{ env: string, servicesURL: string, interfacesURL: string }} */ export const CONFIG = { env: ACTIVE_ENV, diff --git a/index.html b/index.html index c6866b8..c302369 100644 --- a/index.html +++ b/index.html @@ -5,7 +5,7 @@ TPEN-Prompts - + diff --git a/message-handler.js b/message-handler.js index f81ecf1..dba3891 100644 --- a/message-handler.js +++ b/message-handler.js @@ -7,19 +7,15 @@ * clicking the consent button sends `REQUEST_TPEN_ID_TOKEN` upstream, and * the parent replies with `TPEN_ID_TOKEN`. * + * Replies are aimed at `parentOrigin`, captured from the first inbound + * message; before any inbound arrives, `CONFIG.interfacesURL` is used + * as the default target. + * * @author thehabes */ import { CONFIG } from './config.js' -// Accept messages only from known TPEN3 origins and the current origin (for -// same-origin dev harnesses). Anything else could inject auth tokens or drive -// the tool's state, so drop silently. -const ALLOWED_ORIGINS = new Set([ - CONFIG.TPEN3URL, - location.origin -]) - /** * Listens on `window` for postMessage traffic from the TPEN3 parent and routes * recognized message types to the `PromptsApp`. @@ -30,17 +26,16 @@ export class MessageHandler { */ constructor(app) { this.app = app - /** Origin of the first trusted inbound message; used as targetOrigin for replies. */ + /** Origin of the first inbound message; used as targetOrigin for replies. */ this.parentOrigin = null window.addEventListener('message', (event) => this.handle(event)) } /** - * Route an incoming postMessage. Origin-gated; unknown types are ignored. + * Route an incoming postMessage. Unknown types are ignored. * @param {MessageEvent} event */ handle(event) { - if (!ALLOWED_ORIGINS.has(event.origin)) return this.parentOrigin ??= event.origin const data = event.data if (!data?.type) return @@ -61,15 +56,15 @@ export class MessageHandler { /** * Post a message to the parent frame. 
Replies target the origin of the - * first trusted inbound message; before any inbound arrives we fall back - * to `CONFIG.TPEN3URL` (the expected production parent). No-op when the - * parent is the page itself. + * first inbound message; before any inbound arrives we fall back to + * `CONFIG.interfacesURL` (the expected production parent). No-op when + * the parent is the page itself. * @param {object} message * @returns {boolean} true when a parent frame exists and the post was dispatched. */ #postToParent(message) { if (window.parent === window) return false - const targetOrigin = this.parentOrigin ?? CONFIG.TPEN3URL + const targetOrigin = this.parentOrigin ?? CONFIG.interfacesURL window.parent.postMessage(message, targetOrigin) return true } diff --git a/templates/detect-and-transcribe/PROMPT.md b/templates/detect-and-transcribe/PROMPT.md index b098faa..02de0df 100644 --- a/templates/detect-and-transcribe/PROMPT.md +++ b/templates/detect-and-transcribe/PROMPT.md @@ -11,7 +11,9 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Preconditions -All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: +All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. This template only creates new lines: `lineCount` = `{{lineCount}}`. If `lineCount` is not `0`, stop immediately and report — existing line data must not be modified. + +You must have: 1. Ability to fetch the image bytes (or a derivative) and identify line bounds and text from them. Precise pixel measurement is preferred when available; visual estimation and on-sight transcription from the fetched image are acceptable otherwise. 2. Either HTTP PUT capability with `Content-Type: application/json`, or the ability to emit the payload as a fallback JSON code block in your report. If HTTP PUT is not available, skip straight to the Fallback section — do not retry. 
@@ -21,31 +23,30 @@ Use only tools already available in your environment. Do not install packages, l ## Steps 1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, fetch a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. If you have precise pixel tooling and want tighter bounds or a clearer crop for transcription, request a region server-side as `{base}/x,y,w,h/max/0/default.jpg` and add the crop's `x,y` origin back before applying the canvas conversion below. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. -2. Detect text lines across the whole page in reading order. This task does not create TPEN columns. +2. Detect text lines across the whole page in reading order. This task does not create TPEN columns. Then do exactly one self-review pass to tweak line placement — catch missed lines, merge over-splits, split over-merges, tighten loose bounds. One pass only, then move on. 3. For every line, measure a bounding box in image-pixel space and convert to integer canvas coordinates using: - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` - Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). -4. Run handwriting text recognition on each line's crop. Apply the recognition rules below. -5. If HTTP PUT is available, build the full payload under **TPEN API** — one Annotation per line with the recognized text and `xywh=x,y,w,h` selector — and send the request once. On any non-2xx response, do not retry — fall back. -6. 
If HTTP PUT is unavailable (or step 5 fell back), emit the condensed payload under **Fallback** as the final code block. -7. Report counts (lines saved/in payload, non-empty text, uncertain) and which path was used (direct PUT or fallback). -8. Report notable ambiguities (e.g., illegible lines transcribed as empty or flagged). + Then clamp `x,y,w,h` so that `0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`. +4. Run text recognition (print or handwriting) on each line's crop. Apply the recognition rules below. +5. If HTTP PUT is available, build the full payload under **TPEN API** and send the request once. On any non-2xx response, stop and report the status and error body — do not emit a fallback payload; the same token and content would be re-submitted through it. +6. If HTTP PUT is unavailable from the start, emit the condensed payload under **Fallback** as the final code block — do not also attempt PUT. +7. Report counts (lines saved/in payload, non-empty text, uncertain), which path was used (direct PUT or fallback), and notable ambiguities (e.g., illegible lines transcribed as empty or flagged). ## Rules ### Detection (IMAGE_ANALYSIS) -- Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. +- Bounds MUST be saved as integer coordinates in canvas space. No percentage-based selectors. No `percent:` or `pixel:` prefix on the selector value. - Preserve reading order across the whole page. - Prefer tight bounds when you can measure them; best-effort bounds are acceptable. When uncertain whether a tall run is one line or several, prefer splitting over merging. - Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line. -- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected downstream. 
+- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected by humans downstream. - Zero lines detected is an unprocessable outcome. Stop and report — do not PUT, do not emit a fallback payload. An empty `items` array would erase every existing annotation on the page. -### Recognition (HANDWRITING_TEXT_RECOGNITION) +### Recognition (TEXT_RECOGNITION) - Prioritize diplomatic transcription over normalization. - Preserve orthography and punctuation as observed. @@ -84,7 +85,7 @@ Content-Type: application/json ## Fallback -When the direct PUT is impossible or returns non-2xx, emit the condensed payload below as the final code block of your report. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON (no comments, no placeholders — substitute the real coordinates and recognized text). +When HTTP PUT is unavailable from the start, emit the condensed payload below as the final code block of your report. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON (no comments, no placeholders — substitute the real coordinates and recognized text). ``` { @@ -109,6 +110,5 @@ Fallback path, report: - path: `fallback` - counts: lines in payload, lines with non-empty text, lines flagged uncertain -- HTTP status and error body if a PUT was attempted first - notable ambiguities worth a human review - final code block: the condensed `{ "items": [...] 
}` JSON for the user to paste diff --git a/templates/detect-columns-and-lines/PROMPT.md b/templates/detect-columns-and-lines/PROMPT.md index f69f3e6..fbb7d22 100644 --- a/templates/detect-columns-and-lines/PROMPT.md +++ b/templates/detect-columns-and-lines/PROMPT.md @@ -9,49 +9,47 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en - Image: {{imageUrl}} - Page endpoint: {{pageEndpoint}} -## Existing columns on this page - -{{existingColumns}} - ## Preconditions -All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: +All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. This template only creates new lines: `lineCount` = `{{lineCount}}`. If `lineCount` is not `0`, stop immediately and report — existing line data must not be modified. + +You must have: 1. Ability to fetch the image bytes (or a derivative) and identify line and column bounds from them. Precise pixel measurement is preferred when available; visual estimation from the fetched image is acceptable otherwise. -2. Either HTTP PUT and POST capability with `Content-Type: application/json`, or the ability to emit the lines-only payload as a fallback JSON code block in your report. Column creation has no fallback — if POST is unavailable, column grouping is dropped. If PUT is unavailable, skip straight to the Fallback section — do not retry. +2. Either HTTP PUT and POST capability with `Content-Type: application/json`, or the ability to emit the lines-only payload as a fallback JSON code block in your report. If either verb is unavailable, skip straight to the Fallback section — do not retry. Column creation has no fallback; it is dropped when the fallback path is taken. Use only tools already available in your environment. Do not install packages, libraries, or system utilities. ## Steps 1. Resolve `img_w`, `img_h`. 
If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, fetch a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. If you have precise pixel tooling and want tighter bounds on a region, request it server-side as `{base}/x,y,w,h/max/0/default.jpg` and add the crop's `x,y` origin back before applying the canvas conversion below. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. -2. Detect main text column regions in reading order first, then detect the lines inside each column (reading order preserved within each column). If the page visibly has a single text block, create one column containing every detected line — do not subdivide. Track each line's column index (an integer, 0-based) as you detect it. Then flatten into a single global reading-order sequence across columns (column-major: every line in the first column, then the second, etc., adjusted for script tradition). +2. Detect main text column regions in reading order first, then detect the lines inside each column (reading order preserved within each column). If the page visibly has a single text block, create one column containing every detected line — do not subdivide. Track each line's column index (an integer, 0-based) as you detect it. Then flatten into a single global reading-order sequence across columns (column-major: every line in the first column, then the second, etc., adjusted for script tradition). Then do exactly one self-review pass to tweak line placement — catch missed lines, merge over-splits, split over-merges, tighten loose bounds. One pass only, then move on. 3. 
For every line, measure a bounding box in image-pixel space and convert to integer canvas coordinates using: - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` - Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). -4. If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once in the global reading-order sequence from step 2. If the PUT returns non-2xx, stop and fall back — lines are not persisted yet. If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The PUT response's `items` array is guaranteed to be in the same order as the submitted items, so use each line's column index from step 2 to slice the returned ids. Labels must be unique and must not clash with anything in "Existing columns on this page". If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. -5. If HTTP PUT is unavailable (or the PUT in step 4 failed), emit the condensed payload under **Fallback** as the final code block. Column creation is out of scope for the fallback path. -6. Report counts (lines saved/in payload, columns created/in payload) and which path was used. + Then clamp `x,y,w,h` so that `0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`. +4. If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once in the global reading-order sequence from step 2. If the PUT returns non-2xx, stop and report the status and error body — do not emit a fallback payload; the same token and content would be re-submitted through it. 
If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The PUT response's `items` array is guaranteed to be in the same order as the submitted items, so use each line's column index from step 2 to slice the returned ids. Labels must be unique within this run. If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. +5. If HTTP PUT or POST is unavailable from the start, emit the condensed payload under **Fallback** as the final code block — do not also attempt PUT. +6. Report counts (lines saved/in payload, columns created/in payload) and which path was used (direct or fallback). ## Rules -- Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. -- Column labels are page-scoped and must be unique. Do not duplicate an existing column label. +- Bounds MUST be saved as integer coordinates in canvas space. No percentage-based selectors. No `percent:` or `pixel:` prefix on the selector value. +- Column labels must be unique within this run. The server rejects labels matching any pre-existing column. - Each line annotation belongs to at most one column. - Preserve reading order across columns and within each column. - Line geometry is the primary accuracy target. Column grouping is secondary — for a single-column page, one column containing every line is correct. - Prefer tight bounds when you can measure them; best-effort bounds are acceptable. When uncertain whether a tall run is one line or several, prefer splitting over merging. - Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line. - Do not POST a column with an empty `annotations` array — the server rejects it. Skip any detected column that ends up with zero assigned lines. 
-- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected downstream. +- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected by humans downstream. - Zero lines detected is an unprocessable outcome. Stop and report — do not PUT, do not POST a column, do not emit a fallback payload. An empty `items` array would erase every existing annotation on the page. ## TPEN API -Save all lines via a single PUT. The `items` array must contain one annotation per detected line; replace `x,y,w,h` with the integer canvas coordinates computed in step 3. +Save all detected lines via a single PUT. The `items` array must contain one annotation per detected line, in the global reading-order sequence from step 2; replace `x,y,w,h` with the integer canvas coordinates computed in step 3. ``` PUT {{pageEndpoint}} @@ -92,7 +90,7 @@ Content-Type: application/json ## Fallback -When the direct path is unavailable or returns non-2xx, emit the condensed payload below as the final code block of your report, in the global reading-order sequence from step 2. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON. Column creation is out of scope for this fallback. +When HTTP PUT or POST is unavailable from the start, emit the condensed payload below as the final code block of your report, in the global reading-order sequence from step 2. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON. Column creation is out of scope for this fallback. 
``` { @@ -117,6 +115,4 @@ Fallback path, report: - path: `fallback` - counts: lines in payload -- HTTP status and error body if a request was attempted first - final code block: the condensed `{ "items": [...] }` JSON for the user to paste -- list the labels of any columns already created before the failure, so a follow-up pass can avoid duplicating them. diff --git a/templates/detect-columns-and-lines/index.js b/templates/detect-columns-and-lines/index.js index bf2c1c5..7693d6b 100644 --- a/templates/detect-columns-and-lines/index.js +++ b/templates/detect-columns-and-lines/index.js @@ -7,15 +7,12 @@ * @author thehabes */ -import { buildTemplateContext, formatExistingColumns } from '../inject-context.js' +import { buildTemplateContext } from '../inject-context.js' /** @type {import('../../prompt-generator.js').PromptTemplate} */ export const detectColumnsAndLinesTemplate = { id: 'detect-columns-and-lines', label: 'Line Detection + Column Grouping', templateUrl: new URL('./PROMPT.md', import.meta.url), - buildContext: (ctx) => ({ - ...buildTemplateContext(ctx), - existingColumns: formatExistingColumns(ctx.project, ctx.page) - }) + buildContext: buildTemplateContext } diff --git a/templates/detect-columns/PROMPT.md b/templates/detect-columns/PROMPT.md index 36562bb..1fc4377 100644 --- a/templates/detect-columns/PROMPT.md +++ b/templates/detect-columns/PROMPT.md @@ -11,19 +11,13 @@ You are assisting with TPEN manuscript transcription. This task rebuilds the col ## Existing lines -Each entry is ` | xywh= | ` in canvas coordinates, printed in the page's current order. Use the full annotation URI verbatim when assigning lines to columns and when echoing lines in the page PUT. Compare the current order against the reading-order sequence you compute in step 5 to decide whether the PUT in step 8 is necessary. - -The body form is one of: - -- `body=[]` — echo as `[]`. -- `text=""` — echo as `[{ "type": "TextualBody", "value": , "format": "text/plain" }]`. 
-- `body=` — echo the JSON verbatim. +Each entry is ` | xywh= | ` in canvas coordinates, in the page's current order. Use the URI verbatim when assigning lines to columns and when echoing lines in the page PUT. Echo each body form verbatim: `body=[]` → `[]`; `text=""` → `[{ "type": "TextualBody", "value": , "format": "text/plain" }]` (paste `` as-is — already a JSON string literal, do not re-quote or re-escape); `body=` → the JSON verbatim. Compare the current order against the reading-order sequence from step 5 to decide whether the PUT in step 8 is necessary. {{existingLines}} ## Preconditions -All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions, existing-line list) are provided above. This task operates on an existing line set: `lineCount` = `{{lineCount}}`. If `lineCount` is `0`, stop immediately and return a failure report — this task cannot create lines. +All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions, existing-line list) are provided above. This template only groups existing lines: `lineCount` = `{{lineCount}}`. If `lineCount` is `0`, stop immediately and report — this prompt must not create lines. You must have: @@ -41,12 +35,12 @@ Use only tools already available in your environment. Do not install packages, l - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` - Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). + Then clamp `x,y,w,h` so that `0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`. 4. For each detected column, determine which of the existing line ids (from the list above) belong to it. Assign a line to the column whose canvas-space region contains the center point of the line's `xywh`. 
If a line's center falls outside every detected column, assign it to the nearest column by Euclidean distance from the center point to the column's region (distance `0` when the point is inside). Each line belongs to exactly one column. 5. Build a global reading-order sequence of all existing line ids: columns in reading order; within each column, lines sorted top-to-bottom by the `xywh` y-center. 6. DELETE every existing column on the page (see TPEN API below). On any non-2xx, stop and report. Do not POST or PUT after a DELETE failure. -7. For each detected column, POST `{ label, annotations }` where `annotations` is the contiguous slice of the reading-order id sequence from step 5 that belongs to that column. Choose a unique label per column (e.g., `Column A`, `Column B`) that does not clash with any other label chosen in this run. On any non-2xx, stop and report — columns POSTed before the failure remain persisted. -8. Compare the step-5 sequence against the "Existing lines" order index-by-index. If they are identical, skip the PUT. Otherwise, PUT the page with `items` in the step-5 order. Each entry re-uses the existing annotation URI verbatim as its `id`, its `body` reconstructed from the entry's body form, and its `target` rebuilt from the entry's `xywh` selector. The server remaps column references when URIs change, but echoing `body` and `target` verbatim avoids minting unnecessary RERUM versions. On any non-2xx, stop and report. +7. For each detected column, POST `{ label, annotations }` where `annotations` is the contiguous slice of the reading-order id sequence from step 5 that belongs to that column. Choose a unique label per column (e.g., `Column A`, `Column B`) that does not clash with any other label chosen in this run. On any non-2xx, stop and report — columns POSTed before the failure remain persisted. Run step 7 before step 8. +8. Compare the step-5 sequence against the "Existing lines" order index-by-index. If they are identical, skip the PUT. 
Otherwise, PUT the page with `items` in the step-5 order. Each entry re-uses the existing annotation URI verbatim as its `id`, its `body` reconstructed from the entry's body form, and its `target` rebuilt from the entry's `xywh` selector — echoing verbatim avoids minting a needless new RERUM version of the line. On any non-2xx, stop and report. 9. Report: columns deleted, columns created, whether the page order was updated, and per-column line counts. ## Rules @@ -56,12 +50,11 @@ Use only tools already available in your environment. Do not install packages, l - Keep column boundaries tight enough that each line clearly belongs to one column, but generous enough to avoid clipping existing line selectors. - Column labels must be unique within this run. The DELETE in step 6 clears every existing column, so no pre-existing label can collide. - Each existing line belongs to exactly one column. -- Do not POST a column with an empty `annotations` array — the server rejects it. If a detected column would end up with zero assigned lines, merge its assignments into the nearest populated column instead. -- Echo each line's existing `body` and `target` unchanged in the PUT. Changing either mints a new RERUM version of the line; the server remaps columns to the new URIs, but echoing verbatim avoids the needless version. +- Do not POST a column with an empty `annotations` array — the server rejects it. Skip any detected column that ends up with zero assigned lines. ## TPEN API -First, delete all existing columns on the page. Expect `204 No Content` on success (including when the page had no columns): +First, delete all existing columns on the page: ``` DELETE {{pageEndpoint}}/clear-columns diff --git a/templates/detect-lines/PROMPT.md b/templates/detect-lines/PROMPT.md index f21e559..aea9d7c 100644 --- a/templates/detect-lines/PROMPT.md +++ b/templates/detect-lines/PROMPT.md @@ -11,7 +11,9 @@ You are assisting with TPEN manuscript transcription. 
Perform the task end-to-en ## Preconditions -All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: +All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. This template only creates new lines: `lineCount` = `{{lineCount}}`. If `lineCount` is not `0`, stop immediately and report — existing line data must not be modified. + +You must have: 1. Ability to fetch the image bytes (or a derivative) and identify line bounds from them. Precise pixel measurement is preferred when available; visual estimation from the fetched image is acceptable otherwise. 2. Either HTTP PUT capability with `Content-Type: application/json`, or the ability to emit the payload as a fallback JSON code block in your report. If HTTP PUT is not available, skip straight to the Fallback section — do not retry. @@ -21,24 +23,24 @@ Use only tools already available in your environment. Do not install packages, l ## Steps 1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, fetch a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. If you have precise pixel tooling and want tighter bounds on a region, request it server-side as `{base}/x,y,w,h/max/0/default.jpg` and add the crop's `x,y` origin back before applying the canvas conversion below. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. -2. Detect text lines across the whole page in reading order. This task does not create TPEN columns. +2. Detect text lines across the whole page in reading order. This task does not create TPEN columns. 
Then do exactly one self-review pass to tweak line placement — catch missed lines, merge over-splits, split over-merges, tighten loose bounds. One pass only, then move on. 3. For every line, measure a bounding box in image-pixel space and convert to integer canvas coordinates using: - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` - Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). -4. If HTTP PUT is available, build the full payload under **TPEN API** and send the request once. On any non-2xx response, do not retry — fall back. -5. If HTTP PUT is unavailable (or step 4 fell back), emit the condensed payload under **Fallback** as the final code block. + Then clamp `x,y,w,h` so that `0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`. +4. If HTTP PUT is available, build the full payload under **TPEN API** and send the request once. On any non-2xx response, stop and report the status and error body — do not emit a fallback payload; the same token and content would be re-submitted through it. +5. If HTTP PUT is unavailable from the start, emit the condensed payload under **Fallback** as the final code block — do not also attempt PUT. 6. Report count and which path was used (direct PUT or fallback). ## Rules -- Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. +- Bounds MUST be saved as integer coordinates in canvas space. No percentage-based selectors. No `percent:` or `pixel:` prefix on the selector value. - Preserve reading order across the whole page. - Prefer tight bounds when you can measure them; best-effort bounds are acceptable. When uncertain whether a tall run is one line or several, prefer splitting over merging. 
- Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line. -- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected downstream. +- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected by humans downstream. - Zero lines detected is an unprocessable outcome. Stop and report — do not PUT, do not emit a fallback payload. An empty `items` array would erase every existing annotation on the page. ## TPEN API @@ -71,7 +73,7 @@ Content-Type: application/json ## Fallback -When the direct PUT is impossible or returns non-2xx, emit the condensed payload below as the final code block of your report. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON (no comments, no placeholders — substitute the real coordinates). +When HTTP PUT is unavailable from the start, emit the condensed payload below as the final code block of your report. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON (no comments, no placeholders — substitute the real coordinates). ``` { @@ -95,5 +97,4 @@ Fallback path, report: - path: `fallback` - count: number of line annotations in the payload -- HTTP status and error body if a PUT was attempted first - final code block: the condensed `{ "items": [...] 
}` JSON for the user to paste diff --git a/templates/detect-order-and-transcribe/PROMPT.md b/templates/detect-order-and-transcribe/PROMPT.md index f7d5645..e4901e7 100644 --- a/templates/detect-order-and-transcribe/PROMPT.md +++ b/templates/detect-order-and-transcribe/PROMPT.md @@ -1,4 +1,4 @@ -# Task: detect, order, and transcribe every text line on a TPEN3 page end-to-end +# Task: detect columns, order lines, and transcribe every text line on a TPEN3 page end-to-end You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services (direct) or emitted as a fallback JSON payload for the user to paste. @@ -11,41 +11,46 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Preconditions -All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have: +All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. This template only creates new lines: `lineCount` = `{{lineCount}}`. If `lineCount` is not `0`, stop immediately and report — existing line data must not be modified. -1. Ability to fetch the image bytes (or a derivative) and identify line bounds and text from them. Precise pixel measurement is preferred when available; visual estimation and on-sight transcription from the fetched image are acceptable otherwise. -2. Either HTTP PUT capability with `Content-Type: application/json`, or the ability to emit the payload as a fallback JSON code block in your report. If HTTP PUT is not available, skip straight to the Fallback section — do not retry. +You must have: + +1. Ability to fetch the image bytes (or a derivative) and identify column and line bounds plus text from them. Precise pixel measurement is preferred when available; visual estimation and on-sight transcription from the fetched image are acceptable otherwise. +2. 
Either HTTP PUT and POST capability with `Content-Type: application/json`, or the ability to emit the lines-only payload as a fallback JSON code block in your report. If either verb is unavailable, skip straight to the Fallback section — do not retry. Column creation has no fallback; it is dropped when the fallback path is taken. Use only tools already available in your environment. Do not install packages, libraries, or system utilities. ## Steps 1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, fetch a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. If you have precise pixel tooling and want tighter bounds or a clearer crop for transcription, request a region server-side as `{base}/x,y,w,h/max/0/default.jpg` and add the crop's `x,y` origin back before applying the canvas conversion below. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes. -2. Identify the page's layout. If the page has multiple text blocks side-by-side, determine their reading order (left→right for Latin-script layouts; adjust for script tradition). Within each block, detect lines top-to-bottom. Then flatten into a single global reading-order sequence across blocks (block-major: every line in the first block, then the second, etc.). Single-block pages collapse to one top-to-bottom sequence. This task does not create TPEN columns. +2. Detect main text column regions in reading order first, then detect the lines inside each column (reading order preserved within each column). If the page visibly has a single text block, create one column containing every detected line — do not subdivide. Track each line's column index (an integer, 0-based) as you detect it. 
Then flatten into a single global reading-order sequence across columns (column-major: every line in the first column, then the second, etc., adjusted for script tradition). Then do exactly one self-review pass to tweak line placement — catch missed lines, merge over-splits, split over-merges, tighten loose bounds. One pass only, then move on. 3. For every line, measure a bounding box in image-pixel space and convert to integer canvas coordinates using: - `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)` - `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)` - `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)` - `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)` - Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`). -4. Run handwriting text recognition on each line's crop. Apply the recognition rules below. -5. If HTTP PUT is available, build the full payload under **TPEN API** in the global reading-order sequence from step 2 and send the request once. On any non-2xx response, do not retry — fall back. -6. If HTTP PUT is unavailable (or step 5 fell back), emit the condensed payload under **Fallback** as the final code block. -7. Report counts (lines saved/in payload, non-empty text, uncertain) and which path was used (direct PUT or fallback). -8. Report notable ambiguities (e.g., illegible lines transcribed as empty or flagged) and the detected block count so reviewers know whether a multi-block layout was recognised. + Then clamp `x,y,w,h` so that `0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`. +4. Run text recognition (print or handwriting) on each line's crop. Apply the recognition rules below. +5. If HTTP PUT and POST are available, build the full payload under **TPEN API** and PUT the items once in the global reading-order sequence from step 2. 
If the PUT returns non-2xx, stop and report the status and error body — do not emit a fallback payload; the same token and content would be re-submitted through it. If the PUT succeeds, for each column POST `{ label, annotations }` where `annotations` is the contiguous slice of that column's lines from the PUT response. The PUT response's `items` array is guaranteed to be in the same order as the submitted items, so use each line's column index from step 2 to slice the returned ids. Labels must be unique within this run. If a column POST returns non-2xx, stop and report the partial state — do not emit a fallback payload; lines are already saved. +6. If HTTP PUT or POST is unavailable from the start, emit the condensed payload under **Fallback** as the final code block — do not also attempt PUT. +7. Report counts (lines saved/in payload, non-empty text, uncertain, columns created/in payload), which path was used (direct or fallback), and notable ambiguities (e.g., illegible lines transcribed as empty or flagged). ## Rules ### Detection (IMAGE_ANALYSIS) -- Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value. -- The PUT `items` order is the page's canonical reading order; do not interleave lines from different blocks. +- Bounds MUST be saved as integer coordinates in canvas space. No percentage-based selectors. No `percent:` or `pixel:` prefix on the selector value. +- Column labels must be unique within this run. The server rejects labels matching any pre-existing column. +- Each line annotation belongs to at most one column. +- Preserve reading order across columns and within each column. +- Line geometry is the primary accuracy target. Column grouping is secondary — for a single-column page, one column containing every line is correct. - Prefer tight bounds when you can measure them; best-effort bounds are acceptable. 
When uncertain whether a tall run is one line or several, prefer splitting over merging. - Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line. -- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected downstream. -- Zero lines detected is an unprocessable outcome. Stop and report — do not PUT, do not emit a fallback payload. An empty `items` array would erase every existing annotation on the page. +- Do not POST a column with an empty `annotations` array — the server rejects it. Skip any detected column that ends up with zero assigned lines. +- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected by humans downstream. +- Zero lines detected is an unprocessable outcome. Stop and report — do not PUT, do not POST a column, do not emit a fallback payload. An empty `items` array would erase every existing annotation on the page. -### Recognition (HANDWRITING_TEXT_RECOGNITION) +### Recognition (TEXT_RECOGNITION) - Prioritize diplomatic transcription over normalization. - Preserve orthography and punctuation as observed. @@ -56,7 +61,7 @@ Use only tools already available in your environment. Do not install packages, l ## TPEN API -Save every detected line with its transcription in a single PUT. The `items` array must contain one annotation per detected line, in the reading-order sequence from step 2; replace `x,y,w,h` with the integer canvas coordinates computed in step 3, and `<text>` with the recognized text (empty string for fully illegible lines). +Save every detected line with its transcription in a single PUT. The `items` array must contain one annotation per detected line, in the global reading-order sequence from step 2; replace `x,y,w,h` with the integer canvas coordinates computed in step 3, and `<text>` with the recognized text (empty string for fully illegible lines).
``` PUT {{pageEndpoint}} @@ -82,9 +87,22 @@ Content-Type: application/json } ``` +Then POST each column (reuse the same Bearer token as the PUT above): + +``` +POST {{pageEndpoint}}/column +Authorization: Bearer +Content-Type: application/json + +{ + "label": "Column A", + "annotations": ["", ""] +} +``` + ## Fallback -When the direct PUT is impossible or returns non-2xx, emit the condensed payload below as the final code block of your report, in the reading-order sequence from step 2. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON (no comments, no placeholders — substitute the real coordinates and recognized text). +When HTTP PUT or POST is unavailable from the start, emit the condensed payload below as the final code block of your report, in the global reading-order sequence from step 2. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON (no comments, no placeholders — substitute the real coordinates and recognized text). Column creation is out of scope for this fallback. ``` { @@ -94,21 +112,21 @@ When the direct PUT is impossible or returns non-2xx, emit the condensed payload } ``` -One item per detected line. `target` is the bare selector value (no `#`, no `pixel:` prefix). `text` is an empty string for fully illegible lines — do not drop the item. Item order is the page's canonical reading order; do not interleave lines from different blocks. +One item per detected line, in the global reading-order sequence. `target` is the bare selector value (no `#`, no `pixel:` prefix). `text` is an empty string for fully illegible lines — do not drop the item. 
## Completion -Direct PUT path, report: +Direct path, report: -- operation: `PUT page` -- target: {{pageEndpoint}} -- counts: lines saved, lines with non-empty text, lines flagged uncertain, text blocks detected +- operations: `PUT page`, `POST column` (×N) +- target: {{pageEndpoint}} (page) and {{pageEndpoint}}/column +- counts: lines saved, lines with non-empty text, lines flagged uncertain, columns created +- whether lines were saved even if a column POST failed (partial success is acceptable — describe what persists) - notable ambiguities worth a human review Fallback path, report: - path: `fallback` -- counts: lines in payload, lines with non-empty text, lines flagged uncertain, text blocks detected -- HTTP status and error body if a PUT was attempted first +- counts: lines in payload, lines with non-empty text, lines flagged uncertain - notable ambiguities worth a human review - final code block: the condensed `{ "items": [...] }` JSON for the user to paste diff --git a/templates/detect-order-and-transcribe/index.js b/templates/detect-order-and-transcribe/index.js index 255c5cb..911f7cc 100644 --- a/templates/detect-order-and-transcribe/index.js +++ b/templates/detect-order-and-transcribe/index.js @@ -1,9 +1,9 @@ /** - * @file Template: "Detect lines + order + transcribe → PUT page". + * @file Template: "Detect columns + lines + transcribe → PUT page, POST columns". * - * Combines the multi-block reading-order detection from - * detect-columns-and-lines with the handwriting recognition from - * detect-and-transcribe, without creating column annotations. + * Combines the multi-block reading-order and column creation from + * detect-columns-and-lines with the text recognition from + * detect-and-transcribe. 
* * @author thehabes */ @@ -13,7 +13,7 @@ import { buildTemplateContext } from '../inject-context.js' /** @type {import('../../prompt-generator.js').PromptTemplate} */ export const detectOrderAndTranscribeTemplate = { id: 'detect-order-and-transcribe', - label: 'Line Detection + Ordering + Transcription', + label: 'Line Detection + Column Grouping + Transcription', templateUrl: new URL('./PROMPT.md', import.meta.url), buildContext: buildTemplateContext } diff --git a/templates/inject-context.js b/templates/inject-context.js index b21ab71..391461d 100644 --- a/templates/inject-context.js +++ b/templates/inject-context.js @@ -4,13 +4,13 @@ * Every template consumes a superset of flat `{{name}}` variables produced by * `buildTemplateContext`. Individual templates only reference the subset they * need in their PROMPT.md body — unused keys simply don't render. Templates - * that need richer context (e.g. existing column listings) spread this result - * and layer their own keys on top. + * that need richer context (e.g. an `existingLines` listing) spread this + * result and layer their own keys on top. * * @author thehabes */ -import { getIRI, parseXywh, trailingId } from '../iiif-ids.js' +import { getIRI, parseXywh } from '../iiif-ids.js' /** * Pull the first image body URL off a IIIF canvas, or null if none is present. @@ -72,31 +72,41 @@ export function buildTemplateContext(ctx) { * The common case, so it's worth the shorter display. * - `body=` — anything else; echo the JSON verbatim. * - * Existing TPEN line bodies are expected to always carry `type`, `value`, and - * `format`. The `text=` round-trip reconstruction sets `format: "text/plain"`, - * so `only.format === 'text/plain'` is a strict match — any other shape (no - * format, different format, multiple bodies, non-`TextualBody`) drops to - * `body=` to preserve fidelity on the PUT echo. 
+ * Bodies arrive in several shapes: empty (`null`/`undefined`/`""`/`[]`), an + * array of body entries, or a single body object (not wrapped). The unwrapped + * shape comes from `Line.updateText` after a PATCH: it sets + * `this.body = { type, value, format, language }` directly, so PATCHed lines + * round-trip through RERUM as `{type, value, format}` and would otherwise be + * misread as empty. + * + * The `text=` round-trip reconstruction sets `format: "text/plain"`, so + * `format === 'text/plain'` is a strict match — any other shape (no format, + * different format, multiple bodies, non-`TextualBody`) drops to `body=` + * to preserve fidelity on the PUT echo. * @param {any} body an annotation `body` value. * @returns {string} */ function formatBody(body) { - if (!Array.isArray(body) || body.length === 0) return 'body=[]' - if (body.length === 1) { - const only = body[0] - // Require EXACTLY {type, value, format} with the expected values so the - // `text=` → `[{type, value, format}]` round-trip is lossless. Any extra - // field (e.g. `language`, `creator`, `id`) would be silently dropped on - // the PUT echo and trigger a needless RERUM re-version. - const keys = only && typeof only === 'object' ? Object.keys(only) : [] - const isPlainTextual = - keys.length === 3 + if (body === null || body === undefined || body === '') return 'body=[]' + // Require EXACTLY {type, value, format} with the expected values so the + // `text=` → `[{type, value, format}]` round-trip is lossless. Any extra + // field (e.g. `language`, `creator`, `id`) would be silently dropped on + // the PUT echo and trigger a needless RERUM re-version. + const isPlainTextual = (entry) => { + const keys = entry && typeof entry === 'object' && !Array.isArray(entry) + ? 
Object.keys(entry) : [] + return keys.length === 3 && keys.every(k => k === 'type' || k === 'value' || k === 'format') - && only.type === 'TextualBody' - && typeof only.value === 'string' - && only.format === 'text/plain' - if (isPlainTextual) return `text=${JSON.stringify(only.value)}` + && entry.type === 'TextualBody' + && typeof entry.value === 'string' + && entry.format === 'text/plain' + } + if (Array.isArray(body)) { + if (body.length === 0) return 'body=[]' + if (body.length === 1 && isPlainTextual(body[0])) return `text=${JSON.stringify(body[0].value)}` + return `body=${JSON.stringify(body)}` } + if (isPlainTextual(body)) return `text=${JSON.stringify(body.value)}` return `body=${JSON.stringify(body)}` } @@ -125,24 +135,3 @@ export function formatExistingLines(fetchedPage) { return `- ${lineUri} | ${xywh} | ${formatBody(item?.body)}` }).join('\n') } - -/** - * Render the current column state for a given page as a markdown bullet list. - * Used by templates that must avoid duplicate column labels. Columns live on - * `project.layers[].pages[]`; the `/resolved` page endpoint does not emit - * them, so the project graph is the only source. - * @param {any} project the TPEN project object. - * @param {any} page the page object returned by `fetchPageResolved`. - * @returns {string} - */ -export function formatExistingColumns(project, page) { - const tail = trailingId(page) - const projectPage = (project?.layers ?? []) - .flatMap(l => l.pages ?? []) - .find(pg => trailingId(pg) === tail) - const cols = projectPage?.columns ?? [] - if (cols.length === 0) { - return '- (No existing columns on this page — labels must be unique when created.)' - } - return cols.map(c => `- ${c.label ?? 
'(unlabeled)'}`).join('\n') -} diff --git a/templates/transcribe-known-lines/PROMPT.md b/templates/transcribe-known-lines/PROMPT.md index 0d0c9dc..3641277 100644 --- a/templates/transcribe-known-lines/PROMPT.md +++ b/templates/transcribe-known-lines/PROMPT.md @@ -11,18 +11,18 @@ You are assisting with TPEN manuscript transcription. Perform the task end-to-en ## Existing lines -Each entry is `<annotation URI> | xywh=<x,y,w,h> | <body form>` in canvas coordinates. The body form is `body=[]` (empty), `text="<string>"` (single plain-text `TextualBody`), or `body=<JSON>` (anything else) — use it as context for what's already on the line. The fallback payload re-uses the full annotation URI verbatim as the `id` of each item; the splitscreen tool rebuilds each existing target from the hydrated page before PUTting it, and updates only the body text. +Each entry is `<annotation URI> | xywh=<x,y,w,h> | <body form>` in canvas coordinates. The body form (`body=[]`, `text="<string>"`, or `body=<JSON>`) is the line's current transcription — see "Rules" for when to keep it vs. replace it. The direct PUT and the fallback both re-use each entry's URI verbatim as the item `id`; the direct PUT additionally rebuilds `target` from the entry's `xywh` selector (see "TPEN API" below). The new transcription replaces the prior body in both paths. {{existingLines}} ## Preconditions -All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions, existing-line list) are provided above. This template only revises existing lines: `lineCount` = `{{lineCount}}`. If `lineCount` is `0`, stop immediately and report. +All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions, existing-line list) are provided above. This template only revises existing lines: `lineCount` = `{{lineCount}}`. If `lineCount` is `0`, stop immediately and report — this prompt must not create lines. You must have: 1. Vision capability: fetch each line's region as image bytes (e.g. via a IIIF region URL) and read the inked glyphs directly.
A fetcher that returns only a prose description of the image does not qualify, and any preview rendered back into chat is downsampled — do not transcribe from a preview. **If you cannot read image bytes directly with the capabilities already available to you, stop now and return a failure report naming the missing capability.** This precondition is hard — fallback does not rescue missing vision. -2. Either HTTP PATCH capability (with `Content-Type: text/plain`), or the ability to emit a fallback JSON code block in your report. If HTTP PATCH is not available, skip straight to the Fallback section — do not retry. +2. Either HTTP PUT capability (with `Content-Type: application/json`), or the ability to emit a fallback JSON code block in your report. If HTTP PUT is not available, skip straight to the Fallback section — do not retry. Use only tools already available in your environment. Do not install packages, libraries, or system utilities. @@ -34,34 +34,54 @@ Use only tools already available in your environment. Do not install packages, l - `pixel_w = round(canvas_w * img_w / {{canvasWidth}})` - `pixel_h = round(canvas_h * img_h / {{canvasHeight}})` Crop each line region and verify it visibly contains a single line of inked text. -2. Run handwriting text recognition over each crop. Apply the recognition rules below. -3. If HTTP PATCH is available, PATCH the text to each line's line-text endpoint — one PATCH per line in the "Existing lines" list. On any non-2xx, record the status and continue with the remaining lines. If every PATCH returned non-2xx, treat PATCH as unavailable and proceed to step 4. If HTTP PATCH is unavailable from the start, skip directly to step 4. -4. If you reached this step because PATCH was unavailable or every attempt failed, emit the condensed payload under **Fallback** as the final code block. -5. Report counts (lines updated, lines flagged illegible, lines failed) and which path was used. +2. 
Run text recognition (print or handwriting) over each crop. Apply the recognition rules below. +3. If HTTP PUT is available, build a single page PUT body whose `items` array contains one entry per existing line, in the same order as the "Existing lines" list. Each item is shaped as in "TPEN API" below; set `body` per the confidence ladder in "Rules". Send one PUT to `{{pageEndpoint}}`. On non-2xx, stop and report the status — do not emit a fallback payload; the same token and content would be re-submitted through it. +4. If HTTP PUT is unavailable from the start, emit the condensed payload under **Fallback** as the final code block — do not also attempt PUT. +5. Report counts (lines submitted, lines flagged illegible) and which path was used. ## Rules - Prioritize diplomatic transcription over normalization. Preserve orthography and punctuation as observed. - Use explicit uncertainty markers for unclear glyphs (for example `[a?]`). Do not force certainty. - Do not invent expansions. If an abbreviation mark is present, transcribe the mark; do not silently expand. -- Keep line segmentation stable — one transcription string per existing line annotation. -- If a line's crop is illegible, send an empty body (direct) or emit `"text": ""` (fallback) and report the line id as unresolved — do not fabricate text. In the fallback payload, do not drop the item. +- Confidence ladder per line: + 1. Confident reading can overwrite existing line text. + 2. Unconfident reading uses "Existing lines" (echo the prior `text=` or `body=` value verbatim). + 3. `body: []` (direct) / `"text": ""` (fallback), only if the line was already empty (`body=[]`). Do not fabricate text. Report any line that fell back to existing text or to empty. + +> Do not drop the item in either path: the direct PUT treats omitted line ids as deletions and updates columns to remove them. ## TPEN API -Update one line's text via PATCH with a plain-text body. 
`` is the trailing path segment of the annotation URI listed above (the last `/`-separated segment). +Update every line in a single page PUT. Each `items` entry re-uses an existing annotation URI verbatim as `id`, rebuilds `target` from that line's `xywh` selector, and sets `body` per the confidence ladder in "Rules": ``` -PATCH {{pageEndpoint}}/line//text +PUT {{pageEndpoint}} Authorization: Bearer {{token}} -Content-Type: text/plain +Content-Type: application/json - +{ + "items": [ + { + "id": "", + "body": [{ "type": "TextualBody", "value": "", "format": "text/plain" }], + "target": { + "source": "{{canvasId}}", + "type": "SpecificResource", + "selector": { + "type": "FragmentSelector", + "conformsTo": "http://www.w3.org/TR/media-frags/", + "value": "xywh=x,y,w,h" + } + } + } + ] +} ``` ## Fallback -The fallback tool only accepts JSON, so it uses a single page-level PUT instead of per-line PATCH. When PATCH is unavailable or every attempt returned non-2xx, emit the condensed payload below as the final code block of your report. The TPEN splitscreen tool re-uses each line's existing target from the hydrated page context before PUTting it — the item's `id` must match an entry in "Existing lines" above. +The fallback tool only accepts a condensed payload — re-using URIs but not full targets. When PUT is unavailable from the start, emit the payload below as the final code block of your report. The TPEN splitscreen tool re-uses each line's existing target from the hydrated page context before PUTting it — the item's `id` must match an entry in "Existing lines" above. ``` { @@ -71,19 +91,19 @@ The fallback tool only accepts JSON, so it uses a single page-level PUT instead } ``` -There must be exactly one item per entry in "Existing lines", each re-using that entry's annotation URI verbatim as its `id`. Item order must match the order of "Existing lines" — do not reorder. `text` is an empty string for fully illegible lines — do not drop the item. 
It must be valid JSON (no comments, no placeholders). +There must be exactly one item per entry in "Existing lines". Item order must match the order of "Existing lines" — do not reorder. Set each `text` per the confidence ladder in "Rules". It must be valid JSON (no comments, no placeholders). ## Completion -Direct PATCH path, report: +Direct PUT path, report: -- operation: `PATCH line text` -- target: {{pageEndpoint}}/line//text per line -- counts: lines updated, lines flagged illegible, lines failed (with HTTP status per failure) +- operation: `PUT page` +- target: {{pageEndpoint}} +- counts: lines submitted, lines flagged illegible +- HTTP status of the PUT Fallback path, report: - path: `fallback` - counts: lines in payload, lines flagged illegible -- HTTP status and error body if a PATCH was attempted first - final code block: the condensed `{ "items": [...] }` JSON for the user to paste diff --git a/templates/transcribe-known-lines/index.js b/templates/transcribe-known-lines/index.js index b72b0a3..03be90f 100644 --- a/templates/transcribe-known-lines/index.js +++ b/templates/transcribe-known-lines/index.js @@ -1,5 +1,5 @@ /** - * @file Template: "Transcribe existing lines → PATCH line text". + * @file Template: "Transcribe existing lines → page PUT". * * Targets workflow #1 from the absorbed cubap `_tools/COMMON_TASKS.md`: * Text Recognition Within Known Bounds. diff --git a/ui-manager.js b/ui-manager.js index 4b9aeab..1d50b95 100644 --- a/ui-manager.js +++ b/ui-manager.js @@ -206,6 +206,8 @@ export class UIManager { #feedbackTimer = null /** Fallback-panel submit button; toggled by `updateToken`. */ #fallbackSubmit = null + /** Security warning text node; rewritten by `updateToken` when consent arrives. */ + #warningText = null /** * @param {string} [rootId='app'] id of the element to render into. 
@@ -318,9 +320,11 @@ export class UIManager { if (k === 'Line') this.#lineMetaValue = dd } + const warningSpan = el('span', { text: warningText(token) }) + this.#warningText = warningSpan const warning = el('div', { class: 'warning', attrs: { role: 'note' } }, [ el('strong', { text: 'Security: ' }), - el('span', { text: `The generated prompt carries your TPEN session token so an agentic LLM can manipulate your TPEN data on your behalf. Clicking 'Copy' writes the full token to your clipboard. Only paste it into LLM environments you trust.` }) + warningSpan ]) this.#authButton = null @@ -579,6 +583,7 @@ export class UIManager { this.#authButton.remove() this.#authButton = null } + if (this.#warningText) this.#warningText.textContent = warningText(token) const { projectID, pageID } = this.state if (this.#generateBtn) this.#generateBtn.disabled = !pageID if (this.#fallbackSubmit) { @@ -697,6 +702,19 @@ function truncateToken(token) { return `${token.slice(0, 10)}…${token.slice(-10)}` } +/** + * Pick the security-warning body for the given token state. Pre-consent the + * warning explains what consent will mean; once a token is held it shrinks to + * the operative reminders. + * @param {string|null|undefined} token + * @returns {string} + */ +function warningText(token) { + return token + ? `Only paste the prompt into LLM environments you trust. You can log out of TPEN to invalidate a leaked token.` + : `The generated prompt carries your TPEN session token so an agentic LLM can manipulate your TPEN data on your behalf. Clicking 'Copy' writes the full token to your clipboard. Only paste the prompt into LLM environments you trust. You can log out of TPEN to invalidate a leaked token.` +} + /** * Fallback reload target when the parent didn't forward `parentUrl` via * `TPEN_CONTEXT`. Minted from the parent origin (taken from