Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
5d94a89
prompt tuning pass 1
thehabes Apr 21, 2026
8e644e2
Change their selection labels
thehabes Apr 21, 2026
d4b4c82
cleanup while testing
thehabes Apr 21, 2026
92f596a
prompt tuning round 2
thehabes Apr 21, 2026
0c42a66
accuracy bundle
thehabes Apr 22, 2026
a36c1a1
Tuning round 3
thehabes Apr 22, 2026
b5d8ff9
Changes from review
thehabes Apr 22, 2026
ce1d9c2
Now that's a demo
thehabes Apr 22, 2026
d565c74
Now that's a demo
thehabes Apr 22, 2026
9af6429
Now that's a demo
thehabes Apr 22, 2026
0255000
Order principle applied. New prompt to transcribe and include orderi…
thehabes Apr 22, 2026
1c14e54
Restore JSON-paste fallback submission flow
thehabes Apr 23, 2026
94a981b
recovering
thehabes Apr 23, 2026
d999104
recovering
thehabes Apr 23, 2026
a9ad8bb
changes during review
thehabes Apr 23, 2026
99623b1
changes during review
thehabes Apr 23, 2026
6114cd6
changes during review
thehabes Apr 23, 2026
31a2983
changes during review
thehabes Apr 23, 2026
f85b135
changes during review
thehabes Apr 23, 2026
68b9c40
condensed communication
thehabes Apr 23, 2026
26127d7
condensed communication
thehabes Apr 23, 2026
4977c7f
condensed communication
thehabes Apr 23, 2026
e5da7a8
changes from review
thehabes Apr 23, 2026
59d7363
condense
thehabes Apr 23, 2026
37643a4
changes during review
thehabes Apr 24, 2026
ac4b2aa
this is not right
thehabes Apr 24, 2026
3863853
this is not right
thehabes Apr 24, 2026
b7ec4df
this is right
thehabes Apr 24, 2026
b72f85c
changes during review
thehabes Apr 24, 2026
8682b35
small hack to get the transcription interface to refresh when the fal…
thehabes Apr 24, 2026
12202c1
Merge pull request #6 from CenterForDigitalHumanities/restore-api-usa…
thehabes Apr 24, 2026
b3074fc
Changes during review
thehabes Apr 24, 2026
48eb2d3
lock it in
thehabes Apr 24, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 0 additions & 20 deletions auth.js
Original file line number Diff line number Diff line change
Expand Up @@ -99,23 +99,3 @@ export function persistToken(token) {
localStorage.setItem(TOKEN_KEY, token)
return token
}

/**
 * Read the agent IRI claim out of a TPEN JWT.
 *
 * Counterpart of `getAgentIRIFromToken` in
 * tpen3-interfaces/components/iiif-tools/index.js. The IRI is stored under a
 * custom claim whose key ends in `/agent`; when no such key exists the lookup
 * falls back to `http://store.rerum.io/agent`.
 *
 * Any failure while decoding or reading the claims yields `null` rather than
 * an exception.
 * @param {string} token encoded JWT handed to `decodeJwt`.
 * @returns {string|null} the claim value, or `null` when it is absent or the
 *   token cannot be decoded into an object.
 */
export function getAgentIRIFromToken(token) {
  try {
    const claims = decodeJwt(token)
    if (claims === null || typeof claims !== 'object') return null

    // Start from the conventional claim name; the first key with the
    // `/agent` suffix replaces it.
    let claimName = 'http://store.rerum.io/agent'
    for (const name of Object.keys(claims)) {
      if (name.endsWith('/agent')) {
        claimName = name
        break
      }
    }

    const iri = claims[claimName]
    return iri === undefined || iri === null ? null : iri
  } catch {
    return null
  }
}
32 changes: 32 additions & 0 deletions iiif-ids.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,35 @@ export function trailingId(value) {
const parts = String(iri).split('/').filter(Boolean)
return parts.pop() ?? null
}

/**
 * Pull a Media Fragments `xywh=…` selector value out of any of the target
 * shapes that flow through this app:
 *
 * - W3C `SpecificResource` object with `selector.value`, or with a selector
 *   array. For arrays the first entry whose `value` contains `xywh=` wins, so
 *   a non-fragment selector listed ahead of the fragment does not hide it.
 * - Bare string target like `"<canvasIRI>#xywh=10,20,300,40"` — historical
 *   annotations stored this way still show up in hydrated pages.
 * - Already-bare selector like `"xywh=10,20,300,40"` — the shape prompts emit
 *   in condensed fallback payloads.
 *
 * Returns the full `"xywh=…"` form (suitable for a `FragmentSelector.value`)
 * or `null` if no selector is present. Strips the non-standard `pixel:`
 * prefix that Annotorious produces.
 * @param {any} target a target value: string, `SpecificResource`, or nullish.
 * @returns {string|null}
 */
export function parseXywh(target) {
  // Single extraction path shared by every target shape: keep everything
  // from the first `xywh=` onward and drop a leading `pixel:` unit prefix.
  const extract = value => {
    if (typeof value !== 'string') return null
    const start = value.indexOf('xywh=')
    if (start === -1) return null
    return value.slice(start).replace(/^xywh=pixel:/, 'xywh=')
  }

  if (typeof target === 'string') return extract(target)
  if (!target || typeof target !== 'object') return null

  const sel = target.selector
  if (!Array.isArray(sel)) return extract(sel?.value)

  // Selector arrays may list several selectors; scan in order rather than
  // assuming the fragment selector is at index 0.
  for (const entry of sel) {
    const found = extract(entry?.value)
    if (found !== null) return found
  }
  return null
}
21 changes: 18 additions & 3 deletions main.js
Original file line number Diff line number Diff line change
Expand Up @@ -110,14 +110,29 @@ export class PromptsApp {
return
}

// An unhydrated vault reference is just `{id, type}`. A hydrated annotation
// additionally defines at least one of `target`, `body` or `motivation`.
const isHydratedItem = it => it && typeof it === 'object' &&
  ['target', 'body', 'motivation'].some(key => it[key] !== undefined)

// A page counts as hydrated when `items` is an array that is either empty or
// whose first entry is a hydrated item (only the first entry is sampled).
const isPageHydrated = p => {
  const items = p?.items
  if (!Array.isArray(items)) return false
  return items.length === 0 || isHydratedItem(items[0])
}

// A project counts as hydrated when `layers` is an array that is either empty
// or whose first layer already carries a `pages` array.
const isProjectHydrated = p => {
  const layers = p?.layers
  if (!Array.isArray(layers)) return false
  return layers.length === 0 || Array.isArray(layers[0]?.pages)
}

// Upgrade a stub project (missing or unhydrated layers) when we have a token.
if (!project.layers && this.token) {
if (this.token && !isProjectHydrated(project)) {
console.warn('[tpen-prompts] refetching project — parent sent unhydrated payload')
try { project = await fetchProject(projectID, this.token) } catch (err) { console.warn('fetchProject failed', err) }
}

const pageID = page ? (trailingId(page) ?? '') : ''
// Upgrade a stub page (no items) when we have a token and a pageID.
if (page && !Array.isArray(page.items) && this.token && pageID) {
// Upgrade a stub page (unhydrated items) when we have a token and a pageID.
if (this.token && pageID && !isPageHydrated(page)) {
console.warn('[tpen-prompts] refetching page — parent sent unhydrated payload')
try { page = await fetchPageResolved(projectID, pageID, this.token) ?? page } catch (err) { console.warn('fetchPageResolved failed', err) }
}

Expand Down
2 changes: 2 additions & 0 deletions prompt-generator.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import { detectColumnsTemplate } from './templates/detect-columns/index.js'
import { detectLinesTemplate } from './templates/detect-lines/index.js'
import { detectColumnsAndLinesTemplate } from './templates/detect-columns-and-lines/index.js'
import { detectAndTranscribeTemplate } from './templates/detect-and-transcribe/index.js'
import { detectOrderAndTranscribeTemplate } from './templates/detect-order-and-transcribe/index.js'

/**
* @typedef {object} PromptTemplate
Expand Down Expand Up @@ -42,6 +43,7 @@ register(detectColumnsTemplate)
register(detectLinesTemplate)
register(detectColumnsAndLinesTemplate)
register(detectAndTranscribeTemplate)
register(detectOrderAndTranscribeTemplate)

/**
* Fetch every registered template's markdown body once and cache it. Must be
Expand Down
25 changes: 24 additions & 1 deletion styles.css
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ button:hover:not(:disabled) { filter: brightness(1.05); }
button:focus-visible,
input:focus-visible,
select:focus-visible,
textarea:focus-visible {
textarea:focus-visible,
summary:focus-visible {
outline: 2px solid var(--accent);
outline-offset: 2px;
}
Expand Down Expand Up @@ -163,3 +164,25 @@ textarea:focus-visible {
.tool-header {
text-align: center;
}

/*
 * Fallback panel: a bordered, padded box set apart from the content above it.
 * Its `summary` child is styled as a muted, clickable label that brightens on
 * hover and keyboard focus; `.hint` is small muted helper text; the textarea
 * gets a little top spacing.
 * NOTE(review): presumably a <details> disclosure for the JSON-paste fallback
 * flow — confirm against the markup.
 */
.fallback {
margin-top: 1rem;
border: 1px solid var(--border);
border-radius: 4px;
padding: 0.5rem 0.75rem;
background: var(--surface);
}
.fallback summary {
cursor: pointer;
font-size: 0.9rem;
color: var(--muted);
padding: 0.25rem 0;
}
.fallback summary:hover,
.fallback summary:focus-visible { color: var(--text); }
.fallback .hint {
margin: 0.5rem 0;
font-size: 0.85rem;
color: var(--muted);
}
.fallback textarea { margin-top: 0.25rem; }
87 changes: 41 additions & 46 deletions templates/detect-and-transcribe/PROMPT.md
Original file line number Diff line number Diff line change
@@ -1,46 +1,49 @@
# Task: detect and transcribe every text line on a TPEN3 page end-to-end

You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services.
You are assisting with TPEN manuscript transcription. Perform the task end-to-end and stop only when the result has been persisted via TPEN Services (direct) or emitted as a fallback JSON payload for the user to paste.

## Context

- Project: {{projectID}}
- Page: {{pageID}}
- Canvas: {{canvasId}}
- Canvas Dimensions: {{canvasWidth}} × {{canvasHeight}}
- Image: {{imageUrl}}
- Manifest: {{manifestUri}}
- User Agent URI: {{userAgentURI}}
- Page endpoint: {{pageEndpoint}}

## Preconditions

1. Required context present: `projectID`, `pageID`, `canvasId`, `{{token}}`. If any is missing, stop and report.
2. Vision capability: you must be able to load the page image as raw bytes, measure pixel coordinates, and crop/inspect per-line regions.
3. Authorization: `{{token}}` must be usable for PUT against the page endpoint (and optionally PATCH for per-line text updates).
4. HTTP PUT capability with `Content-Type: application/json`; optional PATCH with `Content-Type: text/plain`.
All required inputs (`canvasId`, `token`, `pageEndpoint`, `imageUrl`, canvas dimensions) are provided above. You must have:

If any precondition fails, stop and return a concise failure report.
1. Ability to fetch the image bytes (or a derivative) and identify line bounds and text from them. Precise pixel measurement is preferred when available; visual estimation and on-sight transcription from the fetched image are acceptable otherwise.
2. Either HTTP PUT capability with `Content-Type: application/json`, or the ability to emit the payload as a fallback JSON code block in your report. If HTTP PUT is not available, skip straight to the Fallback section — do not retry.

Use only tools already available in your environment. Do not install packages, libraries, or system utilities.

## Steps

1. Resolve canvas dimensions. Use `{{canvasWidth}}`/`{{canvasHeight}}` when numeric. Otherwise GET `{{canvasId}}` and read `width`/`height`. If that fails, GET `{{manifestUri}}` and find the matching canvas in `items` by id.
2. Fetch the page image. Detect every text line in reading order.
3. For each line, measure a bounding box and convert to integer canvas coordinates. Clamp to the canvas and round.
1. Resolve `img_w`, `img_h`. If `{{imageUrl}}` looks like a IIIF Image API endpoint (path matches `…/{region}/{size}/{rotation}/{quality}.{format}`), strip that suffix to get `{base}`, then GET `{base}/info.json` for the dimensions. For the page-overview pass, fetch a small derivative `{base}/full/1500,/0/default.jpg` and scale measured coordinates back via `source = derivative * info.width / 1500`. If you have precise pixel tooling and want tighter bounds or a clearer crop for transcription, request a region server-side as `{base}/x,y,w,h/max/0/default.jpg` and add the crop's `x,y` origin back before applying the canvas conversion below. Otherwise GET `{{imageUrl}}` once and read dimensions from the bytes.
2. Detect text lines across the whole page in reading order. This task does not create TPEN columns.
3. For every line, measure a bounding box in image-pixel space and convert to integer canvas coordinates using:
- `canvas_x = round(pixel_x * {{canvasWidth}} / img_w)`
- `canvas_y = round(pixel_y * {{canvasHeight}} / img_h)`
- `canvas_w = round(pixel_w * {{canvasWidth}} / img_w)`
- `canvas_h = round(pixel_h * {{canvasHeight}} / img_h)`
Then clamp to the canvas (`0 ≤ x`, `x + w ≤ {{canvasWidth}}`, `0 ≤ y`, `y + h ≤ {{canvasHeight}}`).
4. Run handwriting text recognition on each line's crop. Apply the recognition rules below.
5. Build one Annotation per line with the recognized text as the `TextualBody` value and `xywh=x,y,w,h` (integer canvas coordinates).
6. PUT the full set of line annotations to the page endpoint in a single request.
7. Optionally PATCH a specific line's text afterward if a recognition result needs a later revision (see TPEN API).
8. Report counts (lines saved) and notable ambiguities (e.g., illegible lines transcribed as empty or flagged).
5. If HTTP PUT is available, build the full payload under **TPEN API** — one Annotation per line with the recognized text and `xywh=x,y,w,h` selector — and send the request once. On any non-2xx response, do not retry — fall back.
6. If HTTP PUT is unavailable (or step 5 fell back), emit the condensed payload under **Fallback** as the final code block.
7. Report counts (lines saved/in payload, non-empty text, uncertain) and which path was used (direct PUT or fallback).
8. Report notable ambiguities (e.g., illegible lines transcribed as empty or flagged).

## Rules

### Detection (IMAGE_ANALYSIS)

- Preserve reading order. Prefer high recall for likely text lines over aggressive pruning.
- Keep line boxes tight but do not clip ascenders/descenders.
- Flag ambiguous regions in the report rather than silently dropping them.
- Bounds MUST be saved as integer coordinates in canvas space. No percent, no `pixel:` prefix on the selector value.
- Preserve reading order across the whole page.
- Prefer tight bounds when you can measure them; best-effort bounds are acceptable. When uncertain whether a tall run is one line or several, prefer splitting over merging.
- Do not include decorative borders, frame rules, ornaments, or illustrations as part of a line.
- Completion beats refusal: approximate bounds on most lines are more useful than nothing — this data will be reviewed and corrected downstream.
- Zero lines detected is an unprocessable outcome. Stop and report — do not PUT, do not emit a fallback payload. An empty `items` array would erase every existing annotation on the page.

### Recognition (HANDWRITING_TEXT_RECOGNITION)

Expand All @@ -53,7 +56,7 @@ If any precondition fails, stop and return a concise failure report.

## TPEN API

Save every detected line with its transcription in a single PUT:
Save every detected line with its transcription in a single PUT. The `items` array must contain one annotation per detected line; replace `x,y,w,h` with the integer canvas coordinates computed in step 3, and `<recognized line text>` with the recognized text (empty string for fully illegible lines).

```
PUT {{pageEndpoint}}
Expand All @@ -63,8 +66,6 @@ Content-Type: application/json
{
"items": [
{
"type": "Annotation",
"@context": "http://www.w3.org/ns/anno.jsonld",
"body": [{ "type": "TextualBody", "value": "<recognized line text>", "format": "text/plain" }],
"target": {
"source": "{{canvasId}}",
Expand All @@ -81,39 +82,33 @@ Content-Type: application/json
}
```

Optional per-line text revision after the PUT:
## Fallback

```
PATCH {{pageEndpoint}}/line/<lineId>/text
Authorization: Bearer {{token}}
Content-Type: text/plain
When the direct PUT is impossible or returns non-2xx, emit the condensed payload below as the final code block of your report. The TPEN splitscreen tool expands each item into a full W3C Annotation before PUTting it — do not inline the canvas source, selector boilerplate, or motivation. It must be valid JSON (no comments, no placeholders — substitute the real coordinates and recognized text).

<updated transcription text>
```

Error handling (both calls):

```javascript
if (!response.ok) {
throw new Error(`TPEN API ${response.status}: ${await response.text()}`)
{
"items": [
{ "text": "<recognized line text>", "target": "xywh=x,y,w,h" }
]
}
```

One item per detected line, in reading order. `target` is the bare selector value (no `#`, no `pixel:` prefix). `text` is an empty string for fully illegible lines — do not drop the item.

## Completion

On success, report:
If the direct PUT path was used, report:

- operation: `PUT page` (plus any follow-up `PATCH line text`)
- target: `{{pageEndpoint}}`
- operation: `PUT page`
- target: {{pageEndpoint}}
- counts: lines saved, lines with non-empty text, lines flagged uncertain
- notable ambiguities worth a human review

On failure, report:

- the failing stage (image fetch, detection, recognition, PUT, or PATCH)
- HTTP status and error body
- recommended next step
If the fallback path was used, report:

## Fallback

If vision / write capability is missing, do not fabricate geometry or transcriptions. Report what is missing and stop.
- path: `fallback`
- counts: lines in payload, lines with non-empty text, lines flagged uncertain
- HTTP status and error body if a PUT was attempted first
- notable ambiguities worth a human review
- final code block: the condensed `{ "items": [...] }` JSON for the user to paste
2 changes: 1 addition & 1 deletion templates/detect-and-transcribe/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import { buildTemplateContext } from '../inject-context.js'
/**
 * Template descriptor for the "detect and transcribe" prompt.
 *
 * - `id`: identifier of this template.
 * - `label`: human-readable name for this template.
 * - `templateUrl`: URL of the sibling `PROMPT.md`, resolved against this module.
 * - `buildContext`: the shared `buildTemplateContext` helper.
 * @type {import('../../prompt-generator.js').PromptTemplate}
 */
export const detectAndTranscribeTemplate = {
  id: 'detect-and-transcribe',
  // Exactly one `label` key: in an object literal a repeated key is silently
  // overridden by the later occurrence, so only the effective value is kept.
  label: 'Line Detection + Transcription',
  templateUrl: new URL('./PROMPT.md', import.meta.url),
  buildContext: buildTemplateContext
}
Loading
Loading