-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconverter.html
More file actions
401 lines (343 loc) · 16.9 KB
/
converter.html
File metadata and controls
401 lines (343 loc) · 16.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" translate="no" class="notranslate" lang="en">
<head>
<title>MANO - Converter | Convert and Merge Transcriptions</title>
<meta name="keywords" content="PAGE-XML, TEI-XML, converter, manuscripts, digital humanities, transcription, MANO">
<meta name="description" content="Convert and merge multiple PAGE-XML transcription files into a unified TEI-XML manuscript file compatible with the MANO Transcription Viewer.">
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0, shrink-to-fit=no"/>
<meta name="google" content="notranslate"/>
<link rel="canonical" href="https://mano-project.github.io/converter.html"/>
<link rel="icon" type="image/png" href="images/mano-logo_nuovoFont.png">
<link rel="alternate" hreflang="en" href="https://mano-project.github.io/converter.html">
<meta property="og:type" content="website">
<meta property="og:title" content="MANO – Converter">
<meta property="og:description" content="Convert multiple PAGE-XML files into a unified TEI-XML manuscript transcription using MANO’s online converter.">
<meta property="og:image" content="https://mano-project.github.io/images/mano-logo_nuovoFont.png">
<meta property="og:url" content="https://mano-project.github.io/converter.html">
<script type="application/ld+json">
{
"@context": "https://schema.org",
"@type": "WebApplication",
"name": "MANO – XML Converter",
"url": "https://mano-project.github.io/converter.html",
"applicationCategory": "Data Conversion Tool",
"description": "An online converter for merging multiple PAGE-XML files into a single TEI-XML transcription compatible with the MANO Transcription Viewer.",
"creator": {
"@type": "Person",
"name": "Michela Parma",
"affiliation": "University of Mainz"
},
"inLanguage": "en"
}
</script>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet">
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js"></script>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.3/font/bootstrap-icons.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.0/css/all.min.css">
<link rel="stylesheet" type="text/css" href="css/style.css" media="screen"/>
<style>
html, body {
height: 100%;
margin: 0;
display: flex;
flex-direction: column;
}
.container {
flex: 1; /*Push footer down */
}
footer.footer {
margin-top: auto; /*Always stick footer at bottom */
}
.dropzone {
border: 2px dashed #ccc;
padding: 2rem;
text-align: center;
border-radius: .5rem;
color: #888;
cursor: pointer;
}
.file-list { margin-top: 1rem; }
</style>
</head>
<body>
<nav class="navbar bg-body-tertiary ">
<div class="container-fluid d-flex justify-content-between align-items-center position-relative py-2">
<!-- Invisible spacer to balance layout -->
<div style="width: 80px;"></div>
<!-- Centered logo -->
<div class="position-absolute start-50 translate-middle-x text-center">
<a class="navbar-brand" href="index.html">
<img src="images/mano-logo_nuovoFont.png" alt="MANO project logo" width="65" class="d-inline-block align-text-top">
</a>
</div>
<!-- Offcanvas toggle aligned right -->
<button class="navbar-toggler" type="button" data-bs-toggle="offcanvas" data-bs-target="#offcanvasNavbar"
aria-controls="offcanvasNavbar" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div class="offcanvas offcanvas-end" tabindex="-1" id="offcanvasNavbar" aria-labelledby="offcanvasNavbarLabel">
<div class="offcanvas-header">
<h5 class="offcanvas-title" id="offcanvasNavbarLabel">&lt;MANO&gt;</h5>
<button type="button" class="btn-close" data-bs-dismiss="offcanvas" aria-label="Close"></button>
</div>
<div class="offcanvas-body">
<ul class="navbar-nav justify-content-end flex-grow-1 pe-3">
<li class="nav-item"><a class="nav-link" href="index.html">Home</a></li>
<li class="nav-item"><a class="nav-link" href="resources.html">Resources</a></li>
<li class="nav-item"><a class="nav-link" href="editor.html">Metadata Editor</a></li>
<li class="nav-item"><a class="nav-link" href="collection.html">Metadata Collection</a></li>
<li class="nav-item"><a class="nav-link active" href="viewer.html">Transcription Viewer</a></li>
<li class="nav-item"><a class="nav-link" href="documentation.html">Documentation</a></li>
<li class="nav-item"><a class="nav-link" href="about.html">About</a></li>
<hr class="my-3">
<div class="text-center">
<h6 class="mb-3">Participate in shaping the future of &lt;MANO&gt;</h6>
<a href="https://survey.zdv.uni-mainz.de/index.php/822542?newtest=Y&amp;lang=en" class="btn btn-sm btn-outline-primary" target="_blank" rel="noopener">Take the user survey</a>
</div>
</ul>
</div>
</div>
</div>
</nav>
<div class="container mt-4 mb-3">
<button onclick="history.back()" class="btn back-btn btn-sm btn-outline-secondary" aria-label="Go back">Back</button>
<h1 class="page-title">Convert PAGE-XML into TEI-XML</h1>
<button type="button" class="btn btn-primary responsive-btn" data-bs-toggle="modal" data-bs-target="#infoConverter" aria-label="Open About Converter">
<i class="bi bi-info-circle"></i> About
</button>
<!-- About Converter -->
<div class="modal fade" id="infoConverter" tabindex="-1" aria-labelledby="infoConverterLabel" aria-hidden="true">
<div class="modal-dialog modal-lg">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title" id="infoConverterLabel">About Converter</h5>
<button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
</div>
<div class="modal-body">
<p>The converter allows users to upload one or more <strong>PAGE-XML</strong> files and merge them into a single <strong>TEI-XML</strong> transcription. This workflow is designed for cases in which a manuscript transcription is divided across multiple XML files, as commonly produced by platforms such as Transkribus.</p>
<p>When multiple files are uploaded, the converter processes them in the order shown in the file list. Users may adjust the sequence using the <strong>Move up</strong> and <strong>Move down</strong> buttons to ensure that pages are merged in the correct order.</p>
<p>The resulting TEI-XML includes basic structural elements such as <strong>&lt;pb&gt;</strong> for page breaks and a simplified block of transcription text extracted from the PAGE-XML files. Once generated, the combined TEI-XML file can be downloaded and uploaded in the <strong>Transcription Viewer</strong> for inspection, visualisation, and further editing.</p>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-secondary" data-bs-dismiss="modal" aria-label="Close">Close</button>
</div>
</div>
</div>
</div>
<p class="text-center mt-3">Upload one or more PAGE-XML files. The converter will merge them into a single TEI file.</p>
<!-- Upload area -->
<div class="dropzone" id="dropzone">
<p>Click or drag &amp; drop PAGE-XML files here</p>
<input type="file" id="fileInput" accept=".xml" multiple hidden>
</div>
<!-- File list -->
<ul id="fileList" class="list-group file-list"></ul>
<!-- Convert button -->
<div class="text-center mt-3">
<button id="convertBtn" class="btn btn-success" disabled aria-label="Convert">Convert</button>
<a id="downloadBtn" class="btn btn-primary d-none" download="combined-transcription.xml">Download</a>
<a href="viewer.html" class="btn btn-outline-secondary d-none" id="viewBtn">Go to Transcription Viewer</a>
</div>
</div>
<footer class="footer bg-body-tertiary text-center py-4">
<div class="container">
<!-- Logo centered -->
<div class="mb-3">
<a class="navbar-brand" href="index.html">
<img src="images/mano-logo_nuovoFont.png" alt="MANO project logo" width="50">
</a>
</div>
<!-- Links centered in one line -->
<div class="mb-3">
<a class="footer-link mx-2" href="index.html">Home</a>
<a class="footer-link mx-2" href="resources.html">Resources</a>
<a class="footer-link mx-2" href="editor.html">Metadata Editor</a>
<a class="footer-link mx-2" href="collection.html">Metadata Collection</a>
<a class="footer-link mx-2" href="viewer.html">Transcription Viewer</a>
<a class="footer-link mx-2" href="documentation.html">Documentation</a>
<a class="footer-link mx-2" href="about.html">About</a>
</div>
<!-- Copyright centered -->
<div class="text-center mt-2">
<span>© 2025 <span class="mano">&lt;MANO&gt;</span></span>
</div>
</div>
</footer>
<div class="py-1 text-center">
<small>
<span class="m-1">
<a href="imprint.html" style="color: black!important">Imprint &amp; Privacy</a>
</span> |
<span class="m-1">
<a href="https://github.com/orgs/mano-project/repositories" target="_blank" style="color: black!important"><i class="fa-brands fa-github"></i> GitHub Repositories</a>
</span> |
<span class="m-1">
<a href="https://creativecommons.org/licenses/by-nc-sa/4.0/" target="_blank" style="color: black!important">CC BY-NC-SA 4.0</a><img src="https://mirrors.creativecommons.org/presskit/icons/cc.svg" alt="Creative Commons" style="max-width: 1em;max-height:1em;margin-left: .2em;"><img src="https://mirrors.creativecommons.org/presskit/icons/by.svg" alt="Attribution" style="max-width: 1em;max-height:1em;margin-left: .2em;"><img src="https://mirrors.creativecommons.org/presskit/icons/nc.svg" alt="Non Commercial" style="max-width: 1em;max-height:1em;margin-left: .2em;"><img src="https://mirrors.creativecommons.org/presskit/icons/sa.svg" alt="Share Alike" style="max-width: 1em;max-height:1em;margin-left: .2em;">
</span>
</small>
</div>
<script>
// Look up every element the converter UI works with in a single pass.
const [dropzone, fileInput, fileList, convertBtn, downloadBtn, viewBtn] =
  ['dropzone', 'fileInput', 'fileList', 'convertBtn', 'downloadBtn', 'viewBtn']
    .map((id) => document.getElementById(id));

// Files currently queued for conversion, in the order they will be merged.
let selectedFiles = [];

// A click anywhere on the drop area opens the (hidden) native file picker.
dropzone.addEventListener('click', function () {
  fileInput.click();
});

// Files chosen through the picker.
fileInput.addEventListener('change', function ({ target }) {
  handleFiles(target.files);
});

// Drag & drop: highlight the border while a drag hovers over the area...
dropzone.addEventListener('dragover', function (event) {
  event.preventDefault();
  dropzone.style.borderColor = '#007bff';
});

// ...restore it when the drag leaves...
dropzone.addEventListener('dragleave', function () {
  dropzone.style.borderColor = '#ccc';
});

// ...and take over the dropped files instead of letting the browser open them.
dropzone.addEventListener('drop', function (event) {
  event.preventDefault();
  dropzone.style.borderColor = '#ccc';
  handleFiles(event.dataTransfer.files);
});
/**
 * Replace the current selection with a freshly picked or dropped set of files
 * and put the controls back into their pre-conversion state.
 *
 * @param {FileList|File[]} files - Files from the file input or a drop event.
 */
function handleFiles(files) {
  selectedFiles = Array.from(files);
  renderFileList();

  // A finished conversion hides the Convert button and reveals the Download /
  // viewer links. Those links belong to the previous selection, so hide them
  // again and bring the Convert button back for the new files.
  downloadBtn.classList.add('d-none');
  viewBtn.classList.add('d-none');
  convertBtn.classList.remove('d-none');
  convertBtn.disabled = selectedFiles.length === 0;
}

// NOTE: a text-only renderFileList() used to be declared here. It was dead
// code: the later declaration of renderFileList() in this script (the one
// with the move up / move down buttons) overrides it, so it was removed.
// Main conversion logic: read every selected file, build the combined TEI
// document and expose it through the Download link.
convertBtn.addEventListener('click', async () => {
  // The file reads below are asynchronous; disabling the button prevents a
  // second click from starting a parallel conversion (and leaking a blob URL).
  convertBtn.disabled = true;
  try {
    // Read all files concurrently. Promise.all keeps the results in the same
    // order as selectedFiles, so the page order chosen by the user is kept.
    const fileContents = await Promise.all(
      selectedFiles.map(async (f) => ({ name: f.name, content: await f.text() }))
    );

    const tei = convertPageXMLtoTEI(fileContents);
    const blob = new Blob([tei], { type: 'application/xml' });

    // Release the blob of an earlier conversion before pointing to the new one.
    const previousUrl = downloadBtn.getAttribute('href');
    if (previousUrl && previousUrl.startsWith('blob:')) {
      URL.revokeObjectURL(previousUrl);
    }

    downloadBtn.href = URL.createObjectURL(blob);
    downloadBtn.classList.remove('d-none');
    viewBtn.classList.remove('d-none');
    // hide the convert button after conversion
    convertBtn.classList.add('d-none');
  } finally {
    // Re-enable the button (it may be hidden on success) so that a failed
    // read can be retried; a read error itself still propagates as before.
    convertBtn.disabled = selectedFiles.length === 0;
  }
});
/**
 * Merge several PAGE-XML documents into one TEI-XML string.
 *
 * Every input file becomes one page: a <pb/> carrying the running page number
 * and the image file name, followed by a <p> with one <lb/>-prefixed line per
 * PAGE TextLine that has text.
 *
 * @param {{name: string, content: string}[]} files - PAGE-XML sources, in the
 *   order in which they should appear in the output.
 * @returns {string} The combined TEI-XML document.
 */
function convertPageXMLtoTEI(files) {
  // Text copied from the input may contain XML markup characters; escape them
  // so the generated TEI stays well-formed (also safe inside "..." attributes).
  const escapeXml = (value) =>
    String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;');

  const parser = new DOMParser();
  const teiPages = [];
  let pageNum = 1;
  let creator = '', createdDate = '', lastChange = '';

  for (const file of files) {
    const xmlDoc = parser.parseFromString(file.content, 'application/xml');

    // Metadata: keep reading it file by file until one provides a Creator;
    // the dates of that file are the ones written to the TEI header.
    if (!creator) {
      creator = xmlDoc.querySelector('Metadata > Creator')?.textContent || '';
      createdDate = xmlDoc.querySelector('Metadata > Created')?.textContent || '';
      lastChange = xmlDoc.querySelector('Metadata > LastChange')?.textContent || '';
    }

    // Page info: fall back to a generated image name when none is recorded.
    const pageNode = xmlDoc.getElementsByTagName('Page')[0];
    const facs = pageNode?.getAttribute('imageFilename') || `page${pageNum}.jpg`;

    // Collect lines; lines without text do not consume a line number.
    const lineOutput = [];
    let lineNum = 1;
    for (const line of xmlDoc.getElementsByTagName('TextLine')) {
      // Grab ALL TextEquiv nodes in this line; the LAST one is treated as the
      // text of the full line.
      const textEquivs = line.getElementsByTagName('TextEquiv');
      if (textEquivs.length === 0) continue;
      const unicodeNode = textEquivs[textEquivs.length - 1].getElementsByTagName('Unicode')[0];
      if (!unicodeNode) continue;

      const fullLineText = unicodeNode.textContent.trim();
      lineOutput.push(`<lb n="${lineNum}"/>${escapeXml(fullLineText)}`);
      lineNum++;
    }

    teiPages.push([
      '',
      `<pb n="${pageNum}" facs="${escapeXml(facs)}"/>`,
      '<p>',
      lineOutput.join('\n'),
      '</p>',
    ].join('\n'));
    pageNum++;
  }

  // Build combined TEI. Pages are separated by a real line break (the former
  // '\\n' separator wrote a literal backslash-n into the document).
  return [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<TEI xmlns="http://www.tei-c.org/ns/1.0">',
    '<teiHeader>',
    '<fileDesc>',
    '<titleStmt>',
    '<title>Combined Transcription</title>',
    '</titleStmt>',
    '<publicationStmt>',
    '<p>XML-TEI generated from PAGE-XML using the converter tool in',
    '<ref target="https://mano-project.github.io/">MANO</ref>.',
    '</p>',
    '</publicationStmt>',
    '<sourceDesc>',
    `<p>Source: Transkribus Export (Created: ${escapeXml(createdDate)} | Last Change: ${escapeXml(lastChange)})</p>`,
    '</sourceDesc>',
    '</fileDesc>',
    '</teiHeader>',
    '<text>',
    '<body>',
    teiPages.join('\n'),
    '</body>',
    '</text>',
    '</TEI>',
  ].join('\n');
}
/**
 * Render the selected files as a list with move up / move down controls, so
 * the user can put the files in the order in which they should be converted.
 *
 * The list is rebuilt from scratch on every call. File names are written with
 * textContent (never parsed as HTML), so a name containing markup is shown
 * literally instead of being injected into the page.
 */
function renderFileList() {
  // Swap two entries of the selection and redraw the list.
  const swapAndRender = (i, j) => {
    [selectedFiles[i], selectedFiles[j]] = [selectedFiles[j], selectedFiles[i]];
    renderFileList();
  };

  // Build one of the two reorder buttons of a list entry.
  const makeMoveButton = (className, label, symbol, onClick) => {
    const btn = document.createElement('button');
    btn.type = 'button';
    btn.className = `btn btn-sm btn-outline-secondary ${className}`;
    btn.setAttribute('aria-label', label);
    btn.textContent = symbol;
    btn.addEventListener('click', onClick);
    return btn;
  };

  fileList.innerHTML = '';
  selectedFiles.forEach((f, index) => {
    const li = document.createElement('li');
    li.className = 'list-group-item d-flex justify-content-between align-items-center';
    li.dataset.index = index;

    const label = document.createElement('span');
    label.textContent = `${f.name} (${Math.round(f.size/1024)} KB)`;

    const controls = document.createElement('div');
    controls.append(
      makeMoveButton('move-up', 'Move up', '▲', () => {
        // The first entry cannot move further up.
        if (index > 0) swapAndRender(index - 1, index);
      }),
      ' ', // keeps the gap the whitespace between the buttons used to give
      makeMoveButton('move-down', 'Move down', '▼', () => {
        // The last entry cannot move further down.
        if (index < selectedFiles.length - 1) swapAndRender(index + 1, index);
      })
    );

    li.append(label, controls);
    fileList.appendChild(li);
  });
}
</script>
<script src="JS/adjustButtonSize.js"></script>
</body>
</html>