Skip to content

Commit 13aadec

Browse files
committed
Updated
1 parent 4606232 commit 13aadec

3 files changed

Lines changed: 108 additions & 1 deletion

File tree

js/export/writeTabular.js

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,20 @@ import ocr from '../objects/ocrObjects.js';
33
import { inputData, opt } from '../containers/app.js';
44
import { extractTableContent } from '../extractTables.js';
55

6+
/**
 * Convert a 0-based column index to an Excel column reference (A, B, ..., Z, AA, AB, ...).
 *
 * Column letters have no "zero" digit (after Z comes AA, not BA), so this is not a
 * plain base-26 conversion: 1 is subtracted after each division to account for that.
 *
 * @param {number} index - 0-based column index; must be a non-negative integer.
 * @returns {string} Column letters, e.g. 0 -> "A", 25 -> "Z", 26 -> "AA", 702 -> "AAA".
 * @throws {RangeError} If `index` is negative or not an integer.
 */
function colIndexToRef(index) {
  // Without this guard a negative index yields a non-letter character (e.g. -1 -> "@")
  // and a fractional index is silently truncated to a wrong column.
  if (!Number.isInteger(index) || index < 0) {
    throw new RangeError(`Column index must be a non-negative integer, got ${String(index)}`);
  }

  let ref = '';
  let n = index;
  do {
    // 65 is the char code of "A"; the least-significant letter is produced first,
    // so each new letter is prepended.
    ref = String.fromCharCode(65 + (n % 26)) + ref;
    n = Math.floor(n / 26) - 1;
  } while (n >= 0);
  return ref;
}
19+
620
/**
721
* @param {Object} params
822
* @param {ReturnType<extractTableContent>} params.tableWordObj
@@ -206,3 +220,35 @@ export async function writeXlsx({
206220

207221
return zipFileData;
208222
}
223+
224+
/**
 * Build a single-worksheet xlsx archive from a 2D array of plain strings.
 *
 * Each value is XML-escaped with `ocr.escapeXml` and emitted as an inline-string cell
 * (`t="inlineStr"`, `xml:space="preserve"`). `null`/`undefined` values become empty strings.
 * The generated worksheet is stored at `xl/worksheets/sheet1.xml`; the remaining archive
 * entries are taken verbatim from `xlsxStrings`.
 *
 * @param {Array<Array<string>>} rows - 2D array of cell values; `rows[i][j]` is written to
 *   sheet row `i + 1`, column `colIndexToRef(j)`.
 * @returns The value resolved by `zipFileWriter.getData()` (presumably the archive bytes
 *   as a `Uint8Array` - confirm against the zip.js writer in use).
 */
export async function writeXlsxFromStrings(rows) {
  const { xlsxStrings, sheetStart, sheetEnd } = await import('./resources/xlsxFiles.js');
  const { Uint8ArrayWriter, TextReader, ZipWriter } = await import('../../lib/zip.js/index.js');

  // Assemble the worksheet XML one <row> element at a time.
  let sheetXml = sheetStart;
  for (let rowIdx = 0; rowIdx < rows.length; rowIdx += 1) {
    // Sheet rows are 1-based.
    const rowNum = String(rowIdx + 1);
    const cells = rows[rowIdx];

    let rowXml = `<row r="${rowNum}">`;
    for (let colIdx = 0; colIdx < cells.length; colIdx += 1) {
      const escaped = ocr.escapeXml(cells[colIdx] ?? '');
      rowXml += `<c r="${colIndexToRef(colIdx)}${rowNum}" t="inlineStr"><is><t xml:space="preserve">${escaped}</t></is></c>`;
    }
    sheetXml += `${rowXml}</row>`;
  }
  sheetXml += sheetEnd;

  const zipFileWriter = new Uint8ArrayWriter();
  const zipWriter = new ZipWriter(zipFileWriter);

  // The worksheet is added first, followed by the static workbook parts, one at a time.
  await zipWriter.add('xl/worksheets/sheet1.xml', new TextReader(sheetXml));
  for (const { path, content } of xlsxStrings) {
    await zipWriter.add(path, new TextReader(content));
  }

  await zipWriter.close();
  return zipFileWriter.getData();
}

scribe.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ import {
4141
calcConf, checkOcrWordsAdjacent, mergeOcrWords, splitOcrWord,
4242
} from './js/utils/ocrUtils.js';
4343
import { assignParagraphs } from './js/utils/reflowPars.js';
44-
import { writeXlsx } from './js/export/writeTabular.js';
44+
import { writeXlsx, writeXlsxFromStrings } from './js/export/writeTabular.js';
4545
import { calcColumnBounds, detectTablesInPage, makeTableFromBbox } from './js/utils/detectTables.js';
4646
import { ca } from './js/canvasAdapter.js';
4747
import { addHighlights, clearHighlights } from './js/addHighlights.js';
@@ -234,6 +234,8 @@ class utils {
234234

235235
static writeXlsx = writeXlsx;
236236

237+
static writeXlsxFromStrings = writeXlsxFromStrings;
238+
237239
// Misc utils
238240
static calcBoxOverlap = calcBoxOverlap;
239241

tests/module/exportPdf.spec.js

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,65 @@ describe('Check addHighlights and clearHighlights.', function () {
191191
assert.throws(() => scribe.addHighlights([{ page: 0 }]));
192192
}).timeout(10000);
193193

194+
it('addHighlights with multi-line range highlights all lines including the last', async () => {
  await scribe.importFiles([`${ASSETS_PATH_KARMA}/testocr.abbyy.xml`]);
  const lineCount = scribe.data.ocr.active[0].lines.length;
  assert.isAbove(lineCount, 3, 'Document should have more than 3 lines');

  // Highlight every line of page 0, from the first through the last.
  const fullRange = { page: 0, startLine: 0, endLine: lineCount - 1 };
  const result = scribe.addHighlights([fullRange]);
  assert.strictEqual(result.totalLinesHighlighted, lineCount);

  // Verify the last line specifically has all words annotated.
  // A word counts as annotated when some annotation holds the very same `bbox` object.
  const lastLine = scribe.data.ocr.active[0].lines[lineCount - 1];
  const annots = scribe.data.annotations.pages[0];
  const annotatedWords = lastLine.words.filter((word) => annots.some((a) => a.bbox === word.bbox));
  assert.strictEqual(annotatedWords.length, lastLine.words.length, 'All words on the last line should be annotated');
  scribe.clearHighlights();
}).timeout(10000);
212+
213+
// Hook registered with `after`: awaits scribe.terminate() once the enclosing suite's tests are done.
after(async () => {
214+
await scribe.terminate();
215+
});
216+
}).timeout(120000);
217+
218+
describe('Check MCP highlight flow preserves data file.', function () {
219+
this.timeout(30000);
220+
221+
it('Multi-line highlights should work when ensureFileLoaded is called without dataFile on already-loaded document', async () => {
  const { ensureFileLoaded, resetState } = await import('../../mcp/index.js');
  resetState();

  const imgPath = `${ASSETS_PATH_KARMA}/testocr.png`;
  const dataPath = `${ASSETS_PATH_KARMA}/testocr.abbyy.xml`;

  // First load: image together with its OCR data file.
  await ensureFileLoaded(imgPath, dataPath);
  const lineCount = scribe.data.ocr.active[0].lines.length;
  assert.strictEqual(lineCount, 8);

  // Second load of the same image without a data file: the OCR lines must still be there.
  await ensureFileLoaded(imgPath, undefined);
  const lineCountAfter = scribe.data.ocr.active[0]?.lines?.length ?? 0;

  assert.strictEqual(lineCountAfter, 8);

  // Verify multi-line highlights work for the full range including the last line
  const fullRange = { page: 0, startLine: 0, endLine: lineCount - 1 };
  const result = scribe.addHighlights([fullRange]);
  assert.strictEqual(result.totalLinesHighlighted, lineCount);

  // A word counts as annotated when some annotation holds the very same `bbox` object.
  const lastLine = scribe.data.ocr.active[0].lines[lineCount - 1];
  const annots = scribe.data.annotations.pages[0];
  const annotatedWords = lastLine.words.filter((word) => annots.some((a) => a.bbox === word.bbox));
  assert.strictEqual(annotatedWords.length, 7);

  scribe.clearHighlights();
  resetState();
}).timeout(20000);
252+
194253
// Hook registered with `after`: awaits scribe.terminate() once the enclosing suite's tests are done.
after(async () => {
195254
await scribe.terminate();
196255
});

0 commit comments

Comments
 (0)