Skip to content

Commit ee17488

Browse files
committed
Use DecompressionStream in async code
Content streams and fonts are usually compressed using FlateDecode, so use the DecompressionStream API to decompress those streams in the async code path.
1 parent 6a4a3b0 commit ee17488

5 files changed

Lines changed: 60 additions & 6 deletions

File tree

src/core/cmap.js

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -699,6 +699,12 @@ class CMapFactory {
699699
if (encoding instanceof Name) {
700700
return createBuiltInCMap(encoding.name, fetchBuiltInCMap);
701701
} else if (encoding instanceof BaseStream) {
702+
if (encoding.isAsync) {
703+
const bytes = await encoding.asyncGetBytes();
704+
if (bytes) {
705+
encoding = new Stream(bytes, 0, bytes.length, encoding.dict);
706+
}
707+
}
702708
const parsedCMap = await parseCMap(
703709
/* cMap = */ new CMap(),
704710
/* lexer = */ new Lexer(encoding),

src/core/document.js

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,14 +61,14 @@ import {
6161
RefSetCache,
6262
} from "./primitives.js";
6363
import { getXfaFontDict, getXfaFontName } from "./xfa_fonts.js";
64+
import { NullStream, Stream } from "./stream.js";
6465
import { BaseStream } from "./base_stream.js";
6566
import { calculateMD5 } from "./calculate_md5.js";
6667
import { Catalog } from "./catalog.js";
6768
import { clearGlobalCaches } from "./cleanup_helper.js";
6869
import { DatasetReader } from "./dataset_reader.js";
6970
import { Intersector } from "./intersector.js";
7071
import { Linearization } from "./parser.js";
71-
import { NullStream } from "./stream.js";
7272
import { ObjectLoader } from "./object_loader.js";
7373
import { OperatorList } from "./operator_list.js";
7474
import { PartialEvaluator } from "./evaluator.js";
@@ -271,9 +271,31 @@ class Page {
271271
const content = await this.pdfManager.ensure(this, "content");
272272

273273
if (content instanceof BaseStream && !content.isImageStream) {
274+
if (content.isAsync) {
275+
const bytes = await content.asyncGetBytes();
276+
if (bytes) {
277+
return new Stream(bytes, 0, bytes.length, content.dict);
278+
}
279+
}
274280
return content;
275281
}
276282
if (Array.isArray(content)) {
283+
const promises = [];
284+
for (let i = 0, ii = content.length; i < ii; i++) {
285+
const item = content[i];
286+
if (item instanceof BaseStream && item.isAsync) {
287+
promises.push(
288+
item.asyncGetBytes().then(bytes => {
289+
if (bytes) {
290+
content[i] = new Stream(bytes, 0, bytes.length, item.dict);
291+
}
292+
})
293+
);
294+
}
295+
}
296+
if (promises.length > 0) {
297+
await Promise.all(promises);
298+
}
277299
return new StreamsSequenceStream(
278300
content,
279301
this.#onSubStreamError.bind(this)

src/core/evaluator.js

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1706,7 +1706,7 @@ class PartialEvaluator {
17061706
return null;
17071707
}
17081708

1709-
getOperatorList({
1709+
async getOperatorList({
17101710
stream,
17111711
task,
17121712
resources,
@@ -1715,6 +1715,13 @@ class PartialEvaluator {
17151715
fallbackFontDict = null,
17161716
prevRefs = null,
17171717
}) {
1718+
if (stream.isAsync) {
1719+
const bytes = await stream.asyncGetBytes();
1720+
if (bytes) {
1721+
stream = new Stream(bytes, 0, bytes.length, stream.dict);
1722+
}
1723+
}
1724+
17181725
const objId = stream.dict?.objId;
17191726
const seenRefs = new RefSet(prevRefs);
17201727

@@ -2373,7 +2380,7 @@ class PartialEvaluator {
23732380
});
23742381
}
23752382

2376-
getTextContent({
2383+
async getTextContent({
23772384
stream,
23782385
task,
23792386
resources,
@@ -2389,6 +2396,13 @@ class PartialEvaluator {
23892396
prevRefs = null,
23902397
intersector = null,
23912398
}) {
2399+
if (stream.isAsync) {
2400+
const bytes = await stream.asyncGetBytes();
2401+
if (bytes) {
2402+
stream = new Stream(bytes, 0, bytes.length, stream.dict);
2403+
}
2404+
}
2405+
23922406
const objId = stream.dict?.objId;
23932407
const seenRefs = new RefSet(prevRefs);
23942408

@@ -4565,8 +4579,16 @@ class PartialEvaluator {
45654579
if (fontFile) {
45664580
if (!(fontFile instanceof BaseStream)) {
45674581
throw new FormatError("FontFile should be a stream");
4568-
} else if (fontFile.isEmpty) {
4569-
throw new FormatError("FontFile is empty");
4582+
} else {
4583+
if (fontFile.isAsync) {
4584+
const bytes = await fontFile.asyncGetBytes();
4585+
if (bytes) {
4586+
fontFile = new Stream(bytes, 0, bytes.length, fontFile.dict);
4587+
}
4588+
}
4589+
if (fontFile.isEmpty) {
4590+
throw new FormatError("FontFile is empty");
4591+
}
45704592
}
45714593
}
45724594
} catch (ex) {

src/core/flate_stream.js

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,8 @@ const fixedDistCodeTab = [
122122
];
123123

124124
class FlateStream extends DecodeStream {
125+
#isAsync = true;
126+
125127
constructor(str, maybeLength) {
126128
super(maybeLength);
127129

@@ -200,6 +202,7 @@ class FlateStream extends DecodeStream {
200202
// decoder.
201203
// We already get the bytes from the underlying stream, so we just reuse
202204
// them to avoid getting them again.
205+
this.#isAsync = false;
203206
this.stream = new Stream(
204207
bytes,
205208
2 /* = header size (see ctor) */,
@@ -212,7 +215,7 @@ class FlateStream extends DecodeStream {
212215
}
213216

214217
get isAsync() {
215-
return true;
218+
return this.#isAsync;
216219
}
217220

218221
getBits(bits) {

src/core/stream.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ class Stream extends BaseStream {
5151
const strEnd = this.end;
5252

5353
if (!length) {
54+
this.pos = strEnd;
5455
return bytes.subarray(pos, strEnd);
5556
}
5657
let end = pos + length;

0 commit comments

Comments
 (0)