Skip to content

Conversation

@BellYun
Copy link

@BellYun BellYun commented Jan 22, 2026

Description

This PR implements the getPagesInfo() method requested in #20530.
Currently, retrieving layout data for all pages requires calling getPage(i) sequentially, which causes N worker round-trips. This PR introduces a batch retrieval method that fetches view, rotate, and userUnit for all pages in a single round-trip.

Changes

  • API: Added PDFDocumentProxy.prototype.getPagesInfo() in src/display/api.js.
  • Worker: Added GetPagesInfo handler in src/core/worker.js.
  • Tests: Added unit tests in test/unit/api_spec.js to ensure the returned data is consistent with getPage().

Performance Verification

I measured the performance improvement using a benchmark script.
The structural improvement (N -> 1 round-trip) provides a significant speedup, especially for large documents.

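| Document | Method | Time (avg) | Improvement |
| --- | --- | --- | --- |
| tracemonkey.pdf (14 pages) | getPage loop | 1.5ms | - |
| tracemonkey.pdf (14 pages) | getPagesInfo (New) | 0.9ms | 1.67x Faster |
| pdf.pdf (1,310 pages) | getPage loop | 83.3ms | - |
| pdf.pdf (1,310 pages) | getPagesInfo (New) | 44.1ms | 1.89x Faster |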

Screenshot:
image

🔻 Click to see the benchmark logic

I verified the logic using a script similar to this (simplified for reproducibility in unit tests):

/**
 * Benchmark and verification suite for `pdfDocument.getPagesInfo()`.
 *
 * - "performance comparison" times a loop of `getPage(i)` calls against a
 *   single `getPagesInfo()` call and checks that both report the same
 *   `view`, `rotate` and `userUnit` for every page.
 * - "memory & stability" calls `getPagesInfo()` repeatedly and concurrently
 *   and checks that the results stay consistent.
 *
 * Timing and heap figures are only logged, never asserted on, since they
 * vary from run to run.
 */
describe("getPagesInfo benchmark & verification", function () {
  const tracemonkeyFileName = "tracemonkey.pdf";

  describe("performance comparison", function () {
    let pdfLoadingTask;
    let pdfDocument;

    beforeAll(async function () {
      pdfLoadingTask = getDocument(
        buildGetDocumentParams(tracemonkeyFileName)
      );
      pdfDocument = await pdfLoadingTask.promise;
    });

    afterAll(async function () {
      await pdfLoadingTask.destroy();
    });

    it("should measure execution time vs multiple getPage calls", async function () {
      const numPages = pdfDocument.numPages;

      // Warm up.
      // NOTE(review): only getPagesInfo() is warmed up, so the getPage() loop
      // below is measured cold — confirm this is the intended comparison.
      await pdfDocument.getPagesInfo();

      // Method 1: Multiple getPage() calls, awaited one after another.
      const startGetPage = performance.now();
      const pageInfosOld = [];
      for (let i = 1; i <= numPages; i++) {
        const page = await pdfDocument.getPage(i);
        pageInfosOld.push({
          view: page.view,
          rotate: page.rotate,
          userUnit: page.userUnit,
        });
      }
      const getPageTime = performance.now() - startGetPage;

      // Method 2: Single getPagesInfo() call.
      const startGetPagesInfo = performance.now();
      const pageInfosNew = await pdfDocument.getPagesInfo();
      const getPagesInfoTime = performance.now() - startGetPagesInfo;

      // Log results; the divisor is clamped so that a sub-resolution timing
      // of 0ms cannot produce `Infinity`.
      const speedup = getPageTime / Math.max(getPagesInfoTime, 0.01);
      console.log(`\n--- Performance Results (${numPages} pages) ---`);
      console.log(`Multiple getPage(): ${getPageTime.toFixed(2)}ms`);
      console.log(`Single getPagesInfo(): ${getPagesInfoTime.toFixed(2)}ms`);
      console.log(`Speedup: ${speedup.toFixed(2)}x`);

      // Verify consistency of every field that was collected above, not
      // just `view`.
      expect(pageInfosNew.length).toEqual(pageInfosOld.length);
      // Only index pages present in both lists, so that a length mismatch is
      // reported by the expectation above rather than by a TypeError here.
      const comparableCount = Math.min(
        pageInfosNew.length,
        pageInfosOld.length
      );
      for (let i = 0; i < comparableCount; i++) {
        expect(pageInfosNew[i].view).toEqual(pageInfosOld[i].view);
        expect(pageInfosNew[i].rotate).toEqual(pageInfosOld[i].rotate);
        expect(pageInfosNew[i].userUnit).toEqual(pageInfosOld[i].userUnit);
      }
    });
  });

  describe("memory & stability", function () {
    it("should not show significant memory growth on repeated calls", async function () {
      const repeatCount = 20;
      const loadingTask = getDocument(
        buildGetDocumentParams(tracemonkeyFileName)
      );

      // `finally` guarantees the document is destroyed even when a call
      // rejects part-way through the spec.
      try {
        const pdfDoc = await loadingTask.promise;

        // Reports 0 when `performance.memory` is not available, in which
        // case nothing is logged below.
        const getMemory = () =>
          performance.memory ? performance.memory.usedJSHeapSize : 0;
        const startMem = getMemory();

        for (let i = 0; i < repeatCount; i++) {
          const pagesInfo = await pdfDoc.getPagesInfo();
          // Gives the spec at least one real expectation: every repeated
          // call must still return one entry per page.
          expect(pagesInfo.length).toEqual(pdfDoc.numPages);
        }

        const endMem = getMemory();
        if (startMem > 0) {
          const growthMB = (endMem - startMem) / 1024 / 1024;
          console.log(`Memory Growth: ${growthMB.toFixed(2)}MB`);
        }
      } finally {
        await loadingTask.destroy();
      }
    });

    it("should handle concurrent calls", async function () {
      const loadingTask = getDocument(
        buildGetDocumentParams(tracemonkeyFileName)
      );

      try {
        const pdfDoc = await loadingTask.promise;
        const results = await Promise.all([
          pdfDoc.getPagesInfo(),
          pdfDoc.getPagesInfo(),
          pdfDoc.getPagesInfo(),
        ]);

        // Every concurrent call must agree with the first one.
        expect(results[0].length).toEqual(pdfDoc.numPages);
        for (let i = 1; i < results.length; i++) {
          expect(results[i]).toEqual(results[0]);
        }
      } finally {
        await loadingTask.destroy();
      }
    });
  });
});

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants