Skip to content

Commit 00b58b7

Browse files
committed
♻️ harmonize getting page count from a local input source
1 parent 45845d4 commit 00b58b7

4 files changed

Lines changed: 41 additions & 29 deletions

File tree

src/main/java/com/mindee/input/LocalInputSource.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package com.mindee.input;
22

33
import com.mindee.image.ImageCompressor;
4+
import com.mindee.pdf.PDFUtils;
45
import com.mindee.pdf.PdfBoxApi;
56
import com.mindee.pdf.PdfCompressor;
67
import com.mindee.pdf.PdfOperation;
@@ -48,6 +49,14 @@ public LocalInputSource(String fileAsBase64, String filename) {
4849
this.filename = filename;
4950
}
5051

52+
/**
53+
* Get the number of pages in the document.
54+
* @return the number of pages in the current file.
55+
* @throws IOException If an I/O error occurs during the PDF operation.
56+
*/
57+
public int getPageCount() throws IOException {
58+
return PDFUtils.getNumberOfPages(this.file);
59+
}
5160

5261
/**
5362
* Applies PDF-specific operations on the current file based on the specified {@code PageOptions}.

src/main/java/com/mindee/pdf/PDFUtils.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,16 @@ private PDFUtils() {
3939
* @param inputSource The PDF file.
4040
*/
4141
public static int getNumberOfPages(LocalInputSource inputSource) throws IOException {
42-
PDDocument document = Loader.loadPDF(inputSource.getFile());
42+
return getNumberOfPages(inputSource.getFile());
43+
}
44+
45+
/**
46+
* Get the number of pages in the PDF.
47+
*
48+
* @param pdfBytes The PDF file as a byte array.
49+
*/
50+
public static int getNumberOfPages(byte[] pdfBytes) throws IOException {
51+
PDDocument document = Loader.loadPDF(pdfBytes);
4352
int pageCount = document.getNumberOfPages();
4453
document.close();
4554
return pageCount;

src/test/java/com/mindee/input/LocalInputSourceTest.java

Lines changed: 22 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -21,58 +21,51 @@
2121
import org.junit.jupiter.api.Test;
2222

2323
public class LocalInputSourceTest {
24+
void assertMultipagePDF(LocalInputSource localInputSource, File file) throws IOException {
25+
Assertions.assertNotNull(localInputSource);
26+
Assertions.assertTrue(localInputSource.isPdf());
27+
Assertions.assertTrue(localInputSource.hasSourceText());
28+
Assertions.assertEquals(3, localInputSource.getPageCount());
29+
Assertions.assertEquals("multipage_cut-3.pdf", localInputSource.getFilename());
30+
Assertions.assertArrayEquals(localInputSource.getFile(), Files.readAllBytes(file.toPath()));
31+
}
32+
2433
@Test
2534
void loadDocument_withFile_mustReturnAValidLocalInputSource() throws IOException {
26-
File file = new File("src/test/resources/file_types/pdf/multipage.pdf");
35+
File file = new File("src/test/resources/file_types/pdf/multipage_cut-3.pdf");
2736
LocalInputSource localInputSource = new LocalInputSource(file);
28-
Assertions.assertNotNull(localInputSource);
29-
Assertions.assertArrayEquals(localInputSource.getFile(), Files.readAllBytes(file.toPath()));
37+
assertMultipagePDF(localInputSource, file);
3038
}
3139

3240
@Test
3341
void loadDocument_withInputStream_mustReturnAValidLocalInputSource() throws IOException {
34-
File file = new File("src/test/resources/file_types/pdf/multipage.pdf");
42+
File file = new File("src/test/resources/file_types/pdf/multipage_cut-3.pdf");
3543
LocalInputSource localInputSource = new LocalInputSource(
3644
Files.newInputStream(file.toPath()),
37-
"multipage.pdf"
45+
"multipage_cut-3.pdf"
3846
);
39-
Assertions.assertNotNull(localInputSource);
40-
Assertions.assertArrayEquals(localInputSource.getFile(), Files.readAllBytes(file.toPath()));
47+
assertMultipagePDF(localInputSource, file);
4148
}
4249

4350
@Test
4451
void loadDocument_withByteArray_mustReturnAValidLocalInputSource() throws IOException {
45-
File file = new File("src/test/resources/file_types/pdf/multipage.pdf");
52+
File file = new File("src/test/resources/file_types/pdf/multipage_cut-3.pdf");
4653
LocalInputSource localInputSource = new LocalInputSource(
4754
Files.readAllBytes(file.toPath()),
48-
"multipage.pdf"
55+
"multipage_cut-3.pdf"
4956
);
50-
Assertions.assertNotNull(localInputSource);
51-
Assertions.assertArrayEquals(localInputSource.getFile(), Files.readAllBytes(file.toPath()));
57+
assertMultipagePDF(localInputSource, file);
5258
}
5359

5460
@Test
5561
void loadDocument_withBase64Encoded_mustReturnAValidLocalInputSource() throws IOException {
56-
File file = new File("src/test/resources/file_types/pdf/multipage.pdf");
62+
File file = new File("src/test/resources/file_types/pdf/multipage_cut-3.pdf");
5763
String encodedFile = Base64.encodeBase64String(Files.readAllBytes(file.toPath()));
5864
LocalInputSource localInputSource = new LocalInputSource(
5965
encodedFile,
60-
"multipage.pdf"
66+
"multipage_cut-3.pdf"
6167
);
62-
Assertions.assertNotNull(localInputSource);
63-
Assertions.assertArrayEquals(localInputSource.getFile(), Files.readAllBytes(file.toPath()));
64-
}
65-
66-
@Test
67-
void pdf_inputSource_withText_mustDetectSourceText() throws MindeeException, IOException {
68-
File file = new File("src/test/resources/file_types/pdf/multipage.pdf");
69-
String encodedFile = Base64.encodeBase64String(Files.readAllBytes(file.toPath()));
70-
LocalInputSource localInputSource = new LocalInputSource(
71-
encodedFile,
72-
"multipage.pdf"
73-
);
74-
Assertions.assertNotNull(localInputSource);
75-
Assertions.assertTrue(localInputSource.hasSourceText());
68+
assertMultipagePDF(localInputSource, file);
7669
}
7770

7871
@Test
@@ -84,6 +77,7 @@ void pdf_inputSource_withoutText_mustNotDetectSourceText() throws MindeeExceptio
8477
"default_sample.pdf"
8578
);
8679
Assertions.assertNotNull(localInputSource);
80+
Assertions.assertTrue(localInputSource.isPdf());
8781
Assertions.assertFalse(localInputSource.hasSourceText());
8882
}
8983

@@ -96,6 +90,7 @@ void image_inputSource_mustNotDetectSourceText() throws MindeeException, IOExcep
9690
"default_sample.jpg"
9791
);
9892
Assertions.assertNotNull(localInputSource);
93+
Assertions.assertFalse(localInputSource.isPdf());
9994
Assertions.assertFalse(localInputSource.hasSourceText());
10095
}
10196

src/test/java/com/mindee/input/URLInputSourceTest.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
public class URLInputSourceTest {
1717

1818
private static final String TEST_URL = "https://example.com/testfile.pdf";
19-
private static final String TEST_LOCAL_FILENAME = "testfile.pdf";
2019
private TestableURLInputSource urlInputSource;
2120

2221
@BeforeEach

0 commit comments

Comments
 (0)