66import java .awt .image .BufferedImage ;
77import java .io .ByteArrayInputStream ;
88import java .io .ByteArrayOutputStream ;
9- import java .io .File ;
109import java .io .IOException ;
1110import java .util .ArrayList ;
1211import java .util .List ;
1716import org .apache .pdfbox .pdmodel .PDDocument ;
1817import org .apache .pdfbox .pdmodel .PDPage ;
1918import org .apache .pdfbox .pdmodel .PDPageContentStream ;
20- import org .apache .pdfbox .pdmodel .common .PDRectangle ;
2119import org .apache .pdfbox .pdmodel .graphics .image .LosslessFactory ;
2220import org .apache .pdfbox .pdmodel .graphics .image .PDImageXObject ;
23- import org .apache .pdfbox .rendering .ImageType ;
24- import org .apache .pdfbox .rendering .PDFRenderer ;
2521
2622/**
2723 * PDF extraction class.
2824 */
29- public class PDFExtractorBase implements PDFExtraction {
25+ public class BasePDFExtractor {
3026 protected final PDDocument sourcePdf ;
3127 protected final String filename ;
3228
@@ -36,9 +32,9 @@ public class PDFExtractorBase implements PDFExtraction {
3632 * @param source The local source.
3733 * @throws IOException Throws if the file can't be accessed.
3834 */
39- public PDFExtractorBase (LocalInputSource source ) throws IOException {
35+ public BasePDFExtractor (LocalInputSource source ) throws IOException {
4036 this .filename = source .getFilename ();
41- if (source .isPdf ()) {
37+ if (source .isPDF ()) {
4238 this .sourcePdf = Loader .loadPDF (source .getFile ());
4339 } else {
4440 var document = new PDDocument ();
@@ -60,60 +56,6 @@ public PDFExtractorBase(LocalInputSource source) throws IOException {
6056 }
6157 }
6258
63- // @Override
64- // public PdfPageImage pdfPageToImage(
65- // byte[] fileBytes,
66- // String filename,
67- // int pageNumber
68- // ) throws IOException {
69- // int index = pageNumber - 1;
70- // PDDocument document = Loader.loadPDF(fileBytes);
71- // var pdfRenderer = new PDFRenderer(document);
72- // BufferedImage imageBuffer = pdfPageToImageBuffer(index, document, pdfRenderer);
73- // document.close();
74- // return new PdfPageImage(imageBuffer, index, filename, "jpg");
75- // }
76- //
77- // @Override
78- // public List<PdfPageImage> pdfToImages(byte[] fileBytes, String filename) throws IOException {
79- // PDDocument document = Loader.loadPDF(fileBytes);
80- // var pdfRenderer = new PDFRenderer(document);
81- // List<PdfPageImage> pdfPageImages = new ArrayList<>();
82- // for (int i = 0; i < document.getNumberOfPages(); i++) {
83- // var imageBuffer = pdfPageToImageBuffer(i, document, pdfRenderer);
84- // pdfPageImages.add(new PdfPageImage(imageBuffer, i, filename, "jpg"));
85- // }
86- // document.close();
87- // return pdfPageImages;
88- // }
89-
90- private BufferedImage pdfPageToImageBuffer (
91- int index ,
92- PDDocument document ,
93- PDFRenderer pdfRenderer
94- ) throws IOException {
95- PDRectangle bbox = document .getPage (index ).getBBox ();
96- float dimension = bbox .getWidth () * bbox .getHeight ();
97- int dpi ;
98- if (dimension < 200000 ) {
99- dpi = 300 ;
100- } else if (dimension < 300000 ) {
101- dpi = 250 ;
102- } else {
103- dpi = 200 ;
104- }
105- return pdfRenderer .renderImageWithDPI (index , dpi , ImageType .RGB );
106- }
107-
108- /**
109- * Get the number of pages in the PDF file.
110- *
111- * @return The number of pages in the PDF file.
112- */
113- public int getPageCount () {
114- return sourcePdf .getNumberOfPages ();
115- }
116-
11759 /**
11860 * Converts an array to a buffered image.
11961 *
@@ -197,24 +139,6 @@ private static byte[] createPdfFromExistingPdf(
197139 return output ;
198140 }
199141
200- /**
201- * Merge specified PDF pages together.
202- *
203- * @param file The PDF file.
204- * @param pageNumbers List of page numbers to merge together.
205- */
206- @ Override
207- public byte [] mergePdfPages (File file , List <Integer > pageNumbers ) throws IOException {
208- PDDocument document = Loader .loadPDF (file );
209- return mergePdfPages (document , pageNumbers , true );
210- }
211-
212- @ Override
213- public byte [] mergePdfPages (PDDocument document , List <Integer > pageNumbers ) throws IOException {
214- return mergePdfPages (document , pageNumbers , true );
215- }
216-
217- @ Override
218142 public byte [] mergePdfPages (
219143 PDDocument document ,
220144 List <Integer > pageNumbers ,
0 commit comments