22
33import com .mindee .MindeeException ;
44import com .mindee .input .PageOptions ;
5+ import java .awt .image .BufferedImage ;
56import java .io .ByteArrayOutputStream ;
67import java .io .IOException ;
78import java .util .ArrayList ;
1314import java .util .stream .IntStream ;
1415import org .apache .pdfbox .Loader ;
1516import org .apache .pdfbox .pdmodel .PDDocument ;
17+ import org .apache .pdfbox .pdmodel .common .PDRectangle ;
18+ import org .apache .pdfbox .rendering .ImageType ;
19+ import org .apache .pdfbox .rendering .PDFRenderer ;
1620
1721/**
1822 * Allows performing various operations on PDFs.
1923 */
2024public final class PDFBoxApi implements PDFOperation {
2125
2226 @ Override
23- public SplitPDF split (SplitQuery splitQuery ) throws IOException {
27+ public SplitPDF split (byte [] fileBytes , PageOptions pageOptions ) throws IOException {
2428
25- if (!checkPdfOpen (splitQuery . getFile () )) {
29+ if (!checkPdfOpen (fileBytes )) {
2630 throw new MindeeException ("This document cannot be open and cannot be split." );
2731 }
2832
29- try (var originalDocument = Loader .loadPDF (splitQuery . getFile () )) {
33+ try (var originalDocument = Loader .loadPDF (fileBytes )) {
3034 try (var splitDocument = new PDDocument ()) {
31- int totalOriginalPages = countPages ( splitQuery . getFile () );
35+ int totalOriginalPages = getNumberOfPages ( fileBytes );
3236
33- if (totalOriginalPages < splitQuery . getPageOptions () .getOnMinPages ()) {
34- return new SplitPDF (splitQuery . getFile () , totalOriginalPages );
37+ if (totalOriginalPages < pageOptions .getOnMinPages ()) {
38+ return new SplitPDF (fileBytes , totalOriginalPages );
3539 }
3640
37- var pageRange = getPageRanges (splitQuery . getPageOptions () , totalOriginalPages );
41+ var pageRange = getPageRanges (pageOptions , totalOriginalPages );
3842 pageRange
3943 .stream ()
4044 .filter (i -> i < totalOriginalPages )
@@ -43,12 +47,65 @@ public SplitPDF split(SplitQuery splitQuery) throws IOException {
4347 try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream ()) {
4448 splitDocument .save (outputStream );
4549 byte [] splitPdf = outputStream .toByteArray ();
46- return new SplitPDF (splitPdf , countPages (splitPdf ));
50+ return new SplitPDF (splitPdf , getNumberOfPages (splitPdf ));
4751 }
4852 }
4953 }
5054 }
5155
56+ @ Override
57+ public int getNumberOfPages (byte [] fileBytes ) throws IOException {
58+ var document = Loader .loadPDF (fileBytes );
59+ int pageCount = document .getNumberOfPages ();
60+ document .close ();
61+ return pageCount ;
62+ }
63+
64+ @ Override
65+ public PdfPageImage pdfPageToImage (
66+ byte [] fileBytes ,
67+ String filename ,
68+ int pageNumber
69+ ) throws IOException {
70+ int index = pageNumber - 1 ;
71+ PDDocument document = Loader .loadPDF (fileBytes );
72+ var pdfRenderer = new PDFRenderer (document );
73+ BufferedImage imageBuffer = pdfPageToImageBuffer (index , document , pdfRenderer );
74+ document .close ();
75+ return new PdfPageImage (imageBuffer , index , filename , "jpg" );
76+ }
77+
78+ @ Override
79+ public List <PdfPageImage > pdfToImages (byte [] fileBytes , String filename ) throws IOException {
80+ PDDocument document = Loader .loadPDF (fileBytes );
81+ var pdfRenderer = new PDFRenderer (document );
82+ List <PdfPageImage > pdfPageImages = new ArrayList <>();
83+ for (int i = 0 ; i < document .getNumberOfPages (); i ++) {
84+ var imageBuffer = pdfPageToImageBuffer (i , document , pdfRenderer );
85+ pdfPageImages .add (new PdfPageImage (imageBuffer , i , filename , "jpg" ));
86+ }
87+ document .close ();
88+ return pdfPageImages ;
89+ }
90+
91+ private BufferedImage pdfPageToImageBuffer (
92+ int index ,
93+ PDDocument document ,
94+ PDFRenderer pdfRenderer
95+ ) throws IOException {
96+ PDRectangle bbox = document .getPage (index ).getBBox ();
97+ float dimension = bbox .getWidth () * bbox .getHeight ();
98+ int dpi ;
99+ if (dimension < 200000 ) {
100+ dpi = 300 ;
101+ } else if (dimension < 300000 ) {
102+ dpi = 250 ;
103+ } else {
104+ dpi = 200 ;
105+ }
106+ return pdfRenderer .renderImageWithDPI (index , dpi , ImageType .RGB );
107+ }
108+
52109 private List <Integer > getPageRanges (PageOptions pageOptions , Integer numberOfPages ) {
53110
54111 Set <Integer > pages = Optional
@@ -81,8 +138,4 @@ private boolean checkPdfOpen(byte[] documentFile) {
81138 }
82139 return opens ;
83140 }
84-
85- private int countPages (byte [] documentFile ) throws IOException {
86- return PDFUtils .getNumberOfPages (documentFile );
87- }
88141}
0 commit comments