Skip to content

Commit 08585d6

Browse files
committed
will it work?
1 parent 4dc08e0 commit 08585d6

17 files changed

Lines changed: 146 additions & 337 deletions

src/main/java/com/mindee/image/ImageExtractor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ public ImageExtractor(LocalInputSource source) throws IOException {
2929
this.filename = source.getFilename();
3030
this.pageImages = new ArrayList<>();
3131

32-
if (source.isPdf()) {
32+
if (source.isPDF()) {
3333
this.saveFormat = "jpg";
3434
var pdfPageImages = pdfToImages(source.getFile(), this.filename);
3535
for (PdfPageImage pdfPageImage : pdfPageImages) {

src/main/java/com/mindee/input/LocalInputSource.java

Lines changed: 26 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
import com.mindee.image.ImageCompressor;
44
import com.mindee.pdf.PDFCompression;
55
import com.mindee.pdf.PDFCompressor;
6-
import com.mindee.pdf.PDFInputSource;
7-
import com.mindee.pdf.PDFInputSourcer;
6+
import com.mindee.pdf.PDFInputOperation;
7+
import com.mindee.pdf.PDFInputOperator;
88
import java.io.File;
99
import java.io.IOException;
1010
import java.io.InputStream;
@@ -18,16 +18,18 @@
1818
/**
1919
* A source document for Mindee API operations.
2020
*/
21-
public final class LocalInputSource {
21+
public class LocalInputSource {
2222

2323
@Getter
2424
private byte[] file;
2525
@Getter
2626
private final String filename;
2727
@Setter
28-
private PDFInputSource pdfOperation;
28+
private PDFInputOperation pdfInputOperator;
2929
@Setter
30-
private PDFCompressor pdfCompressor;
30+
private PDFCompression pdfCompressor;
31+
// Cached result of the PDF check; null until isPDF() computes it on first call.
32+
private Boolean isPDF;
3133

3234
public LocalInputSource(InputStream file, String filename) throws IOException {
3335
this.file = IOUtils.toByteArray(file);
@@ -60,14 +62,14 @@ public LocalInputSource(String fileAsBase64, String filename) {
6062
this.filename = filename;
6163
}
6264

63-
public PDFInputSource getPdfOperation() {
64-
if (this.pdfOperation == null) {
65-
this.pdfOperation = new PDFInputSourcer();
65+
private PDFInputOperation getPdfInputOperator() {
66+
if (this.pdfInputOperator == null) {
67+
this.pdfInputOperator = new PDFInputOperator();
6668
}
67-
return this.pdfOperation;
69+
return this.pdfInputOperator;
6870
}
6971

70-
public PDFCompression getPdfCompressor() {
72+
private PDFCompression getPdfCompressor() {
7173
if (this.pdfCompressor == null) {
7274
this.pdfCompressor = new PDFCompressor();
7375
}
@@ -81,10 +83,10 @@ public PDFCompression getPdfCompressor() {
8183
* @throws IOException If an I/O error occurs during the PDF operation.
8284
*/
8385
public int getPageCount() throws IOException {
84-
if (!this.isPdf()) {
86+
if (!this.isPDF()) {
8587
return 1;
8688
}
87-
return getPdfOperation().getNumberOfPages(this.file);
89+
return getPdfInputOperator().getPageCount(this.file);
8890
}
8991

9092
/**
@@ -94,17 +96,19 @@ public int getPageCount() throws IOException {
9496
* @throws IOException If an I/O error occurs during the PDF operation.
9597
*/
9698
public void applyPageOptions(PageOptions pageOptions) throws IOException {
97-
if (pageOptions != null && this.isPdf()) {
98-
this.file = getPdfOperation().split(this.file, pageOptions).getFile();
99+
if (pageOptions != null && this.isPDF()) {
100+
this.file = getPdfInputOperator().split(this.file, pageOptions).getFile();
99101
}
100102
}
101103

102-
public boolean isPdf() {
103-
return getPdfOperation().isPdf(this.file);
104-
}
105-
106-
public boolean hasSourceText() {
107-
return getPdfOperation().hasSourceText(this.file);
104+
/**
105+
* Returns true if the file is a PDF.
106+
*/
107+
public boolean isPDF() {
108+
if (this.isPDF == null) {
109+
this.isPDF = getPdfInputOperator().isPDF(this.file);
110+
}
111+
return this.isPDF;
108112
}
109113

110114
public LocalInputSource compress(
@@ -114,9 +118,9 @@ public LocalInputSource compress(
114118
Boolean forceSourceText,
115119
Boolean disableSourceText
116120
) throws IOException {
117-
if (isPdf()) {
121+
if (isPDF()) {
118122
this.file = getPdfCompressor()
119-
.compressPdf(this.file, quality, forceSourceText, disableSourceText);
123+
.compressPDF(this.file, quality, forceSourceText, disableSourceText);
120124
} else {
121125
this.file = ImageCompressor.compressImage(this.file, quality, maxWidth, maxHeight);
122126
}

src/main/java/com/mindee/pdf/PDFExtractorBase.java renamed to src/main/java/com/mindee/pdf/BasePDFExtractor.java

Lines changed: 4 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
import java.awt.image.BufferedImage;
77
import java.io.ByteArrayInputStream;
88
import java.io.ByteArrayOutputStream;
9-
import java.io.File;
109
import java.io.IOException;
1110
import java.util.ArrayList;
1211
import java.util.List;
@@ -17,16 +16,13 @@
1716
import org.apache.pdfbox.pdmodel.PDDocument;
1817
import org.apache.pdfbox.pdmodel.PDPage;
1918
import org.apache.pdfbox.pdmodel.PDPageContentStream;
20-
import org.apache.pdfbox.pdmodel.common.PDRectangle;
2119
import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory;
2220
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
23-
import org.apache.pdfbox.rendering.ImageType;
24-
import org.apache.pdfbox.rendering.PDFRenderer;
2521

2622
/**
2723
* PDF extraction class.
2824
*/
29-
public class PDFExtractorBase implements PDFExtraction {
25+
public class BasePDFExtractor {
3026
protected final PDDocument sourcePdf;
3127
protected final String filename;
3228

@@ -36,9 +32,9 @@ public class PDFExtractorBase implements PDFExtraction {
3632
* @param source The local source.
3733
* @throws IOException Throws if the file can't be accessed.
3834
*/
39-
public PDFExtractorBase(LocalInputSource source) throws IOException {
35+
public BasePDFExtractor(LocalInputSource source) throws IOException {
4036
this.filename = source.getFilename();
41-
if (source.isPdf()) {
37+
if (source.isPDF()) {
4238
this.sourcePdf = Loader.loadPDF(source.getFile());
4339
} else {
4440
var document = new PDDocument();
@@ -60,60 +56,6 @@ public PDFExtractorBase(LocalInputSource source) throws IOException {
6056
}
6157
}
6258

63-
// @Override
64-
// public PdfPageImage pdfPageToImage(
65-
// byte[] fileBytes,
66-
// String filename,
67-
// int pageNumber
68-
// ) throws IOException {
69-
// int index = pageNumber - 1;
70-
// PDDocument document = Loader.loadPDF(fileBytes);
71-
// var pdfRenderer = new PDFRenderer(document);
72-
// BufferedImage imageBuffer = pdfPageToImageBuffer(index, document, pdfRenderer);
73-
// document.close();
74-
// return new PdfPageImage(imageBuffer, index, filename, "jpg");
75-
// }
76-
//
77-
// @Override
78-
// public List<PdfPageImage> pdfToImages(byte[] fileBytes, String filename) throws IOException {
79-
// PDDocument document = Loader.loadPDF(fileBytes);
80-
// var pdfRenderer = new PDFRenderer(document);
81-
// List<PdfPageImage> pdfPageImages = new ArrayList<>();
82-
// for (int i = 0; i < document.getNumberOfPages(); i++) {
83-
// var imageBuffer = pdfPageToImageBuffer(i, document, pdfRenderer);
84-
// pdfPageImages.add(new PdfPageImage(imageBuffer, i, filename, "jpg"));
85-
// }
86-
// document.close();
87-
// return pdfPageImages;
88-
// }
89-
90-
private BufferedImage pdfPageToImageBuffer(
91-
int index,
92-
PDDocument document,
93-
PDFRenderer pdfRenderer
94-
) throws IOException {
95-
PDRectangle bbox = document.getPage(index).getBBox();
96-
float dimension = bbox.getWidth() * bbox.getHeight();
97-
int dpi;
98-
if (dimension < 200000) {
99-
dpi = 300;
100-
} else if (dimension < 300000) {
101-
dpi = 250;
102-
} else {
103-
dpi = 200;
104-
}
105-
return pdfRenderer.renderImageWithDPI(index, dpi, ImageType.RGB);
106-
}
107-
108-
/**
109-
* Get the number of pages in the PDF file.
110-
*
111-
* @return The number of pages in the PDF file.
112-
*/
113-
public int getPageCount() {
114-
return sourcePdf.getNumberOfPages();
115-
}
116-
11759
/**
11860
* Converts an array to a buffered image.
11961
*
@@ -154,10 +96,7 @@ public List<ExtractedPDF> extractSubDocuments(
15496
+ splitName[1];
15597
extractedPDFs
15698
.add(
157-
new ExtractedPDF(
158-
Loader.loadPDF(mergePdfPages(this.sourcePdf, pageIndexElement, false)),
159-
fieldFilename
160-
)
99+
new ExtractedPDF(mergePdfPages(this.sourcePdf, pageIndexElement, false), fieldFilename)
161100
);
162101
}
163102
return extractedPDFs;
@@ -197,24 +136,6 @@ private static byte[] createPdfFromExistingPdf(
197136
return output;
198137
}
199138

200-
/**
201-
* Merge specified PDF pages together.
202-
*
203-
* @param file The PDF file.
204-
* @param pageNumbers List of page numbers to merge together.
205-
*/
206-
@Override
207-
public byte[] mergePdfPages(File file, List<Integer> pageNumbers) throws IOException {
208-
PDDocument document = Loader.loadPDF(file);
209-
return mergePdfPages(document, pageNumbers, true);
210-
}
211-
212-
@Override
213-
public byte[] mergePdfPages(PDDocument document, List<Integer> pageNumbers) throws IOException {
214-
return mergePdfPages(document, pageNumbers, true);
215-
}
216-
217-
@Override
218139
public byte[] mergePdfPages(
219140
PDDocument document,
220141
List<Integer> pageNumbers,
Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,27 @@
11
package com.mindee.pdf;
22

33
import com.mindee.input.LocalInputSource;
4-
import java.io.ByteArrayOutputStream;
5-
import java.io.File;
64
import java.io.IOException;
5+
import java.nio.file.Files;
76
import java.nio.file.Paths;
87
import lombok.Getter;
9-
import org.apache.pdfbox.pdmodel.PDDocument;
108

119
/**
1210
* An extracted sub-PDF.
1311
*/
1412
@Getter
1513
public class ExtractedPDF {
16-
private final PDDocument pdf;
14+
private final byte[] fileBytes;
1715
private final String filename;
1816

1917
/**
2018
* Default constructor.
2119
*
22-
* @param pdf PDF wrapper object.
20+
* @param fileBytes PDF file as bytes.
2321
* @param filename Name of the extracted file.
2422
*/
25-
public ExtractedPDF(PDDocument pdf, String filename) {
26-
this.pdf = pdf;
23+
public ExtractedPDF(byte[] fileBytes, String filename) {
24+
this.fileBytes = fileBytes;
2725
this.filename = filename;
2826
}
2927

@@ -35,8 +33,7 @@ public ExtractedPDF(PDDocument pdf, String filename) {
3533
*/
3634
public void writeToFile(String outputPath) throws IOException {
3735
var pdfPath = Paths.get(outputPath, this.filename);
38-
var outputfile = new File(pdfPath.toString());
39-
this.pdf.save(outputfile);
36+
Files.write(pdfPath, this.fileBytes);
4037
}
4138

4239
/**
@@ -46,8 +43,6 @@ public void writeToFile(String outputPath) throws IOException {
4643
* @throws IOException Throws if the file can't be accessed.
4744
*/
4845
public LocalInputSource asInputSource() throws IOException {
49-
var output = new ByteArrayOutputStream();
50-
this.pdf.save(output);
51-
return new LocalInputSource(output.toByteArray(), this.filename);
46+
return new LocalInputSource(this.fileBytes, this.filename);
5247
}
5348
}

src/main/java/com/mindee/pdf/PDFCompression.java

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,26 +3,26 @@
33
import java.io.IOException;
44

55
public interface PDFCompression {
6-
byte[] compressPdf(
7-
byte[] pdfData,
6+
byte[] compressPDF(
7+
byte[] fileBytes,
88
Integer imageQuality,
99
Boolean forceSourceTextCompression,
1010
Boolean disableSourceText
1111
) throws IOException;
1212

13-
default byte[] compressPdf(
14-
byte[] pdfData,
13+
default byte[] compressPDF(
14+
byte[] fileBytes,
1515
Integer imageQuality,
1616
Boolean forceSourceTextCompression
1717
) throws IOException {
18-
return compressPdf(pdfData, imageQuality, forceSourceTextCompression, true);
18+
return compressPDF(fileBytes, imageQuality, forceSourceTextCompression, true);
1919
}
2020

21-
default byte[] compressPdf(byte[] pdfData, Integer imageQuality) throws IOException {
22-
return compressPdf(pdfData, imageQuality, false, true);
21+
default byte[] compressPDF(byte[] fileBytes, Integer imageQuality) throws IOException {
22+
return compressPDF(fileBytes, imageQuality, false, true);
2323
}
2424

25-
default byte[] compressPdf(byte[] pdfData) throws IOException {
26-
return compressPdf(pdfData, 85, false, true);
25+
default byte[] compressPDF(byte[] fileBytes) throws IOException {
26+
return compressPDF(fileBytes, 85, false, true);
2727
}
2828
}

0 commit comments

Comments
 (0)