Skip to content

Commit 9d3b2c7

Browse files
authored
✨ add ability to extract sub-images from PDFs (#143)
1 parent 7d6245e commit 9d3b2c7

6 files changed

Lines changed: 172 additions & 49 deletions

File tree

src/main/java/com/mindee/CommandLineInterface.java

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import com.mindee.product.custom.CustomV1;
1212
import com.mindee.product.invoice.InvoiceV4;
1313
import com.mindee.product.invoicesplitter.InvoiceSplitterV1;
14+
import com.mindee.product.multireceiptsdetector.MultiReceiptsDetectorV1;
1415
import com.mindee.product.passport.PassportV1;
1516
import com.mindee.product.receipt.ReceiptV4;
1617
import java.io.File;
@@ -79,39 +80,47 @@ public static void main(String[] args) {
7980
System.exit(exitCode);
8081
}
8182

82-
@Command(name = "invoice", description = "Invokes the invoice API")
83+
@Command(name = "invoice", description = "Invokes the Invoice API")
8384
void invoiceMethod(
8485
@Parameters(index = "0", paramLabel = "<path>", scope = ScopeType.LOCAL)
8586
File file
8687
) throws IOException {
8788
System.out.println(standardProductOutput(InvoiceV4.class, file));
8889
}
8990

90-
@Command(name = "receipt", description = "Invokes the receipt API")
91+
@Command(name = "receipt", description = "Invokes the Expense Receipt API")
9192
void receiptMethod(
9293
@Parameters(index = "0", paramLabel = "<path>", scope = ScopeType.LOCAL)
9394
File file
9495
) throws IOException {
9596
System.out.println(standardProductOutput(ReceiptV4.class, file));
9697
}
9798

98-
@Command(name = "passport", description = "Invokes the passport API")
99+
@Command(name = "multi-receipt-detector", description = "Invokes the Multi Receipts Detector API")
100+
void multiReceiptDetectorMethod(
101+
@Parameters(index = "0", paramLabel = "<path>", scope = ScopeType.LOCAL)
102+
File file
103+
) throws IOException {
104+
System.out.println(standardProductOutput(MultiReceiptsDetectorV1.class, file));
105+
}
106+
107+
@Command(name = "passport", description = "Invokes the Passport API")
99108
void passportMethod(
100109
@Parameters(index = "0", paramLabel = "<path>", scope = ScopeType.LOCAL)
101110
File file
102111
) throws IOException {
103112
System.out.println(standardProductOutput(PassportV1.class, file));
104113
}
105114

106-
@Command(name = "invoice-splitter", description = "Invokes the invoice-splitter API")
115+
@Command(name = "invoice-splitter", description = "Invokes the Invoice Splitter API")
107116
void invoiceSplitterMethod(
108117
@Parameters(index = "0", paramLabel = "<path>", scope = ScopeType.LOCAL)
109118
File file
110119
) throws IOException, InterruptedException {
111120
System.out.println(standardProductAsyncOutput(InvoiceSplitterV1.class, file));
112121
}
113122

114-
@Command(name = "custom", description = "Invokes a builder API")
123+
@Command(name = "custom", description = "Invokes a Custom API")
115124
void customMethod(
116125
@Option(
117126
names = {"-a", "--account"},

src/main/java/com/mindee/MindeeClient.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -492,8 +492,7 @@ private byte[] getSplitFile(
492492
PageOptions pageOptions
493493
) throws IOException {
494494
byte[] splitFile;
495-
boolean isPDF = InputSourceUtils.isPdf(localInputSource.getFilename());
496-
if (pageOptions == null || !isPDF) {
495+
if (pageOptions == null || !localInputSource.isPdf()) {
497496
splitFile = localInputSource.getFile();
498497
} else {
499498
splitFile = pdfOperation.split(

src/main/java/com/mindee/extraction/ImageExtractor.java

Lines changed: 79 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@
66
import com.mindee.input.InputSourceUtils;
77
import com.mindee.input.LocalInputSource;
88
import com.mindee.parsing.standard.PositionData;
9+
import com.mindee.pdf.PDFUtils;
10+
import com.mindee.pdf.PdfPageImage;
911
import java.awt.image.BufferedImage;
1012
import java.io.ByteArrayInputStream;
11-
import java.io.File;
1213
import java.io.IOException;
1314
import java.util.ArrayList;
1415
import java.util.List;
@@ -18,17 +19,16 @@
1819
* Extract sub-images from an image, or from the pages of a PDF.
1920
*/
2021
public class ImageExtractor {
21-
private final BufferedImage bufferedImage;
22+
private final List<BufferedImage> pageImages;
2223
private final String filename;
24+
private final String saveFormat;
2325

2426
/**
2527
* Init from a path.
2628
* @param filePath Path to the file.
2729
*/
2830
public ImageExtractor(String filePath) throws IOException {
29-
File file = new File(filePath);
30-
this.filename = file.getName();
31-
this.bufferedImage = ImageIO.read(file);
31+
this(new LocalInputSource(filePath));
3232
}
3333

3434
/**
@@ -37,29 +37,79 @@ public ImageExtractor(String filePath) throws IOException {
3737
*/
3838
public ImageExtractor(LocalInputSource source) throws IOException {
3939
this.filename = source.getFilename();
40-
ByteArrayInputStream input = new ByteArrayInputStream(source.getFile());
41-
this.bufferedImage = ImageIO.read(input);
40+
this.pageImages = new ArrayList<>();
41+
42+
if (source.isPdf()) {
43+
this.saveFormat = "jpg";
44+
List<PdfPageImage> pdfPageImages = PDFUtils.pdfToImages(source);
45+
for (PdfPageImage pdfPageImage : pdfPageImages) {
46+
this.pageImages.add(pdfPageImage.getImage());
47+
}
48+
} else {
49+
String[] splitName = InputSourceUtils.splitNameStrict(this.filename);
50+
this.saveFormat = splitName[1].toLowerCase();
51+
52+
ByteArrayInputStream input = new ByteArrayInputStream(source.getFile());
53+
this.pageImages.add(ImageIO.read(input));
54+
}
55+
}
56+
57+
/**
58+
* @return The number of pages in the file.
59+
*/
60+
public int getPageCount() {
61+
return this.pageImages.size();
4262
}
4363

4464
/**
4565
* Extract images from a list of fields having position data.
66+
* Works for both single images and multi-page PDFs; an image input has a single page at index 0.
4667
* @param fields List of Fields to extract.
68+
* @param pageIndex The page index to extract, begins at 0.
4769
* @return A list of {@link ExtractedImage}.
4870
*/
49-
public <FieldT extends PositionData> List<ExtractedImage> extractImages(List<FieldT> fields) {
50-
return extractImages(fields, this.filename);
71+
public <FieldT extends PositionData> List<ExtractedImage> extractImagesFromPage(
72+
List<FieldT> fields,
73+
int pageIndex
74+
) {
75+
return extractImagesFromPage(fields, pageIndex, this.filename);
5176
}
5277

5378
/**
5479
* Extract images from a list of fields having position data.
80+
* Works for both single images and multi-page PDFs; an image input has a single page at index 0.
5581
* @param fields List of Fields to extract.
56-
* @param filename The base output filename.
82+
* @param pageIndex The page index to extract, begins at 0.
83+
* @param outputName The base output filename, must have an image extension.
5784
* @return A list of {@link ExtractedImage}.
5885
*/
59-
public <FieldT extends PositionData> List<ExtractedImage> extractImages(List<FieldT> fields, String filename) {
86+
public <FieldT extends PositionData> List<ExtractedImage> extractImagesFromPage(
87+
List<FieldT> fields,
88+
int pageIndex,
89+
String outputName
90+
) {
91+
String filename;
92+
if (this.getPageCount() > 1) {
93+
String[] splitName = InputSourceUtils.splitNameStrict(outputName);
94+
filename = splitName[0] + "." + this.saveFormat;
95+
} else {
96+
filename = outputName;
97+
}
98+
return extractFromPage(fields, pageIndex, filename);
99+
}
100+
101+
private <FieldT extends PositionData> List<ExtractedImage> extractFromPage(
102+
List<FieldT> fields,
103+
int pageIndex,
104+
String outputName
105+
) {
106+
String[] splitName = InputSourceUtils.splitNameStrict(outputName);
107+
String filename = String.format("%s_page-%3s.%s", splitName[0], pageIndex + 1, splitName[1])
108+
.replace(" ", "0");
109+
60110
List<ExtractedImage> extractedImages = new ArrayList<>();
61111
for (int i = 0; i < fields.size(); i++) {
62-
ExtractedImage extractedImage = extractImage(fields.get(i), filename, i+1);
112+
ExtractedImage extractedImage = extractImage(fields.get(i), pageIndex, i+1, filename);
63113
if (extractedImage != null) {
64114
extractedImages.add(extractedImage);
65115
}
@@ -71,9 +121,15 @@ public <FieldT extends PositionData> List<ExtractedImage> extractImages(List<Fie
71121
* Extract an image from a field having position data.
72122
* @param field The field to extract.
73123
* @param index The index to use for naming the extracted image.
124+
* @param pageIndex The page index to extract, begins at 0.
* @param filename The base output filename; its extension determines the save format.
74125
* @return The {@link ExtractedImage}, or <code>null</code> if the field does not have valid position data.
75126
*/
76-
public <FieldT extends PositionData> ExtractedImage extractImage(FieldT field, String filename, int index) {
127+
public <FieldT extends PositionData> ExtractedImage extractImage(
128+
FieldT field,
129+
int pageIndex,
130+
int index,
131+
String filename
132+
) {
77133
String[] splitName = InputSourceUtils.splitNameStrict(filename);
78134
String saveFormat = splitName[1].toLowerCase();
79135
Polygon boundingBox = field.getBoundingBox();
@@ -84,27 +140,29 @@ public <FieldT extends PositionData> ExtractedImage extractImage(FieldT field, S
84140
String fieldFilename = splitName[0]
85141
+ String.format("_%3s", index).replace(" ", "0")
86142
+ "."
87-
+ splitName[1];
88-
return new ExtractedImage(extractImage(bbox), fieldFilename, saveFormat);
143+
+ saveFormat;
144+
return new ExtractedImage(extractImage(bbox, pageIndex), fieldFilename, saveFormat);
89145
}
90146

91147
/**
92148
* Extract an image from a field having position data.
93149
* @param field The field to extract.
94150
* @param index The index to use for naming the extracted image.
151+
* @param pageIndex The page index to extract, begins at 0.
95152
* @return The {@link ExtractedImage}, or <code>null</code> if the field does not have valid position data.
96153
*/
97-
public <FieldT extends PositionData> ExtractedImage extractImage(FieldT field, int index) {
98-
return extractImage(field, this.filename, index);
154+
public <FieldT extends PositionData> ExtractedImage extractImage(FieldT field, int pageIndex, int index) {
155+
return extractImage(field, pageIndex, index, this.filename);
99156
}
100157

101-
private BufferedImage extractImage(Bbox bbox) {
102-
int width = this.bufferedImage.getWidth();
103-
int height = this.bufferedImage.getHeight();
158+
private BufferedImage extractImage(Bbox bbox, int pageIndex) {
159+
BufferedImage image = this.pageImages.get(pageIndex);
160+
int width = image.getWidth();
161+
int height = image.getHeight();
104162
int minX = (int) Math.round(bbox.getMinX() * width);
105163
int maxX = (int) Math.round(bbox.getMaxX() * width);
106164
int minY = (int) Math.round(bbox.getMinY() * height);
107165
int maxY = (int) Math.round(bbox.getMaxY() * height);
108-
return this.bufferedImage.getSubimage(minX, minY, maxX - minX, maxY - minY);
166+
return image.getSubimage(minX, minY, maxX - minX, maxY - minY);
109167
}
110168
}

src/main/java/com/mindee/input/LocalInputSource.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,8 @@ public LocalInputSource(String fileAsBase64, String filename) {
4242
this.file = Base64.getDecoder().decode(fileAsBase64.getBytes());
4343
this.filename = filename;
4444
}
45+
46+
public boolean isPdf() {
47+
return InputSourceUtils.isPdf(this.filename);
48+
}
4549
}

src/test/java/com/mindee/extraction/ImageExtractorTest.java

Lines changed: 73 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22

33
import com.fasterxml.jackson.databind.JavaType;
44
import com.fasterxml.jackson.databind.ObjectMapper;
5+
import com.mindee.MindeeException;
56
import com.mindee.input.LocalInputSource;
7+
import com.mindee.parsing.common.Page;
68
import com.mindee.parsing.common.PredictResponse;
79
import com.mindee.product.barcodereader.BarcodeReaderV1;
810
import com.mindee.product.barcodereader.BarcodeReaderV1Document;
@@ -51,39 +53,90 @@ public void givenAnImage_shouldExtractPositionFields() throws IOException {
5153
"src/test/resources/products/multi_receipts_detector/default_sample.jpg"
5254
);
5355
PredictResponse<MultiReceiptsDetectorV1> response = getMultiReceiptsPrediction("complete");
54-
MultiReceiptsDetectorV1Document prediction = response.getDocument().getInference().getPrediction();
56+
MultiReceiptsDetectorV1 inference = response.getDocument().getInference();
5557

5658
ImageExtractor extractor = new ImageExtractor(image);
57-
List<ExtractedImage> subImages = extractor.extractImages(prediction.getReceipts());
58-
for (int i = 0; i < subImages.size(); i++) {
59-
ExtractedImage extractedImage = subImages.get(i);
60-
Assertions.assertNotNull(extractedImage.getImage());
61-
extractedImage.writeToFile("src/test/resources/output/");
62-
63-
LocalInputSource source = extractedImage.asInputSource();
64-
Assertions.assertEquals(
65-
String.format("default_sample_%3s.jpg", i + 1).replace(" ", "0"),
66-
source.getFilename()
59+
Assertions.assertEquals(1, extractor.getPageCount());
60+
61+
for (Page<MultiReceiptsDetectorV1Document> page : inference.getPages()) {
62+
List<ExtractedImage> subImages = extractor.extractImagesFromPage(
63+
page.getPrediction().getReceipts(), page.getPageId()
6764
);
65+
for (int i = 0; i < subImages.size(); i++) {
66+
ExtractedImage extractedImage = subImages.get(i);
67+
Assertions.assertNotNull(extractedImage.getImage());
68+
extractedImage.writeToFile("src/test/resources/output/");
69+
70+
LocalInputSource source = extractedImage.asInputSource();
71+
Assertions.assertEquals(
72+
String.format("default_sample_page-001_%3s.jpg", i + 1).replace(" ", "0"),
73+
source.getFilename()
74+
);
75+
}
6876
}
6977
}
7078

7179
@Test
7280
public void givenAnImage_shouldExtractValueFields() throws IOException {
7381
String imagePath = "src/test/resources/products/barcode_reader/default_sample.jpg";
7482
PredictResponse<BarcodeReaderV1> response = getBarcodeReaderPrediction("complete");
75-
BarcodeReaderV1Document prediction = response.getDocument().getInference().getPrediction();
83+
BarcodeReaderV1 inference = response.getDocument().getInference();
7684

7785
ImageExtractor extractor = new ImageExtractor(imagePath);
78-
List<ExtractedImage> codes1D = extractor.extractImages(prediction.getCodes1D(), "barcodes_1D.png");
79-
for (ExtractedImage extractedImage : codes1D) {
80-
Assertions.assertNotNull(extractedImage.getImage());
81-
extractedImage.writeToFile("src/test/resources/output/");
86+
Assertions.assertEquals(1, extractor.getPageCount());
87+
88+
for (Page<BarcodeReaderV1Document> page : inference.getPages()) {
89+
List<ExtractedImage> codes1D = extractor.extractImagesFromPage(
90+
page.getPrediction().getCodes1D(), page.getPageId(), "barcodes_1D.png"
91+
);
92+
for (int i = 0; i < codes1D.size(); i++) {
93+
ExtractedImage extractedImage = codes1D.get(i);
94+
Assertions.assertNotNull(extractedImage.getImage());
95+
LocalInputSource source = extractedImage.asInputSource();
96+
Assertions.assertEquals(
97+
String.format("barcodes_1D_page-001_%3s.png", i + 1).replace(" ", "0"),
98+
source.getFilename()
99+
);
100+
extractedImage.writeToFile("src/test/resources/output/");
101+
}
102+
List<ExtractedImage> codes2D = extractor.extractImagesFromPage(
103+
page.getPrediction().getCodes2D(), page.getPageId(),"barcodes_2D.png"
104+
);
105+
for (ExtractedImage extractedImage : codes2D) {
106+
Assertions.assertNotNull(extractedImage.getImage());
107+
extractedImage.writeToFile("src/test/resources/output/");
108+
}
82109
}
83-
List<ExtractedImage> codes2D = extractor.extractImages(prediction.getCodes2D(), "barcodes_2D.png");
84-
for (ExtractedImage extractedImage : codes2D) {
85-
Assertions.assertNotNull(extractedImage.getImage());
86-
extractedImage.writeToFile("src/test/resources/output/");
110+
}
111+
112+
@Test
113+
public void givenAPdf_shouldExtractPositionFields() throws IOException {
114+
LocalInputSource image = new LocalInputSource(
115+
"src/test/resources/products/multi_receipts_detector/multipage_sample.pdf"
116+
);
117+
PredictResponse<MultiReceiptsDetectorV1> response = getMultiReceiptsPrediction("multipage_sample");
118+
MultiReceiptsDetectorV1 inference = response.getDocument().getInference();
119+
120+
ImageExtractor extractor = new ImageExtractor(image);
121+
Assertions.assertEquals(2, extractor.getPageCount());
122+
123+
for (Page<MultiReceiptsDetectorV1Document> page : inference.getPages()) {
124+
List<ExtractedImage> subImages = extractor.extractImagesFromPage(
125+
page.getPrediction().getReceipts(),
126+
page.getPageId()
127+
);
128+
129+
for (int i = 0; i < subImages.size(); i++) {
130+
ExtractedImage extractedImage = subImages.get(i);
131+
Assertions.assertNotNull(extractedImage.getImage());
132+
extractedImage.writeToFile("src/test/resources/output/");
133+
134+
LocalInputSource source = extractedImage.asInputSource();
135+
Assertions.assertEquals(
136+
String.format("multipage_sample_page-%3s_%3s.jpg", page.getPageId() + 1, i + 1).replace(" ", "0"),
137+
source.getFilename()
138+
);
139+
}
87140
}
88141
}
89142
}

0 commit comments

Comments
 (0)