Skip to content

Commit eec0d95

Browse files
committed
✨ add Crop for v2
1 parent 4f74a63 commit eec0d95

6 files changed

Lines changed: 137 additions & 10 deletions

File tree

src/main/java/com/mindee/image/ExtractedImage.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ public class ExtractedImage {
1717
private final BufferedImage image;
1818
private final String filename;
1919
private final String saveFormat;
20+
private final int pageId;
2021

2122
/**
2223
* Default constructor.
@@ -25,10 +26,11 @@ public class ExtractedImage {
2526
* @param filename Name of the extracted image.
2627
* @param saveFormat Format to save the image as, defaults to PNG.
2728
*/
28-
public ExtractedImage(BufferedImage image, String filename, String saveFormat) {
29+
public ExtractedImage(BufferedImage image, String filename, String saveFormat, int pageId) {
2930
this.image = image;
3031
this.filename = filename;
3132
this.saveFormat = saveFormat;
33+
this.pageId = pageId;
3234
}
3335

3436
/**
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
package com.mindee.image;
2+
3+
import java.io.IOException;
4+
import java.nio.file.Path;
5+
import java.util.ArrayList;
6+
7+
public class ExtractedImages extends ArrayList<ExtractedImage> {
8+
public void saveAllToDisk(String outputPath) throws IOException {
9+
saveAllToDisk(Path.of(outputPath));
10+
}
11+
12+
public void saveAllToDisk(Path outputPath) throws IOException {
13+
for (ExtractedImage image : this) {
14+
image.writeToFile(outputPath);
15+
}
16+
}
17+
}

src/main/java/com/mindee/image/ImageExtractor.java

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,16 +25,18 @@ public class ImageExtractor {
2525
private final String saveFormat;
2626

2727
public ImageExtractor(LocalInputSource source) throws IOException {
28-
this.filename = source.getFilename();
28+
2929
this.pageImages = new ArrayList<>();
3030

3131
if (source.isPDF()) {
3232
this.saveFormat = "jpg";
33-
var pdfPageImages = pdfToImages(source.getFile(), this.filename);
33+
var pdfPageImages = pdfToImages(source.getFile(), source.getFilename());
3434
for (PDFPageImage pdfPageImage : pdfPageImages) {
3535
this.pageImages.add(pdfPageImage.getImage());
3636
}
37+
this.filename = source.getFilename() + "." + this.saveFormat;
3738
} else {
39+
this.filename = source.getFilename();
3840
String[] splitName = InputSourceUtils.splitNameStrict(this.filename);
3941
this.saveFormat = splitName[1].toLowerCase();
4042

@@ -43,7 +45,7 @@ public ImageExtractor(LocalInputSource source) throws IOException {
4345
}
4446
}
4547

46-
public List<PDFPageImage> pdfToImages(byte[] fileBytes, String filename) throws IOException {
48+
private List<PDFPageImage> pdfToImages(byte[] fileBytes, String filename) throws IOException {
4749
PDDocument document = Loader.loadPDF(fileBytes);
4850
var pdfRenderer = new PDFRenderer(document);
4951
List<PDFPageImage> pdfPageImages = new ArrayList<>();
@@ -90,7 +92,7 @@ public int getPageCount() {
9092
* @param pageIndex The page index to extract, begins at 0.
9193
* @return A list of {@link ExtractedImage}.
9294
*/
93-
public <FieldT extends PositionDataField> List<ExtractedImage> extractImagesFromPage(
95+
public <FieldT extends PositionDataField> ExtractedImages extractImagesFromPage(
9496
List<FieldT> fields,
9597
int pageIndex
9698
) {
@@ -106,7 +108,7 @@ public <FieldT extends PositionDataField> List<ExtractedImage> extractImagesFrom
106108
* @param outputName The base output filename, must have an image extension.
107109
* @return A list of {@link ExtractedImage}.
108110
*/
109-
public <FieldT extends PositionDataField> List<ExtractedImage> extractImagesFromPage(
111+
public <FieldT extends PositionDataField> ExtractedImages extractImagesFromPage(
110112
List<FieldT> fields,
111113
int pageIndex,
112114
String outputName
@@ -121,7 +123,7 @@ public <FieldT extends PositionDataField> List<ExtractedImage> extractImagesFrom
121123
return extractFromPage(fields, pageIndex, filename);
122124
}
123125

124-
private <FieldT extends PositionDataField> List<ExtractedImage> extractFromPage(
126+
private <FieldT extends PositionDataField> ExtractedImages extractFromPage(
125127
List<FieldT> fields,
126128
int pageIndex,
127129
String outputName
@@ -131,7 +133,7 @@ private <FieldT extends PositionDataField> List<ExtractedImage> extractFromPage(
131133
.format("%s_page-%3s.%s", splitName[0], pageIndex + 1, splitName[1])
132134
.replace(" ", "0");
133135

134-
var extractedImages = new ArrayList<ExtractedImage>();
136+
var extractedImages = new ExtractedImages();
135137
for (int i = 0; i < fields.size(); i++) {
136138
ExtractedImage extractedImage = extractImage(fields.get(i), pageIndex, i + 1, filename);
137139
if (extractedImage != null) {
@@ -171,7 +173,8 @@ public <FieldT extends PositionDataField> ExtractedImage extractImage(
171173
return new ExtractedImage(
172174
extractImage(polygon.getAsBbox(), pageIndex),
173175
fieldFilename,
174-
saveFormat
176+
saveFormat,
177+
pageIndex
175178
);
176179
}
177180

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
package com.mindee.v2.fileOperations;
2+
3+
import com.mindee.image.ExtractedImage;
4+
import com.mindee.image.ExtractedImages;
5+
import com.mindee.image.ImageExtractor;
6+
import com.mindee.input.LocalInputSource;
7+
import com.mindee.v2.product.crop.CropItem;
8+
import java.io.IOException;
9+
import java.util.List;
10+
11+
public class Crop {
12+
private final ImageExtractor imageExtractor;
13+
14+
public Crop(LocalInputSource inputSource) throws IOException {
15+
this.imageExtractor = new ImageExtractor(inputSource);
16+
}
17+
18+
public ExtractedImage extractSingleCrop(CropItem cropItem) throws IOException {
19+
return this.imageExtractor
20+
.extractImage(cropItem.getLocation(), cropItem.getLocation().getPage(), 0);
21+
}
22+
23+
public ExtractedImages extractCrops(List<CropItem> cropItems) {
24+
var extractedImages = new ExtractedImages();
25+
for (int i = 0; i < cropItems.size(); i++) {
26+
var cropItem = cropItems.get(i);
27+
extractedImages
28+
.add(
29+
this.imageExtractor
30+
.extractImage(cropItem.getLocation(), cropItem.getLocation().getPage(), i + 1)
31+
);
32+
}
33+
return extractedImages;
34+
}
35+
}

src/test/java/com/mindee/v1/fileOperation/InvoiceSplitterAutoExtractionIT.java renamed to src/test/java/com/mindee/v1/fileOperations/InvoiceSplitterAutoExtractionIT.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package com.mindee.v1.fileOperation;
1+
package com.mindee.v1.fileOperations;
22

33
import static com.mindee.TestingUtilities.getV1ResourcePath;
44
import static com.mindee.TestingUtilities.levenshteinRatio;
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
package com.mindee.v2.fileOperations;
2+
3+
import static com.mindee.TestingUtilities.getResourcePath;
4+
import static com.mindee.TestingUtilities.getV2ResourcePath;
5+
import static org.junit.jupiter.api.Assertions.assertEquals;
6+
import static org.junit.jupiter.api.Assertions.assertTrue;
7+
8+
import com.mindee.input.LocalInputSource;
9+
import com.mindee.v2.parsing.LocalResponse;
10+
import com.mindee.v2.product.crop.CropResponse;
11+
import java.awt.image.BufferedImage;
12+
import java.nio.file.Files;
13+
import org.junit.jupiter.api.Test;
14+
15+
class CropTest {
16+
17+
@Test
18+
void processesSinglePageCropSplitCorrectly() throws Exception {
19+
var inputSample = new LocalInputSource(getV2ResourcePath("products/crop/default_sample.jpg"));
20+
var localResponse = new LocalResponse(getV2ResourcePath("products/crop/default_sample.json"));
21+
var doc = localResponse.deserializeResponse(CropResponse.class);
22+
23+
var extractedCrops = new Crop(inputSample)
24+
.extractCrops(doc.getInference().getResult().getCrops());
25+
26+
assertEquals(2, extractedCrops.size());
27+
28+
var crop0 = extractedCrops.get(0);
29+
assertEquals(0, crop0.getPageId());
30+
assertEquals("default_sample_001.jpg", crop0.getFilename());
31+
32+
assertEquals(1056, crop0.getImage().getWidth());
33+
assertEquals(2070, crop0.getImage().getHeight());
34+
35+
var outputPath = getResourcePath("output");
36+
extractedCrops.saveAllToDisk(outputPath);
37+
assertTrue(Files.exists(outputPath.resolve("default_sample_001.jpg")));
38+
assertTrue(Files.exists(outputPath.resolve("default_sample_002.jpg")));
39+
}
40+
41+
@Test
42+
void processesMultiPageReceiptSplitCorrectly() throws Exception {
43+
var inputSample = new LocalInputSource(getV2ResourcePath("products/crop/multipage_sample.pdf"));
44+
var localResponse = new LocalResponse(getV2ResourcePath("products/crop/multipage_sample.json"));
45+
var doc = localResponse.deserializeResponse(CropResponse.class);
46+
47+
var extractedCrops = new Crop(inputSample)
48+
.extractCrops(doc.getInference().getResult().getCrops());
49+
50+
assertEquals(5, extractedCrops.size());
51+
52+
var crop0 = extractedCrops.get(0);
53+
assertEquals(0, crop0.getPageId());
54+
assertEquals("multipage_sample.pdf_001.jpg", crop0.getFilename());
55+
assertEquals(555, crop0.getImage().getWidth());
56+
assertEquals(1533, crop0.getImage().getHeight());
57+
58+
var crop3 = extractedCrops.get(3);
59+
assertEquals(1, crop3.getPageId());
60+
BufferedImage bitmap3 = crop3.getImage();
61+
assertEquals("multipage_sample.pdf_004.jpg", crop3.getFilename());
62+
assertEquals(562, crop3.getImage().getWidth());
63+
assertEquals(974, crop3.getImage().getHeight());
64+
65+
var outputPath = getResourcePath("output");
66+
extractedCrops.saveAllToDisk(outputPath);
67+
assertTrue(Files.exists(outputPath.resolve("multipage_sample.pdf_001.jpg")));
68+
assertTrue(Files.exists(outputPath.resolve("multipage_sample.pdf_005.jpg")));
69+
}
70+
}

0 commit comments

Comments
 (0)