@@ -25,16 +25,18 @@ public class ImageExtractor {
2525 private final String saveFormat ;
2626
2727 public ImageExtractor (LocalInputSource source ) throws IOException {
28- this . filename = source . getFilename ();
28+
2929 this .pageImages = new ArrayList <>();
3030
3131 if (source .isPDF ()) {
3232 this .saveFormat = "jpg" ;
33- var pdfPageImages = pdfToImages (source .getFile (), this . filename );
33+ var pdfPageImages = pdfToImages (source .getFile (), source . getFilename () );
3434 for (PDFPageImage pdfPageImage : pdfPageImages ) {
3535 this .pageImages .add (pdfPageImage .getImage ());
3636 }
37+ this .filename = source .getFilename () + "." + this .saveFormat ;
3738 } else {
39+ this .filename = source .getFilename ();
3840 String [] splitName = InputSourceUtils .splitNameStrict (this .filename );
3941 this .saveFormat = splitName [1 ].toLowerCase ();
4042
@@ -43,7 +45,7 @@ public ImageExtractor(LocalInputSource source) throws IOException {
4345 }
4446 }
4547
46- public List <PDFPageImage > pdfToImages (byte [] fileBytes , String filename ) throws IOException {
48+ private List <PDFPageImage > pdfToImages (byte [] fileBytes , String filename ) throws IOException {
4749 PDDocument document = Loader .loadPDF (fileBytes );
4850 var pdfRenderer = new PDFRenderer (document );
4951 List <PDFPageImage > pdfPageImages = new ArrayList <>();
@@ -90,7 +92,7 @@ public int getPageCount() {
9092 * @param pageIndex The page index to extract, begins at 0.
9193 * @return The extracted images, as an {@link ExtractedImages}.
9294 */
93- public <FieldT extends PositionDataField > List < ExtractedImage > extractImagesFromPage (
95+ public <FieldT extends PositionDataField > ExtractedImages extractImagesFromPage (
9496 List <FieldT > fields ,
9597 int pageIndex
9698 ) {
@@ -106,7 +108,7 @@ public <FieldT extends PositionDataField> List<ExtractedImage> extractImagesFrom
106108 * @param outputName The base output filename, must have an image extension.
107109 * @return The extracted images, as an {@link ExtractedImages}.
108110 */
109- public <FieldT extends PositionDataField > List < ExtractedImage > extractImagesFromPage (
111+ public <FieldT extends PositionDataField > ExtractedImages extractImagesFromPage (
110112 List <FieldT > fields ,
111113 int pageIndex ,
112114 String outputName
@@ -121,7 +123,7 @@ public <FieldT extends PositionDataField> List<ExtractedImage> extractImagesFrom
121123 return extractFromPage (fields , pageIndex , filename );
122124 }
123125
124- private <FieldT extends PositionDataField > List < ExtractedImage > extractFromPage (
126+ private <FieldT extends PositionDataField > ExtractedImages extractFromPage (
125127 List <FieldT > fields ,
126128 int pageIndex ,
127129 String outputName
@@ -131,7 +133,7 @@ private <FieldT extends PositionDataField> List<ExtractedImage> extractFromPage(
131133 .format ("%s_page-%3s.%s" , splitName [0 ], pageIndex + 1 , splitName [1 ])
132134 .replace (" " , "0" );
133135
134- var extractedImages = new ArrayList < ExtractedImage > ();
136+ var extractedImages = new ExtractedImages ();
135137 for (int i = 0 ; i < fields .size (); i ++) {
136138 ExtractedImage extractedImage = extractImage (fields .get (i ), pageIndex , i + 1 , filename );
137139 if (extractedImage != null ) {
@@ -171,7 +173,8 @@ public <FieldT extends PositionDataField> ExtractedImage extractImage(
171173 return new ExtractedImage (
172174 extractImage (polygon .getAsBbox (), pageIndex ),
173175 fieldFilename ,
174- saveFormat
176+ saveFormat ,
177+ pageIndex
175178 );
176179 }
177180
0 commit comments