mindee
diff --git a/src/main/java/com/mindee/input/LocalInputSource.java b/src/main/java/com/mindee/input/LocalInputSource.java
Lines changed: 17 additions & 7 deletions
diff --git a/src/main/java/com/mindee/pdf/PDFExtractor.java b/src/main/java/com/mindee/pdf/BasePDFExtractor.java
rename from src/main/java/com/mindee/pdf/PDFExtractor.java
rename to src/main/java/com/mindee/pdf/BasePDFExtractor.java
Lines changed: 7 additions & 82 deletions
diff --git a/src/main/java/com/mindee/pdf/PdfBoxApi.java b/src/main/java/com/mindee/pdf/PDFBoxApi.java
rename from src/main/java/com/mindee/pdf/PdfBoxApi.java
rename to src/main/java/com/mindee/pdf/PDFBoxApi.java
Lines changed: 11 additions & 16 deletions
diff --git a/src/main/java/com/mindee/pdf/PdfCompressor.java b/src/main/java/com/mindee/pdf/PDFCompressor.java
rename from src/main/java/com/mindee/pdf/PdfCompressor.java
rename to src/main/java/com/mindee/pdf/PDFCompressor.java
Lines changed: 6 additions & 7 deletions
diff --git a/src/main/java/com/mindee/pdf/PdfOperation.java b/src/main/java/com/mindee/pdf/PDFOperation.java
rename from src/main/java/com/mindee/pdf/PdfOperation.java
rename to src/main/java/com/mindee/pdf/PDFOperation.java
Lines changed: 2 additions & 2 deletions
diff --git a/src/main/java/com/mindee/pdf/PDFUtils.java b/src/main/java/com/mindee/pdf/PDFUtils.java
Lines changed: 5 additions & 5 deletions
@@ -1,10 +1,10 @@
 package com.mindee.input;
 
 import com.mindee.image.ImageCompressor;
+import com.mindee.pdf.PDFBoxApi;
+import com.mindee.pdf.PDFCompressor;
+import com.mindee.pdf.PDFOperation;
 import com.mindee.pdf.PDFUtils;
-import com.mindee.pdf.PdfBoxApi;
-import com.mindee.pdf.PdfCompressor;
-import com.mindee.pdf.PdfOperation;
 import com.mindee.pdf.SplitQuery;
 import java.io.File;
 import java.io.IOException;
@@ -13,16 +13,20 @@
 import java.nio.file.Path;
 import java.util.Base64;
 import lombok.Getter;
+import lombok.Setter;
 import org.apache.pdfbox.io.IOUtils;
 
 /**
  * A source document for Mindee API operations.
  */
-@Getter
 public final class LocalInputSource {
 
+  @Getter
   private byte[] file;
+  @Getter
   private final String filename;
+  @Setter
+  private PDFOperation pdfOperation;
 
   public LocalInputSource(InputStream file, String filename) throws IOException {
     this.file = IOUtils.toByteArray(file);
@@ -55,6 +59,13 @@ public LocalInputSource(String fileAsBase64, String filename) {
     this.filename = filename;
   }
 
+  public PDFOperation getPdfOperation() {
+    if (pdfOperation == null) {
+      pdfOperation = new PDFBoxApi(); // default used when no operation has been set
+    }
+    return pdfOperation;
+  }
+
   /**
    * Get the number of pages in the document.
    *
@@ -76,8 +87,7 @@ public int getPageCount() throws IOException {
    */
   public void applyPageOptions(PageOptions pageOptions) throws IOException {
     if (pageOptions != null && this.isPdf()) {
-      PdfOperation pdfOperation = new PdfBoxApi();
-      this.file = pdfOperation.split(new SplitQuery(this.file, pageOptions)).getFile();
+      this.file = getPdfOperation().split(new SplitQuery(this.file, pageOptions)).getFile();
     }
   }
 
@@ -97,7 +107,7 @@ public void compress(
       Boolean disableSourceText
   ) throws IOException {
     if (isPdf()) {
-      this.file = PdfCompressor.compressPdf(this.file, quality, forceSourceText, disableSourceText);
+      this.file = PDFCompressor.compressPdf(this.file, quality, forceSourceText, disableSourceText);
     } else {
       this.file = ImageCompressor.compressImage(this.file, quality, maxWidth, maxHeight);
     }
 
@@ -5,14 +5,11 @@
 import com.mindee.MindeeException;
 import com.mindee.input.InputSourceUtils;
 import com.mindee.input.LocalInputSource;
-import com.mindee.v1.product.invoicesplitter.InvoiceSplitterV1InvoicePageGroup;
 import java.awt.image.BufferedImage;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Iterator;
 import java.util.List;
-import java.util.stream.Collectors;
 import javax.imageio.ImageIO;
 import org.apache.pdfbox.Loader;
 import org.apache.pdfbox.pdmodel.PDDocument;
@@ -24,33 +21,23 @@
 /**
  * PDF extraction class.
  */
-public class PDFExtractor {
-  private final PDDocument sourcePdf;
-  private final String filename;
-
-  /**
-   * Init from a path.
-   *
-   * @param filePath Path to the file.
-   * @throws IOException Throws if the file can't be accessed.
-   */
-  public PDFExtractor(String filePath) throws IOException {
-    this(new LocalInputSource(filePath));
-  }
+public class BasePDFExtractor {
+  protected final PDDocument sourcePdf;
+  protected final String filename;
 
   /**
    * Init from a {@link LocalInputSource}.
    *
    * @param source The local source.
    * @throws IOException Throws if the file can't be accessed.
    */
-  public PDFExtractor(LocalInputSource source) throws IOException {
+  protected BasePDFExtractor(LocalInputSource source) throws IOException {
     this.filename = source.getFilename();
     if (source.isPdf()) {
       this.sourcePdf = Loader.loadPDF(source.getFile());
     } else {
-      PDDocument document = new PDDocument();
-      PDPage page = new PDPage();
+      var document = new PDDocument();
+      var page = new PDPage();
       document.addPage(page);
       BufferedImage bufferedImage = byteArrayToBufferedImage(source.getFile());
       PDImageXObject pdImage = LosslessFactory.createFromImage(document, bufferedImage);
@@ -65,7 +52,6 @@ public PDFExtractor(LocalInputSource source) throws IOException {
           );
       }
       this.sourcePdf = document;
-
     }
   }
 
@@ -101,7 +87,7 @@ public static BufferedImage byteArrayToBufferedImage(byte[] byteArray) throws IO
   public List<ExtractedPDF> extractSubDocuments(
       List<List<Integer>> pageIndexes
   ) throws IOException {
-    List<ExtractedPDF> extractedPDFs = new ArrayList<>();
+    var extractedPDFs = new ArrayList<ExtractedPDF>();
 
     for (List<Integer> pageIndexElement : pageIndexes) {
       if (pageIndexElement.isEmpty()) {
@@ -126,65 +112,4 @@ public List<ExtractedPDF> extractSubDocuments(
     }
     return extractedPDFs;
   }
-
-  /**
-   * Extract invoices from the given page indexes (from an invoice-splitter prediction).
-   *
-   * @param pageIndexes List of page indexes.
-   * @return a list of extracted files.
-   * @throws IOException Throws if the file can't be accessed.
-   */
-  public List<ExtractedPDF> extractInvoices(
-      List<InvoiceSplitterV1InvoicePageGroup> pageIndexes
-  ) throws IOException {
-
-    List<List<Integer>> indexes = pageIndexes
-      .stream()
-      .map(InvoiceSplitterV1InvoicePageGroup::getPageIndexes)
-      .collect(Collectors.toList());
-
-    return extractSubDocuments(indexes);
-  }
-
-  /**
-   * Extract invoices from the given page indexes (from an invoice-splitter prediction).
-   *
-   * @param pageIndexes List of page indexes.
-   * @param strict Whether the extraction should strictly follow the confidence scores or not.
-   * @return a list of extracted files.
-   * @throws IOException Throws if the file can't be accessed.
-   */
-  public List<ExtractedPDF> extractInvoices(
-      List<InvoiceSplitterV1InvoicePageGroup> pageIndexes,
-      boolean strict
-  ) throws IOException {
-    List<List<Integer>> correctPageIndexes = new ArrayList<>();
-    if (!strict) {
-      return extractInvoices(pageIndexes);
-    }
-    Iterator<InvoiceSplitterV1InvoicePageGroup> iterator = pageIndexes.iterator();
-    List<Integer> currentList = new ArrayList<>();
-    Double previousConfidence = null;
-    while (iterator.hasNext()) {
-      InvoiceSplitterV1InvoicePageGroup pageIndex = iterator.next();
-      Double confidence = pageIndex.getConfidence();
-      List<Integer> pageList = pageIndex.getPageIndexes();
-
-      if (confidence == 1.0 && previousConfidence == null) {
-        currentList = new ArrayList<>(pageList);
-      } else if (confidence == 1.0) {
-        correctPageIndexes.add(currentList);
-        currentList = new ArrayList<>(pageList);
-      } else if (confidence == 0.0 && !iterator.hasNext()) {
-        currentList.addAll(pageList);
-        correctPageIndexes.add(currentList);
-      } else {
-        correctPageIndexes.add(currentList);
-        correctPageIndexes.add(pageList);
-      }
-      previousConfidence = confidence;
-    }
-    return extractSubDocuments(correctPageIndexes);
-  }
-
 }
@@ -11,32 +11,30 @@
 import java.util.Set;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
-import java.util.stream.Stream;
 import org.apache.pdfbox.Loader;
 import org.apache.pdfbox.pdmodel.PDDocument;
 
 /**
  * Allows performing various operations on PDFs.
  */
-public final class PdfBoxApi implements PdfOperation {
+public final class PDFBoxApi implements PDFOperation {
 
   @Override
-  public SplitPdf split(SplitQuery splitQuery) throws IOException {
+  public SplitPDF split(SplitQuery splitQuery) throws IOException {
 
     if (!checkPdfOpen(splitQuery.getFile())) {
       throw new MindeeException("This document cannot be open and cannot be split.");
     }
 
-    try (PDDocument originalDocument = Loader.loadPDF(splitQuery.getFile())) {
-      try (PDDocument splitDocument = new PDDocument()) {
+    try (var originalDocument = Loader.loadPDF(splitQuery.getFile())) {
+      try (var splitDocument = new PDDocument()) {
         int totalOriginalPages = countPages(splitQuery.getFile());
 
         if (totalOriginalPages < splitQuery.getPageOptions().getOnMinPages()) {
-          return new SplitPdf(splitQuery.getFile(), totalOriginalPages);
+          return new SplitPDF(splitQuery.getFile(), totalOriginalPages);
         }
 
-        List<Integer> pageRange = getPageRanges(splitQuery.getPageOptions(), totalOriginalPages);
-
+        var pageRange = getPageRanges(splitQuery.getPageOptions(), totalOriginalPages);
         pageRange
           .stream()
           .filter(i -> i < totalOriginalPages)
@@ -45,7 +43,7 @@ public SplitPdf split(SplitQuery splitQuery) throws IOException {
         try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
           splitDocument.save(outputStream);
           byte[] splitPdf = outputStream.toByteArray();
-          return new SplitPdf(splitPdf, countPages(splitPdf));
+          return new SplitPDF(splitPdf, countPages(splitPdf));
         }
       }
     }
@@ -55,12 +53,12 @@ private List<Integer> getPageRanges(PageOptions pageOptions, Integer numberOfPag
 
     Set<Integer> pages = Optional
       .ofNullable(pageOptions.getPageIndexes())
-      .map(Collection::stream)
-      .orElseGet(Stream::empty)
+      .stream()
+      .flatMap(Collection::stream)
       .filter(x -> x > (numberOfPages) * (-1) && x <= (numberOfPages - 1))
       .map(x -> (numberOfPages + x) % numberOfPages)
       .collect(Collectors.toSet());
-    List<Integer> allPages = IntStream.range(0, numberOfPages).boxed().collect(Collectors.toList());
+    var allPages = IntStream.range(0, numberOfPages).boxed().collect(Collectors.toList());
 
     switch (pageOptions.getOperation()) {
       case KEEP_ONLY:
@@ -85,9 +83,6 @@ private boolean checkPdfOpen(byte[] documentFile) {
   }
 
   private int countPages(byte[] documentFile) throws IOException {
-    PDDocument document = Loader.loadPDF(documentFile);
-    int pageCount = document.getNumberOfPages();
-    document.close();
-    return pageCount;
+    return PDFUtils.getNumberOfPages(documentFile);
   }
 }
@@ -11,14 +11,13 @@
 import org.apache.pdfbox.pdmodel.PDPageContentStream;
 import org.apache.pdfbox.pdmodel.common.PDRectangle;
 import org.apache.pdfbox.pdmodel.graphics.image.JPEGFactory;
-import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
 import org.apache.pdfbox.rendering.ImageType;
 import org.apache.pdfbox.rendering.PDFRenderer;
 
 /**
  * PDF compression class.
  */
-public class PdfCompressor {
+public class PDFCompressor {
   public static byte[] compressPdf(
       byte[] pdfData,
       Integer imageQuality,
@@ -44,10 +43,10 @@ public static byte[] compressPdf(
     }
     try (PDDocument inputDoc = Loader.loadPDF(pdfData); PDDocument outputDoc = new PDDocument()) {
 
-      PDFRenderer pdfRenderer = new PDFRenderer(inputDoc);
+      var pdfRenderer = new PDFRenderer(inputDoc);
 
       for (int pageIndex = 0; pageIndex < inputDoc.getNumberOfPages(); pageIndex++) {
-        PDPage originalPage = inputDoc.getPage(pageIndex);
+        var originalPage = inputDoc.getPage(pageIndex);
         PDRectangle originalPageSize = originalPage.getMediaBox();
 
         processPage(
@@ -92,12 +91,12 @@ private static void processPage(
       PDRectangle originalPageSize,
       Boolean disableSourceText
   ) throws IOException {
-    PDPage newPage = new PDPage(originalPageSize);
+    var newPage = new PDPage(originalPageSize);
     outputDoc.addPage(newPage);
 
-    PDImageXObject pdImage = JPEGFactory.createFromImage(outputDoc, image, imageQuality);
+    var pdImage = JPEGFactory.createFromImage(outputDoc, image, imageQuality);
 
-    try (PDPageContentStream contentStream = new PDPageContentStream(outputDoc, newPage)) {
+    try (var contentStream = new PDPageContentStream(outputDoc, newPage)) {
       PDFUtils.addImageToPage(contentStream, pdImage, originalPageSize);
       PDFUtils.extractAndAddText(originalDocument, contentStream, pageIndex, disableSourceText);
     }
 
@@ -5,13 +5,13 @@
 /**
  * Minimum PDF operations.
  */
-public interface PdfOperation {
+public interface PDFOperation {
 
   /**
    * Split a PDF file.
    *
    * @param splitQuery Options to perform the query.
    * @return The split PDF.
    */
-  SplitPdf split(SplitQuery splitQuery) throws IOException;
+  SplitPDF split(SplitQuery splitQuery) throws IOException;
 }
@@ -69,8 +69,8 @@ private static byte[] createPdfFromExistingPdf(
       List<Integer> pageNumbers,
       boolean closeOriginal
   ) throws IOException {
-    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
-    PDDocument newDocument = new PDDocument();
+    var outputStream = new ByteArrayOutputStream();
+    var newDocument = new PDDocument();
     int pageCount = document.getNumberOfPages();
     pageNumbers
       .stream()
@@ -161,7 +161,7 @@ public static List<PdfPageImage> pdfToImages(String filePath) throws IOException
    */
   public static List<PdfPageImage> pdfToImages(LocalInputSource source) throws IOException {
     PDDocument document = Loader.loadPDF(source.getFile());
-    PDFRenderer pdfRenderer = new PDFRenderer(document);
+    var pdfRenderer = new PDFRenderer(document);
     List<PdfPageImage> pdfPageImages = new ArrayList<>();
     for (int i = 0; i < document.getNumberOfPages(); i++) {
       BufferedImage imageBuffer = pdfPageToImageBuffer(i, document, pdfRenderer);
@@ -201,7 +201,7 @@ public static PdfPageImage pdfPageToImage(
   ) throws IOException {
     int index = pageNumber - 1;
     PDDocument document = Loader.loadPDF(source.getFile());
-    PDFRenderer pdfRenderer = new PDFRenderer(document);
+    var pdfRenderer = new PDFRenderer(document);
     BufferedImage imageBuffer = pdfPageToImageBuffer(index, document, pdfRenderer);
     document.close();
     return new PdfPageImage(imageBuffer, index, source.getFilename(), "jpg");
@@ -226,7 +226,7 @@ private static BufferedImage pdfPageToImageBuffer(
   }
 
   public static byte[] documentToBytes(PDDocument document) throws IOException {
-    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
+    var outputStream = new ByteArrayOutputStream();
     document.save(outputStream);
     return outputStream.toByteArray();
   }