Skip to content

Commit 692bd71

Browse files
committed
♻️ rework URL inputs (#318)
1 parent b91646a commit 692bd71

10 files changed

Lines changed: 97 additions & 89 deletions

File tree

src/main/java/com/mindee/input/InputSourceUtils.java

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import com.mindee.MindeeException;
44
import java.io.ByteArrayInputStream;
55
import java.io.IOException;
6-
import java.net.URL;
76
import org.apache.pdfbox.Loader;
87
import org.apache.pdfbox.io.RandomAccessReadBuffer;
98
import org.apache.pdfbox.pdmodel.PDDocument;
@@ -79,15 +78,6 @@ public static boolean isPdf(byte[] fileBytes) {
7978
return true;
8079
}
8180

82-
/**
83-
* Ensures the URL can be sent to the Mindee server.
84-
*/
85-
public static void validateUrl(URL inputUrl) {
86-
if (!"https".equalsIgnoreCase(inputUrl.getProtocol())) {
87-
throw new MindeeException("Only HTTPS source URLs are allowed");
88-
}
89-
}
90-
9181
/**
9282
* Returns true if the source PDF has source text inside. Returns false for images.
9383
*

src/main/java/com/mindee/input/URLInputSource.java

Lines changed: 41 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
package com.mindee.input;
22

3+
import com.mindee.MindeeException;
34
import java.io.File;
45
import java.io.FileOutputStream;
56
import java.io.IOException;
67
import java.io.InputStream;
78
import java.io.OutputStream;
89
import java.net.HttpURLConnection;
10+
import java.net.MalformedURLException;
911
import java.net.URL;
1012
import java.nio.file.Files;
1113
import java.nio.file.Path;
@@ -19,7 +21,7 @@
1921
*/
2022
public class URLInputSource {
2123
@Getter
22-
private final String url;
24+
private final URL url;
2325
private final String username;
2426
private final String password;
2527
@Getter
@@ -43,20 +45,21 @@ public class URLInputSource {
4345
* @param url URL to fetch the file from.
4446
* @return An instance of {@link URLInputSource}.
4547
*/
46-
public static Builder builder(String url) {
47-
return new Builder(url);
48+
public static Builder builder(String url) throws MalformedURLException {
49+
return new Builder(new URL(url));
4850
}
4951

50-
private HttpURLConnection prepareConnection() throws IOException {
51-
HttpURLConnection connection = createConnection(url);
52-
connection = handleRedirects(connection);
52+
public static Builder builder(URL url) {
53+
return new Builder(url);
54+
}
5355

54-
int responseCode = connection.getResponseCode();
55-
if (responseCode != HttpURLConnection.HTTP_OK) {
56-
throw new IOException("Failed to fetch file: " + responseCode);
56+
/**
57+
* Ensures the URL can be sent to the Mindee server.
58+
*/
59+
public void validateSecure() {
60+
if (!"https".equalsIgnoreCase(this.url.getProtocol())) {
61+
throw new MindeeException("Only HTTPS source URLs are allowed");
5762
}
58-
59-
return connection;
6063
}
6164

6265
/**
@@ -72,8 +75,20 @@ public void fetchFile() throws IOException {
7275
}
7376
}
7477

75-
protected HttpURLConnection createConnection(String urlString) throws IOException {
76-
HttpURLConnection connection = (HttpURLConnection) new URL(urlString).openConnection();
78+
private HttpURLConnection prepareConnection() throws IOException {
79+
HttpURLConnection connection = createConnection(url);
80+
connection = handleRedirects(connection);
81+
82+
int responseCode = connection.getResponseCode();
83+
if (responseCode != HttpURLConnection.HTTP_OK) {
84+
throw new IOException("Failed to fetch file: " + responseCode);
85+
}
86+
87+
return connection;
88+
}
89+
90+
protected HttpURLConnection createConnection(URL url) throws IOException {
91+
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
7792
connection.setInstanceFollowRedirects(true);
7893

7994
if (username != null && password != null) {
@@ -101,7 +116,7 @@ private HttpURLConnection handleRedirects(HttpURLConnection connection) throws I
101116
String newUrl = connection.getHeaderField("Location");
102117
connection.disconnect();
103118

104-
HttpURLConnection newConnection = createConnection(newUrl);
119+
HttpURLConnection newConnection = createConnection(new URL(newUrl));
105120
return handleRedirects(newConnection); // Recursive call to handle multiple redirects
106121
}
107122
return connection;
@@ -189,18 +204,27 @@ public void cleanup() {
189204
* Builder class for a URLInputSource.
190205
*/
191206
public static class Builder {
192-
private final String url;
207+
private final URL url;
193208
private String username;
194209
private String password;
195210
private String localFilename;
196211
private String token;
197212

198213
/**
199-
* Default constructor.
214+
* String constructor.
215+
*
216+
* @param url Remote URL resource.
217+
*/
218+
public Builder(String url) throws MalformedURLException {
219+
this.url = new URL(url);
220+
}
221+
222+
/**
223+
* URL constructor.
200224
*
201225
* @param url Remote URL resource.
202226
*/
203-
public Builder(String url) {
227+
public Builder(URL url) {
204228
this.url = url;
205229
}
206230

src/main/java/com/mindee/v1/MindeeClient.java

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
package com.mindee.v1;
22

33
import com.mindee.MindeeException;
4-
import com.mindee.input.InputSourceUtils;
54
import com.mindee.input.LocalInputSource;
65
import com.mindee.input.PageOptions;
6+
import com.mindee.input.URLInputSource;
77
import com.mindee.pdf.PDFBoxApi;
88
import com.mindee.pdf.PDFOperation;
99
import com.mindee.v1.clientOptions.PollingOptions;
@@ -203,7 +203,6 @@ public <T extends Inference> AsyncPredictResponse<T> enqueue(
203203
Class<T> type,
204204
URL sourceUrl
205205
) throws IOException {
206-
InputSourceUtils.validateUrl(sourceUrl);
207206
return this.enqueue(type, new Endpoint(type), null, null, null, sourceUrl);
208207
}
209208

@@ -222,7 +221,6 @@ public <T extends Inference> AsyncPredictResponse<T> enqueue(
222221
URL sourceUrl,
223222
PredictOptions predictOptions
224223
) throws IOException {
225-
InputSourceUtils.validateUrl(sourceUrl);
226224
return this.enqueue(type, new Endpoint(type), null, null, predictOptions, sourceUrl);
227225
}
228226

@@ -232,8 +230,12 @@ private <T extends Inference> AsyncPredictResponse<T> enqueue(
232230
byte[] file,
233231
String filename,
234232
PredictOptions predictOptions,
235-
URL urlInputSource
233+
URL url
236234
) throws IOException {
235+
URLInputSource urlInputSource = null;
236+
if (url != null) {
237+
urlInputSource = new URLInputSource.Builder(url).build();
238+
}
237239
RequestParameters params = RequestParameters
238240
.builder()
239241
.file(file)
@@ -402,7 +404,6 @@ public <T extends Inference> AsyncPredictResponse<T> enqueueAndParse(
402404
Class<T> type,
403405
URL sourceUrl
404406
) throws IOException, InterruptedException {
405-
InputSourceUtils.validateUrl(sourceUrl);
406407
return this.enqueueAndParse(type, new Endpoint(type), null, null, null, null, sourceUrl);
407408
}
408409

@@ -441,11 +442,12 @@ private <T extends Inference> AsyncPredictResponse<T> enqueueAndParse(
441442
byte[] file,
442443
String filename,
443444
PredictOptions predictOptions,
444-
URL urlInputSource
445+
URL url
445446
) throws IOException, InterruptedException {
446447
if (pollingOptions == null) {
447448
pollingOptions = PollingOptions.builder().build();
448449
}
450+
449451
this.validateAsyncParams(pollingOptions);
450452
final int initialDelaySec = (int) (pollingOptions.getInitialDelaySec() * 1000);
451453
final int intervalSec = (int) (pollingOptions.getIntervalSec() * 1000);
@@ -456,7 +458,7 @@ private <T extends Inference> AsyncPredictResponse<T> enqueueAndParse(
456458
file,
457459
filename,
458460
predictOptions,
459-
urlInputSource
461+
url
460462
);
461463

462464
String jobId = enqueueResponse.getJob().getId();
@@ -648,7 +650,6 @@ public <T extends Inference> PredictResponse<T> parse(
648650
Class<T> type,
649651
URL urlInputSource
650652
) throws IOException {
651-
InputSourceUtils.validateUrl(urlInputSource);
652653
return this.parse(type, new Endpoint(type), null, null, null, urlInputSource);
653654
}
654655

@@ -667,7 +668,6 @@ public <T extends Inference> PredictResponse<T> parse(
667668
URL urlInputSource,
668669
PredictOptions predictOptions
669670
) throws IOException {
670-
InputSourceUtils.validateUrl(urlInputSource);
671671
return this.parse(type, new Endpoint(type), null, null, predictOptions, urlInputSource);
672672
}
673673

@@ -677,8 +677,12 @@ private <T extends Inference> PredictResponse<T> parse(
677677
byte[] file,
678678
String filename,
679679
PredictOptions predictOptions,
680-
URL urlInputSource
680+
URL url
681681
) throws IOException {
682+
URLInputSource urlInputSource = null;
683+
if (url != null) {
684+
urlInputSource = new URLInputSource.Builder(url).build();
685+
}
682686
RequestParameters params = RequestParameters
683687
.builder()
684688
.file(file)
@@ -760,7 +764,6 @@ public <T extends GeneratedV1> AsyncPredictResponse<T> enqueue(
760764
Endpoint endpoint,
761765
URL sourceUrl
762766
) throws IOException {
763-
InputSourceUtils.validateUrl(sourceUrl);
764767
return this.enqueue(type, endpoint, null, null, null, sourceUrl);
765768
}
766769

@@ -781,7 +784,6 @@ public <T extends GeneratedV1> AsyncPredictResponse<T> enqueue(
781784
URL sourceUrl,
782785
PredictOptions predictOptions
783786
) throws IOException {
784-
InputSourceUtils.validateUrl(sourceUrl);
785787
return this.enqueue(type, endpoint, null, null, predictOptions, sourceUrl);
786788
}
787789

@@ -893,7 +895,6 @@ public <T extends GeneratedV1> AsyncPredictResponse<T> enqueueAndParse(
893895
Endpoint endpoint,
894896
URL sourceUrl
895897
) throws IOException, InterruptedException {
896-
InputSourceUtils.validateUrl(sourceUrl);
897898
return this.enqueueAndParse(type, endpoint, null, null, null, null, sourceUrl);
898899
}
899900

@@ -1024,7 +1025,6 @@ public <T extends GeneratedV1> PredictResponse<T> parse(
10241025
Endpoint endpoint,
10251026
URL documentUrl
10261027
) throws IOException {
1027-
InputSourceUtils.validateUrl(documentUrl);
10281028
return this.parse(type, endpoint, null, null, null, documentUrl);
10291029
}
10301030

@@ -1045,7 +1045,6 @@ public <T extends GeneratedV1> PredictResponse<T> parse(
10451045
URL documentUrl,
10461046
PredictOptions predictOptions
10471047
) throws IOException {
1048-
InputSourceUtils.validateUrl(documentUrl);
10491048
return this.parse(type, endpoint, null, null, predictOptions, documentUrl);
10501049
}
10511050

src/main/java/com/mindee/v1/http/RequestParameters.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package com.mindee.v1.http;
22

3+
import com.mindee.input.URLInputSource;
34
import com.mindee.v1.clientOptions.PredictOptions;
45
import com.mindee.v1.clientOptions.WorkflowOptions;
56
import java.net.URL;
@@ -20,7 +21,7 @@ public class RequestParameters {
2021

2122
@Builder
2223
private RequestParameters(
23-
URL urlInputSource,
24+
URLInputSource urlInputSource,
2425
byte[] file,
2526
PredictOptions predictOptions,
2627
WorkflowOptions workflowOptions,
@@ -39,7 +40,12 @@ private RequestParameters(
3940
} else {
4041
this.workflowOptions = workflowOptions;
4142
}
42-
this.fileUrl = urlInputSource;
43+
if (urlInputSource != null) {
44+
urlInputSource.validateSecure();
45+
this.fileUrl = urlInputSource.getUrl();
46+
} else {
47+
this.fileUrl = null;
48+
}
4349
this.file = file;
4450
this.fileName = fileName;
4551
}

src/main/java/com/mindee/v2/MindeeClient.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ public JobResponse enqueue(
5454
* @param params The parameters to send along with the file.
5555
*/
5656
public JobResponse enqueue(URLInputSource inputSource, BaseParameters params) throws IOException {
57+
inputSource.validateSecure();
5758
return mindeeApi.reqPostEnqueue(inputSource, params);
5859
}
5960

@@ -141,8 +142,12 @@ public <TResponse extends CommonResponse> TResponse enqueueAndGetResult(
141142
URLInputSource inputSource,
142143
BaseParameters params
143144
) throws IOException, InterruptedException {
144-
JobResponse job = enqueue(inputSource, params);
145-
return pollAndFetch(responseClass, job, PollingOptions.builder().build());
145+
return enqueueAndGetResult(
146+
responseClass,
147+
inputSource,
148+
params,
149+
PollingOptions.builder().build()
150+
);
146151
}
147152

148153
/**
@@ -162,6 +167,7 @@ public <TResponse extends CommonResponse> TResponse enqueueAndGetResult(
162167
BaseParameters params,
163168
PollingOptions pollingOptions
164169
) throws IOException, InterruptedException {
170+
inputSource.validateSecure();
165171
JobResponse job = enqueue(inputSource, params);
166172
return pollAndFetch(responseClass, job, pollingOptions);
167173
}

src/main/java/com/mindee/v2/http/MindeeHttpApiV2.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ public JobResponse reqPostEnqueue(URLInputSource inputSource, BaseParameters opt
101101

102102
var builder = MultipartEntityBuilder.create();
103103
builder.setMode(HttpMultipartMode.EXTENDED);
104-
builder.addTextBody("url", inputSource.getUrl());
104+
builder.addTextBody("url", inputSource.getUrl().toString());
105105
post.setEntity(options.buildHttpBody(builder).build());
106106
return executeEnqueue(post);
107107
}

0 commit comments

Comments
 (0)