Skip to content

Commit c8d257c

Browse files
committed
feat: Implement the clear and task APIs
Also enabled parallel builds for performance. Fixes #188. Signed-off-by: Eric Deandrea <eric.deandrea@ibm.com>
1 parent 1b98230 commit c8d257c

File tree

50 files changed

+1332
-181
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+1332
-181
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
This is the repository for Docling Java, a Java API for using [Docling](https://github.com/docling-project).
44

55
[![Docs](https://img.shields.io/badge/docs-live-brightgreen)](https://docling-project.github.io/docling-java/)
6+
[![docling-core version](https://img.shields.io/maven-central/v/ai.docling/docling-core?label=docling-core
7+
)](https://docling-project.github.io/docling-java/dev/core)
68
[![docling-serve-api version](https://img.shields.io/maven-central/v/ai.docling/docling-serve-api?label=docling-serve-api
79
)](https://docling-project.github.io/docling-java/dev/docling-serve/serve-api/)
810
[![docling-serve-client version](https://img.shields.io/maven-central/v/ai.docling/docling-serve-client?label=docling-serve-client)](https://docling-project.github.io/docling-java/dev/docling-serve/serve-client/)

buildSrc/src/main/kotlin/docling-java-shared.gradle.kts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,13 @@ testing {
3636
}
3737
}
3838

39-
tasks.withType<Test> {
39+
tasks.withType<Test>().configureEach {
4040
// Use JUnit Platform for unit tests.
4141
useJUnitPlatform()
4242

43+
maxParallelForks = (Runtime.getRuntime().availableProcessors() / 2).coerceAtLeast(1)
44+
forkEvery = 100
45+
4346
testLogging {
4447
events("PASSED", "FAILED", "SKIPPED", "STANDARD_OUT", "STANDARD_ERROR")
4548
showStandardStreams = true
@@ -65,6 +68,10 @@ tasks.withType<Test> {
6568
}
6669
}
6770

71+
tasks.withType<JavaCompile>().configureEach {
72+
options.isFork = true
73+
}
74+
6875
tasks.withType<Javadoc> {
6976
isFailOnError = false
7077

docling-serve/docling-serve-api/build.gradle.kts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,6 @@ dependencies {
2020
}
2121

2222
tasks.withType<Javadoc> {
23+
source = sourceSets["main"].allJava
2324
exclude("**/lombok.config")
2425
}

docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeApi.java

Lines changed: 26 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,10 @@
11
package ai.docling.serve.api;
22

3-
import ai.docling.serve.api.chunk.request.HierarchicalChunkDocumentRequest;
4-
import ai.docling.serve.api.chunk.request.HybridChunkDocumentRequest;
5-
import ai.docling.serve.api.chunk.response.ChunkDocumentResponse;
6-
import ai.docling.serve.api.convert.request.ConvertDocumentRequest;
7-
import ai.docling.serve.api.convert.response.ConvertDocumentResponse;
8-
import ai.docling.serve.api.health.HealthCheckResponse;
9-
103
/**
114
* Docling Serve API interface.
125
*/
13-
public interface DoclingServeApi {
14-
15-
/**
16-
* Executes a health check for the API and retrieves the health status of the service.
17-
*
18-
* @return a {@link HealthCheckResponse} object containing the health status of the API.
19-
*/
20-
HealthCheckResponse health();
21-
22-
/**
23-
* Converts the provided document source(s) into a processed document based on the specified options.
24-
*
25-
* @param request the {@link ConvertDocumentRequest} containing the source(s), conversion options, and optional target.
26-
* @return a {@link ConvertDocumentResponse} containing the processed document data, processing details, and any errors.
27-
*/
28-
ConvertDocumentResponse convertSource(ConvertDocumentRequest request);
29-
30-
/**
31-
* Converts and chunks the provided document source(s) into a processed document based on the specified options
32-
* and using a hierarchical chunker for splitting the document into smaller chunks.
33-
*/
34-
ChunkDocumentResponse chunkSourceWithHierarchicalChunker(HierarchicalChunkDocumentRequest request);
35-
36-
/**
37-
* Converts and chunks the provided document source(s) into a processed document based on the specified options
38-
* and using a hybrid chunker for splitting the document into smaller chunks.
39-
*/
40-
ChunkDocumentResponse chunkSourceWithHybridChunker(HybridChunkDocumentRequest request);
41-
6+
public interface DoclingServeApi
7+
extends DoclingServeHealthApi, DoclingServeConvertApi, DoclingServeChunkApi, DoclingServeClearApi, DoclingServeTaskApi {
428
/**
439
* Creates and returns a builder instance capable of constructing a duplicate or modified
4410
* version of the current API instance. The builder provides a customizable way to adjust
@@ -98,6 +64,30 @@ default B logResponses() {
9864
*/
9965
B logResponses(boolean logResponses);
10066

67+
/**
68+
* Configures whether the API client should format JSON requests and responses in a "pretty" format.
69+
* Pretty formatting organizes the request and response data to improve readability,
70+
* typically by adding spacing and line breaks.
71+
*
72+
* This setting does not affect the functional content of the requests or responses but can
73+
* assist with debugging or human-readable output for development purposes.
74+
*
75+
* @param prettyPrint {@code true} to enable pretty-printing of JSON requests and responses;
76+
* {@code false} to use compact formatting.
77+
* @return {@code this} builder instance for fluent API usage.
78+
*/
79+
B prettyPrint(boolean prettyPrint);
80+
81+
/**
82+
* Configures the API client to format JSON requests and responses in a "pretty" format.
83+
* Pretty formatting improves readability by including spacing and line breaks.
84+
*
85+
* @return {@code this} builder instance for fluent API usage.
86+
*/
87+
default B prettyPrint() {
88+
return prettyPrint(true);
89+
}
90+
10191
/**
10292
* Builds and returns an instance of the specified type, representing the completed configuration
10393
* of the builder. The returned instance is typically an implementation of the Docling API.
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package ai.docling.serve.api;
2+
3+
import ai.docling.serve.api.chunk.request.HierarchicalChunkDocumentRequest;
4+
import ai.docling.serve.api.chunk.request.HybridChunkDocumentRequest;
5+
import ai.docling.serve.api.chunk.response.ChunkDocumentResponse;
6+
7+
/**
8+
* Represents the Docling Serve Chunk API, providing methods for processing document sources
9+
* by splitting them into smaller chunks using various chunking strategies. This interface
10+
* ensures flexibility by supporting both hierarchical and hybrid chunking mechanisms.
11+
*/
12+
public interface DoclingServeChunkApi {
13+
/**
14+
* Converts and chunks the provided document source(s) into a processed document based on the specified options
15+
* and using a hierarchical chunker for splitting the document into smaller chunks.
16+
*/
17+
ChunkDocumentResponse chunkSourceWithHierarchicalChunker(HierarchicalChunkDocumentRequest request);
18+
19+
/**
20+
* Converts and chunks the provided document source(s) into a processed document based on the specified options
21+
* and using a hybrid chunker for splitting the document into smaller chunks.
22+
*/
23+
ChunkDocumentResponse chunkSourceWithHybridChunker(HybridChunkDocumentRequest request);
24+
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
package ai.docling.serve.api;
2+
3+
import ai.docling.serve.api.clear.request.ClearResultsRequest;
4+
import ai.docling.serve.api.clear.response.ClearResponse;
5+
6+
/**
7+
* Interface representing the Docling Serve Clear API. This API provides functionality
8+
* for managing and cleaning up converters and stale data retained by the service.
9+
* It includes methods for clearing registered converters and stored results based
10+
* on specified thresholds or default configurations.
11+
*/
12+
public interface DoclingServeClearApi {
13+
/**
14+
* Clears all registered converters associated with the API.
15+
* This method removes any previously configured or cached converters,
16+
* effectively resetting the converter state to an uninitialized state.
17+
* After invoking this method, no converters will be available until new ones are added or configured.
18+
*/
19+
ClearResponse clearConverters();
20+
21+
/**
22+
* Clears previously stored results based on the criteria provided in the request.
23+
* This method removes stale results or data that meet the threshold specified
24+
* in the {@link ClearResultsRequest}. It is typically used to clean up older
25+
* or unused data retained by the service.
26+
*
27+
* @param request the {@link ClearResultsRequest} containing the criteria for clearing
28+
* results, such as the threshold duration for identifying stale data.
29+
* @return a {@link ClearResponse} indicating the outcome of the clear operation,
30+
* including status or potential errors, if applicable.
31+
*/
32+
ClearResponse clearResults(ClearResultsRequest request);
33+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
package ai.docling.serve.api;
2+
3+
import ai.docling.serve.api.convert.request.ConvertDocumentRequest;
4+
import ai.docling.serve.api.convert.response.ConvertDocumentResponse;
5+
6+
/**
7+
* Interface representing the Docling Serve Convert API.
8+
* This API is responsible for processing and converting document source(s) into
9+
* a structured or processed document format based on the specified conversion options.
10+
*/
11+
public interface DoclingServeConvertApi {
12+
/**
13+
* Converts the provided document source(s) into a processed document based on the specified options.
14+
*
15+
* @param request the {@link ConvertDocumentRequest} containing the source(s), conversion options, and optional target.
16+
* @return a {@link ConvertDocumentResponse} containing the processed document data, processing details, and any errors.
17+
*/
18+
ConvertDocumentResponse convertSource(ConvertDocumentRequest request);
19+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
package ai.docling.serve.api;
2+
3+
import ai.docling.serve.api.health.HealthCheckResponse;
4+
5+
/**
6+
* Interface for performing health checks on the Docling service API.
7+
* This API is designed to verify and report the operational status of the service.
8+
*/
9+
public interface DoclingServeHealthApi {
10+
/**
11+
* Executes a health check for the API and retrieves the health status of the service.
12+
*
13+
* @return a {@link HealthCheckResponse} object containing the health status of the API.
14+
*/
15+
HealthCheckResponse health();
16+
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package ai.docling.serve.api;
2+
3+
import ai.docling.serve.api.chunk.response.ChunkDocumentResponse;
4+
import ai.docling.serve.api.convert.response.ConvertDocumentResponse;
5+
import ai.docling.serve.api.task.request.TaskResultRequest;
6+
import ai.docling.serve.api.task.request.TaskStatusPollRequest;
7+
import ai.docling.serve.api.task.response.TaskStatusPollResponse;
8+
9+
/**
10+
* Defines the interface for the Docling Task API, which provides operations for
11+
* querying the status of asynchronous tasks and retrieving their results.
12+
*
13+
* This interface supports task status polling with an optional wait duration
14+
* and retrieval of a completed task's result as a converted or chunked document.
15+
* It is one of the interfaces extended by {@link DoclingServeApi}.
16+
*/
17+
public interface DoclingServeTaskApi {
18+
/**
19+
* Polls the status of a task using the provided request object.
20+
* This method allows querying the current status of an asynchronous task
21+
* in progress or completed, based on its unique identifier and other
22+
* optional parameters such as wait time.
23+
*
24+
* @param request the {@link TaskStatusPollRequest} containing the details
25+
* for polling, including the task identifier and optional wait duration.
26+
* @return a {@link TaskStatusPollResponse} containing the task's current status,
27+
* including progress, position in the queue, and other metadata,
28+
* if available.
29+
*/
30+
TaskStatusPollResponse pollTaskStatus(TaskStatusPollRequest request);
31+
32+
/**
33+
* Converts the task result from a completed process into a document conversion response.
34+
* This method processes the provided task result request, retrieves conversion data, and
35+
* returns the resulting document conversion details.
36+
*
37+
* @param request the {@link TaskResultRequest} containing the task identifier for which
38+
* the result is being retrieved and converted.
39+
* @return a {@link ConvertDocumentResponse} containing the details of the converted document,
40+
* such as the document data, processing time, status, and any associated errors.
41+
*/
42+
ConvertDocumentResponse convertTaskResult(TaskResultRequest request);
43+
44+
/**
45+
* Processes the result of a completed task request by transforming the task data into
46+
* a chunked document response. This method retrieves conversion data specific to the
47+
* requested task and generates a response containing the chunked document details.
48+
*
49+
* @param request the {@link TaskResultRequest} containing the unique task identifier
50+
* for which the result is being processed into chunks.
51+
* @return a {@link ChunkDocumentResponse} containing the details of the chunked document,
52+
* including the generated chunks, associated documents, processing time, and any
53+
* relevant metadata.
54+
*/
55+
ChunkDocumentResponse chunkTaskResult(TaskResultRequest request);
56+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
@NullMarked
2+
package ai.docling.serve.api.clear;
3+
4+
import org.jspecify.annotations.NullMarked;

0 commit comments

Comments
 (0)