Skip to content

Commit 2dd85f2

Browse files
authored
feat: Implement the clear and task APIs (#192)
* feat: Implement the clear and task APIs Also enabled parallel builds for performance Fixes #188 Signed-off-by: Eric Deandrea <eric.deandrea@ibm.com> * feat: Implement the clear and task APIs Also enabled parallel builds for performance Fixes #188 Signed-off-by: Eric Deandrea <eric.deandrea@ibm.com> * feat: Implement the clear and task APIs Also enabled parallel builds for performance Fixes #188 Signed-off-by: Eric Deandrea <eric.deandrea@ibm.com> * feat: Implement the clear and task APIs Also enabled parallel builds for performance Fixes #188 Signed-off-by: Eric Deandrea <eric.deandrea@ibm.com> --------- Signed-off-by: Eric Deandrea <eric.deandrea@ibm.com>
1 parent 6bba58d commit 2dd85f2

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+1236
-182
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
This is the repository for Docling Java, a Java API for using [Docling](https://github.com/docling-project).
44

55
[![Docs](https://img.shields.io/badge/docs-live-brightgreen)](https://docling-project.github.io/docling-java/)
6+
[![docling-core version](https://img.shields.io/maven-central/v/ai.docling/docling-core?label=docling-core
7+
)](https://docling-project.github.io/docling-java/dev/core)
68
[![docling-serve-api version](https://img.shields.io/maven-central/v/ai.docling/docling-serve-api?label=docling-serve-api
79
)](https://docling-project.github.io/docling-java/dev/docling-serve/serve-api/)
810
[![docling-serve-client version](https://img.shields.io/maven-central/v/ai.docling/docling-serve-client?label=docling-serve-client)](https://docling-project.github.io/docling-java/dev/docling-serve/serve-client/)

buildSrc/src/main/kotlin/docling-java-shared.gradle.kts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,13 @@ jacoco {
4141
toolVersion = libs.findVersion("jacoco").get().toString()
4242
}
4343

44-
tasks.withType<Test> {
44+
tasks.withType<Test>().configureEach {
4545
// Use JUnit Platform for unit tests.
4646
useJUnitPlatform()
4747

48+
maxParallelForks = (Runtime.getRuntime().availableProcessors() / 2).coerceAtLeast(1)
49+
forkEvery = 100
50+
4851
finalizedBy(tasks.named("jacocoTestReport"))
4952

5053
testLogging {
@@ -72,6 +75,10 @@ tasks.withType<Test> {
7275
}
7376
}
7477

78+
tasks.withType<JavaCompile>().configureEach {
79+
options.isFork = true
80+
}
81+
7582
tasks.withType<Javadoc> {
7683
isFailOnError = false
7784

docling-serve/docling-serve-api/build.gradle.kts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,6 @@ dependencies {
2020
}
2121

2222
tasks.withType<Javadoc> {
23+
source = sourceSets["main"].allJava
2324
exclude("**/lombok.config")
2425
}

docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeApi.java

Lines changed: 26 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,10 @@
11
package ai.docling.serve.api;
22

3-
import ai.docling.serve.api.chunk.request.HierarchicalChunkDocumentRequest;
4-
import ai.docling.serve.api.chunk.request.HybridChunkDocumentRequest;
5-
import ai.docling.serve.api.chunk.response.ChunkDocumentResponse;
6-
import ai.docling.serve.api.convert.request.ConvertDocumentRequest;
7-
import ai.docling.serve.api.convert.response.ConvertDocumentResponse;
8-
import ai.docling.serve.api.health.HealthCheckResponse;
9-
103
/**
114
* Docling Serve API interface.
125
*/
13-
public interface DoclingServeApi {
14-
15-
/**
16-
* Executes a health check for the API and retrieves the health status of the service.
17-
*
18-
* @return a {@link HealthCheckResponse} object containing the health status of the API.
19-
*/
20-
HealthCheckResponse health();
21-
22-
/**
23-
* Converts the provided document source(s) into a processed document based on the specified options.
24-
*
25-
* @param request the {@link ConvertDocumentRequest} containing the source(s), conversion options, and optional target.
26-
* @return a {@link ConvertDocumentResponse} containing the processed document data, processing details, and any errors.
27-
*/
28-
ConvertDocumentResponse convertSource(ConvertDocumentRequest request);
29-
30-
/**
31-
* Converts and chunks the provided document source(s) into a processed document based on the specified options
32-
* and using a hierarchical chunker for splitting the document into smaller chunks.
33-
*/
34-
ChunkDocumentResponse chunkSourceWithHierarchicalChunker(HierarchicalChunkDocumentRequest request);
35-
36-
/**
37-
* Converts and chunks the provided document source(s) into a processed document based on the specified options
38-
* and using a hybrid chunker for splitting the document into smaller chunks.
39-
*/
40-
ChunkDocumentResponse chunkSourceWithHybridChunker(HybridChunkDocumentRequest request);
41-
6+
public interface DoclingServeApi
7+
extends DoclingServeHealthApi, DoclingServeConvertApi, DoclingServeChunkApi, DoclingServeClearApi, DoclingServeTaskApi {
428
/**
439
* Creates and returns a builder instance capable of constructing a duplicate or modified
4410
* version of the current API instance. The builder provides a customizable way to adjust
@@ -98,6 +64,30 @@ default B logResponses() {
9864
*/
9965
B logResponses(boolean logResponses);
10066

67+
/**
68+
* Configures whether the API client should format JSON requests and responses in a "pretty" format.
69+
* Pretty formatting organizes the response data to improve readability,
70+
* typically by adding spacing and line breaks.
71+
*
72+
* This setting does not affect the functional content of the response but can
73+
* assist with debugging or human-readable output for development purposes.
74+
*
75+
* @param prettyPrint {@code true} to enable pretty-printing of JSON requests and responses;
76+
* {@code false} to use compact formatting.
77+
* @return {@code this} builder instance for fluent API usage.
78+
*/
79+
B prettyPrint(boolean prettyPrint);
80+
81+
/**
82+
* Configures the API client to format JSON requests and responses in a "pretty" format.
83+
* Pretty formatting improves readability by including spacing and line breaks.
84+
*
85+
* @return {@code this} builder instance for fluent API usage.
86+
*/
87+
default B prettyPrint() {
88+
return prettyPrint(true);
89+
}
90+
10191
/**
10292
* Builds and returns an instance of the specified type, representing the completed configuration
10393
* of the builder. The returned instance is typically an implementation of the Docling API.
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package ai.docling.serve.api;
2+
3+
import ai.docling.serve.api.chunk.request.HierarchicalChunkDocumentRequest;
4+
import ai.docling.serve.api.chunk.request.HybridChunkDocumentRequest;
5+
import ai.docling.serve.api.chunk.response.ChunkDocumentResponse;
6+
7+
/**
8+
* Represents the Docling Serve Chunk API, providing methods for processing document sources
9+
* by splitting them into smaller chunks using various chunking strategies. This interface
10+
* ensures flexibility by supporting both hierarchical and hybrid chunking mechanisms.
11+
*/
12+
public interface DoclingServeChunkApi {
13+
/**
14+
* Converts and chunks the provided document source(s) into a processed document based on the specified options
15+
* and using a hierarchical chunker for splitting the document into smaller chunks.
16+
*/
17+
ChunkDocumentResponse chunkSourceWithHierarchicalChunker(HierarchicalChunkDocumentRequest request);
18+
19+
/**
20+
* Converts and chunks the provided document source(s) into a processed document based on the specified options
21+
* and using a hybrid chunker for splitting the document into smaller chunks.
22+
*/
23+
ChunkDocumentResponse chunkSourceWithHybridChunker(HybridChunkDocumentRequest request);
24+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
package ai.docling.serve.api;
2+
3+
import java.time.Duration;
4+
5+
import org.jspecify.annotations.Nullable;
6+
7+
import ai.docling.serve.api.clear.response.ClearResponse;
8+
9+
/**
10+
* Interface representing the Docling Serve Clear API. This API provides functionality
11+
* for managing and cleaning up converters and stale data retained by the service.
12+
* It includes methods for clearing registered converters and stored results based
13+
* on specified thresholds or default configurations.
14+
*/
15+
public interface DoclingServeClearApi {
16+
/**
17+
* Represents the default duration used as a threshold for clearing stale results
18+
* or data in the Docling Serve Clear API. Results older than this duration
19+
* are considered stale and may be subject to cleanup.
20+
*
21+
* The value is predefined as 1 hour (3600 seconds).
22+
*/
23+
Duration DEFAULT_OLDER_THAN = Duration.ofSeconds(3600);
24+
25+
/**
26+
* Clears all registered converters associated with the API.
27+
* This method removes any previously configured or cached converters,
28+
* effectively resetting the converter state to an uninitialized state.
29+
* After invoking this method, no converters will be available until new ones are added or configured.
30+
*/
31+
ClearResponse clearConverters();
32+
33+
/**
34+
* Clears stored results that are older than the specified duration threshold.
35+
* This method is used for housekeeping to remove stale or outdated data from the system.
36+
*
37+
* @param olderThen the duration threshold; only results older than this duration will be cleared.
38+
* @return a {@link ClearResponse} object containing the status of the clear operation.
39+
*/
40+
ClearResponse clearResults(@Nullable Duration olderThen);
41+
42+
/**
43+
* Clears stored results that are older than the default duration threshold.
44+
* This method uses the pre-defined {@code DEFAULT_OLDER_THAN} as the threshold
45+
* to determine which results are considered stale and should be removed.
46+
*
47+
* @return a {@link ClearResponse} object containing the status of the clear operation.
48+
*/
49+
default ClearResponse clearResults() {
50+
return clearResults(DEFAULT_OLDER_THAN);
51+
}
52+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
package ai.docling.serve.api;
2+
3+
import ai.docling.serve.api.convert.request.ConvertDocumentRequest;
4+
import ai.docling.serve.api.convert.response.ConvertDocumentResponse;
5+
6+
/**
7+
* Interface representing the Docling Serve Convert API.
8+
* This API is responsible for processing and converting document source(s) into
9+
* a structured or processed document format based on the specified conversion options.
10+
*/
11+
public interface DoclingServeConvertApi {
12+
/**
13+
* Converts the provided document source(s) into a processed document based on the specified options.
14+
*
15+
* @param request the {@link ConvertDocumentRequest} containing the source(s), conversion options, and optional target.
16+
* @return a {@link ConvertDocumentResponse} containing the processed document data, processing details, and any errors.
17+
*/
18+
ConvertDocumentResponse convertSource(ConvertDocumentRequest request);
19+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
package ai.docling.serve.api;
2+
3+
import ai.docling.serve.api.health.HealthCheckResponse;
4+
5+
/**
6+
* Interface for performing health checks on the Docling service API.
7+
* This API is designed to verify and report the operational status of the service.
8+
*/
9+
public interface DoclingServeHealthApi {
10+
/**
11+
* Executes a health check for the API and retrieves the health status of the service.
12+
*
13+
* @return a {@link HealthCheckResponse} object containing the health status of the API.
14+
*/
15+
HealthCheckResponse health();
16+
}
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
package ai.docling.serve.api;
2+
3+
import java.time.Duration;
4+
5+
import org.jspecify.annotations.Nullable;
6+
7+
import ai.docling.serve.api.chunk.response.ChunkDocumentResponse;
8+
import ai.docling.serve.api.convert.response.ConvertDocumentResponse;
9+
import ai.docling.serve.api.task.response.TaskStatusPollResponse;
10+
11+
/**
12+
* Defines the interface for the Docling Task API, which provides operations for
13+
* managing and querying the status of asynchronous tasks.
14+
*
15+
* This interface supports task status polling with configurable wait durations
16+
* and a default polling mechanism. It is one of the interfaces extended by
17+
* {@link DoclingServeApi}.
18+
*/
19+
public interface DoclingServeTaskApi {
20+
/**
21+
* The default wait time between status polling attempts for a task.
22+
* <p>
23+
* This value is used when no explicit wait time is specified in a
24+
* {@code TaskStatusPollRequest} instance. It is set to {@link Duration#ZERO},
25+
* meaning there is no delay by default between consecutive polling attempts.
26+
* </p>
27+
*/
28+
Duration DEFAULT_STATUS_POLL_WAIT_TIME = Duration.ZERO;
29+
30+
/**
31+
* Polls the status of a task asynchronously and retrieves its current state.
32+
* Allows for configurable wait time between polling attempts.
33+
* If the wait time is {@code null}, the default wait time ({@link #DEFAULT_STATUS_POLL_WAIT_TIME}) is used.
34+
*
35+
* @param taskId the unique identifier of the task whose status is being polled
36+
* @param waitTime the duration to wait before polling the status, or null to use the default polling interval
37+
* @return a {@link TaskStatusPollResponse} containing the current status of the task and associated metadata
38+
*/
39+
TaskStatusPollResponse pollTaskStatus(String taskId, @Nullable Duration waitTime);
40+
41+
/**
42+
* Polls the status of a task asynchronously using the default wait time.
43+
* This convenience method delegates to {@link #pollTaskStatus(String, Duration)}
44+
* with {@code DEFAULT_STATUS_POLL_WAIT_TIME} as the wait time.
45+
*
46+
* @param taskId the unique identifier of the task whose status is being polled
47+
* @return a {@link TaskStatusPollResponse} containing the current status of the task
48+
* and associated metadata
49+
*/
50+
default TaskStatusPollResponse pollTaskStatus(String taskId) {
51+
return pollTaskStatus(taskId, DEFAULT_STATUS_POLL_WAIT_TIME);
52+
}
53+
54+
/**
55+
* Converts the completed task result identified by the provided task ID into a document response.
56+
* This method processes the task data associated with the given ID and generates a response
57+
* encapsulating the converted document details.
58+
*
59+
* @param taskId the unique identifier of the task whose result needs to be converted into a document response
60+
* @return a {@link ConvertDocumentResponse} containing the details of the converted document, processing metadata,
61+
* errors (if any), and other relevant information
62+
*/
63+
ConvertDocumentResponse convertTaskResult(String taskId);
64+
65+
/**
66+
* Processes the results of a completed task identified by the given task ID and generates a
67+
* response containing chunked document details. This method is used to break down the document
68+
* associated with the task into manageable chunks, making it suitable for subsequent processing
69+
* or analysis.
70+
*
71+
* @param taskId the unique identifier of the task whose result is to be processed and chunked into
72+
* a {@link ChunkDocumentResponse}
73+
* @return a {@link ChunkDocumentResponse} containing the chunked document details and related metadata
74+
*/
75+
ChunkDocumentResponse chunkTaskResult(String taskId);
76+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
@NullMarked
2+
package ai.docling.serve.api.clear;
3+
4+
import org.jspecify.annotations.NullMarked;

0 commit comments

Comments
 (0)