From f7859fbcf47f8a1b4f584b4a96211585bce9b25c Mon Sep 17 00:00:00 2001 From: Eric Deandrea Date: Tue, 9 Dec 2025 15:00:46 -0500 Subject: [PATCH 1/4] feat: Implement the clear and task APIs Also enabled parallel builds for performance Fixes #188 Signed-off-by: Eric Deandrea --- README.md | 2 + .../kotlin/docling-java-shared.gradle.kts | 9 +- .../docling-serve-api/build.gradle.kts | 1 + .../ai/docling/serve/api/DoclingServeApi.java | 62 +- .../serve/api/DoclingServeChunkApi.java | 24 + .../serve/api/DoclingServeClearApi.java | 33 ++ .../serve/api/DoclingServeConvertApi.java | 19 + .../serve/api/DoclingServeHealthApi.java | 16 + .../serve/api/DoclingServeTaskApi.java | 56 ++ .../docling/serve/api/clear/package-info.java | 4 + .../clear/request/ClearResultsRequest.java | 34 ++ .../serve/api/clear/request/package-info.java | 4 + .../api/clear/response/ClearResponse.java | 33 ++ .../api/clear/response/package-info.java | 4 + .../DurationSerializationFormat.java | 1 - .../serve/api/serialization/package-info.java | 7 + .../docling/serve/api/task/package-info.java | 7 + .../api/task/request/TaskResultRequest.java | 9 + .../task/request/TaskStatusPollRequest.java | 46 ++ .../serve/api/task/request/package-info.java | 7 + .../serve/api/task/response/TaskStatus.java | 21 + .../api/task/response/TaskStatusMetadata.java | 47 ++ .../task/response/TaskStatusPollResponse.java | 55 ++ .../serve/api/task/response/package-info.java | 7 + .../src/main/java/module-info.java | 8 + .../request/ClearResultsRequestTests.java | 22 + .../clear/response/ClearResponseTests.java | 36 ++ .../task/request/TaskResultRequestTests.java | 14 + .../request/TaskStatusPollRequestTests.java | 36 ++ .../docling-serve-client/build.gradle.kts | 2 + .../docling/serve/client/ChunkOperations.java | 34 ++ .../docling/serve/client/ClearOperations.java | 34 ++ .../serve/client/ConvertOperations.java | 27 + .../serve/client/DoclingServeClient.java | 89 ++- .../client/DoclingServeClientException.java | 35 ++ 
.../client/DoclingServeJackson2Client.java | 4 +- .../client/DoclingServeJackson3Client.java | 4 +- .../serve/client/HealthOperations.java | 25 + .../docling/serve/client/HttpOperations.java | 31 + .../docling/serve/client/TaskOperations.java | 51 ++ .../AbstractDoclingServeClientTests.java | 534 +++++++++++++----- .../DoclingServeJackson2ClientTests.java | 1 + .../DoclingServeJackson3ClientTests.java | 1 + .../src/main/resources/application.yml | 2 + docs/src/doc/docs/core.md | 2 + docs/src/doc/docs/docling-serve/serve-api.md | 2 + .../doc/docs/docling-serve/serve-client.md | 2 + docs/src/doc/docs/testcontainers.md | 2 + docs/src/doc/docs/whats-new.md | 8 +- gradle.properties | 3 + gradle/libs.versions.toml | 4 + 51 files changed, 1339 insertions(+), 182 deletions(-) create mode 100644 docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeChunkApi.java create mode 100644 docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeClearApi.java create mode 100644 docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeConvertApi.java create mode 100644 docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeHealthApi.java create mode 100644 docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeTaskApi.java create mode 100644 docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/package-info.java create mode 100644 docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/request/ClearResultsRequest.java create mode 100644 docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/request/package-info.java create mode 100644 docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/response/ClearResponse.java create mode 100644 docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/response/package-info.java create mode 100644 
docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/serialization/package-info.java create mode 100644 docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/package-info.java create mode 100644 docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/TaskResultRequest.java create mode 100644 docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/TaskStatusPollRequest.java create mode 100644 docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/package-info.java create mode 100644 docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/TaskStatus.java create mode 100644 docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/TaskStatusMetadata.java create mode 100644 docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/TaskStatusPollResponse.java create mode 100644 docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/package-info.java create mode 100644 docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/clear/request/ClearResultsRequestTests.java create mode 100644 docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/clear/response/ClearResponseTests.java create mode 100644 docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/task/request/TaskResultRequestTests.java create mode 100644 docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/task/request/TaskStatusPollRequestTests.java create mode 100644 docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ChunkOperations.java create mode 100644 docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ClearOperations.java create mode 100644 docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ConvertOperations.java create mode 100644 
docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeClientException.java create mode 100644 docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/HealthOperations.java create mode 100644 docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/HttpOperations.java create mode 100644 docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/TaskOperations.java diff --git a/README.md b/README.md index bcf861a..2e4e607 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,8 @@ This is the repository for Docling Java, a Java API for using [Docling](https://github.com/docling-project). [![Docs](https://img.shields.io/badge/docs-live-brightgreen)](https://docling-project.github.io/docling-java/) +[![docling-core version](https://img.shields.io/maven-central/v/ai.docling/docling-core?label=docling-core +)](https://docling-project.github.io/docling-java/dev/core) [![docling-serve-api version](https://img.shields.io/maven-central/v/ai.docling/docling-serve-api?label=docling-serve-api )](https://docling-project.github.io/docling-java/dev/docling-serve/serve-api/) [![docling-serve-client version](https://img.shields.io/maven-central/v/ai.docling/docling-serve-client?label=docling-serve-client)](https://docling-project.github.io/docling-java/dev/docling-serve/serve-client/) diff --git a/buildSrc/src/main/kotlin/docling-java-shared.gradle.kts b/buildSrc/src/main/kotlin/docling-java-shared.gradle.kts index 74d1049..d660805 100644 --- a/buildSrc/src/main/kotlin/docling-java-shared.gradle.kts +++ b/buildSrc/src/main/kotlin/docling-java-shared.gradle.kts @@ -36,10 +36,13 @@ testing { } } -tasks.withType<Test> { +tasks.withType<Test>().configureEach { // Use JUnit Platform for unit tests. 
useJUnitPlatform() + maxParallelForks = (Runtime.getRuntime().availableProcessors() / 2).coerceAtLeast(1) + forkEvery = 100 + testLogging { events("PASSED", "FAILED", "SKIPPED", "STANDARD_OUT", "STANDARD_ERROR") showStandardStreams = true @@ -65,6 +68,10 @@ tasks.withType { } } +tasks.withType().configureEach { + options.isFork = true +} + tasks.withType { isFailOnError = false diff --git a/docling-serve/docling-serve-api/build.gradle.kts b/docling-serve/docling-serve-api/build.gradle.kts index 8645420..1612ae7 100644 --- a/docling-serve/docling-serve-api/build.gradle.kts +++ b/docling-serve/docling-serve-api/build.gradle.kts @@ -20,5 +20,6 @@ dependencies { } tasks.withType { + source = sourceSets["main"].allJava exclude("**/lombok.config") } diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeApi.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeApi.java index 7fc57fa..67d76fd 100644 --- a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeApi.java +++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeApi.java @@ -1,44 +1,10 @@ package ai.docling.serve.api; -import ai.docling.serve.api.chunk.request.HierarchicalChunkDocumentRequest; -import ai.docling.serve.api.chunk.request.HybridChunkDocumentRequest; -import ai.docling.serve.api.chunk.response.ChunkDocumentResponse; -import ai.docling.serve.api.convert.request.ConvertDocumentRequest; -import ai.docling.serve.api.convert.response.ConvertDocumentResponse; -import ai.docling.serve.api.health.HealthCheckResponse; - /** * Docling Serve API interface. */ -public interface DoclingServeApi { - - /** - * Executes a health check for the API and retrieves the health status of the service. - * - * @return a {@link HealthCheckResponse} object containing the health status of the API. 
- */ - HealthCheckResponse health(); - - /** - * Converts the provided document source(s) into a processed document based on the specified options. - * - * @param request the {@link ConvertDocumentRequest} containing the source(s), conversion options, and optional target. - * @return a {@link ConvertDocumentResponse} containing the processed document data, processing details, and any errors. - */ - ConvertDocumentResponse convertSource(ConvertDocumentRequest request); - - /** - * Converts and chunks the provided document source(s) into a processed document based on the specified options - * and using a hierarchical chunker for splitting the document into smaller chunks. - */ - ChunkDocumentResponse chunkSourceWithHierarchicalChunker(HierarchicalChunkDocumentRequest request); - - /** - * Converts and chunks the provided document source(s) into a processed document based on the specified options - * and using a hybrid chunker for splitting the document into smaller chunks. - */ - ChunkDocumentResponse chunkSourceWithHybridChunker(HybridChunkDocumentRequest request); - +public interface DoclingServeApi + extends DoclingServeHealthApi, DoclingServeConvertApi, DoclingServeChunkApi, DoclingServeClearApi, DoclingServeTaskApi { /** * Creates and returns a builder instance capable of constructing a duplicate or modified * version of the current API instance. The builder provides a customizable way to adjust @@ -98,6 +64,30 @@ default B logResponses() { */ B logResponses(boolean logResponses); + /** + * Configures whether the API client should format JSON requests and responses in a "pretty" format. + * Pretty formatting organizes the response data to improve readability, + * typically by adding spacing and line breaks. + * + * This setting does not affect the functional content of the response but can + * assist with debugging or human-readable output for development purposes. 
+ * + * @param prettyPrint {@code true} to enable pretty-printing of JSON requests and responses; + * {@code false} to use compact formatting. + * @return {@code this} builder instance for fluent API usage. + */ + B prettyPrint(boolean prettyPrint); + + /** + * Configures the API client to format JSON requests and responses in a "pretty" format. + * Pretty formatting improves readability by including spacing and line breaks. + * + * @return {@code this} builder instance for fluent API usage. + */ + default B prettyPrint() { + return prettyPrint(true); + } + /** * Builds and returns an instance of the specified type, representing the completed configuration * of the builder. The returned instance is typically an implementation of the Docling API. diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeChunkApi.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeChunkApi.java new file mode 100644 index 0000000..f8b4af3 --- /dev/null +++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeChunkApi.java @@ -0,0 +1,24 @@ +package ai.docling.serve.api; + +import ai.docling.serve.api.chunk.request.HierarchicalChunkDocumentRequest; +import ai.docling.serve.api.chunk.request.HybridChunkDocumentRequest; +import ai.docling.serve.api.chunk.response.ChunkDocumentResponse; + +/** + * Represents the Docling Serve Chunk API, providing methods for processing document sources + * by splitting them into smaller chunks using various chunking strategies. This interface + * ensures flexibility by supporting both hierarchical and hybrid chunking mechanisms. + */ +public interface DoclingServeChunkApi { + /** + * Converts and chunks the provided document source(s) into a processed document based on the specified options + * and using a hierarchical chunker for splitting the document into smaller chunks. 
+ */ + ChunkDocumentResponse chunkSourceWithHierarchicalChunker(HierarchicalChunkDocumentRequest request); + + /** + * Converts and chunks the provided document source(s) into a processed document based on the specified options + * and using a hybrid chunker for splitting the document into smaller chunks. + */ + ChunkDocumentResponse chunkSourceWithHybridChunker(HybridChunkDocumentRequest request); +} diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeClearApi.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeClearApi.java new file mode 100644 index 0000000..36fb3e3 --- /dev/null +++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeClearApi.java @@ -0,0 +1,33 @@ +package ai.docling.serve.api; + +import ai.docling.serve.api.clear.request.ClearResultsRequest; +import ai.docling.serve.api.clear.response.ClearResponse; + +/** + * Interface representing the Docling Serve Clear API. This API provides functionality + * for managing and cleaning up converters and stale data retained by the service. + * It includes methods for clearing registered converters and stored results based + * on specified thresholds or default configurations. + */ +public interface DoclingServeClearApi { + /** + * Clears all registered converters associated with the API. + * This method removes any previously configured or cached converters, + * effectively resetting the converter state to an uninitialized state. + * After invoking this method, no converters will be available until new ones are added or configured. + */ + ClearResponse clearConverters(); + + /** + * Clears previously stored results based on the criteria provided in the request. + * This method removes stale results or data that meet the threshold specified + * in the {@link ClearResultsRequest}. It is typically used to clean up older + * or unused data retained by the service. 
+ * + * @param request the {@link ClearResultsRequest} containing the criteria for clearing + * results, such as the threshold duration for identifying stale data. + * @return a {@link ClearResponse} indicating the outcome of the clear operation, + * including status or potential errors, if applicable. + */ + ClearResponse clearResults(ClearResultsRequest request); +} diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeConvertApi.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeConvertApi.java new file mode 100644 index 0000000..6c28631 --- /dev/null +++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeConvertApi.java @@ -0,0 +1,19 @@ +package ai.docling.serve.api; + +import ai.docling.serve.api.convert.request.ConvertDocumentRequest; +import ai.docling.serve.api.convert.response.ConvertDocumentResponse; + +/** + * Interface representing the Docling Serve Convert API. + * This API is responsible for processing and converting document source(s) into + * a structured or processed document format based on the specified conversion options. + */ +public interface DoclingServeConvertApi { + /** + * Converts the provided document source(s) into a processed document based on the specified options. + * + * @param request the {@link ConvertDocumentRequest} containing the source(s), conversion options, and optional target. + * @return a {@link ConvertDocumentResponse} containing the processed document data, processing details, and any errors. 
+ */ + ConvertDocumentResponse convertSource(ConvertDocumentRequest request); +} diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeHealthApi.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeHealthApi.java new file mode 100644 index 0000000..e90ff96 --- /dev/null +++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeHealthApi.java @@ -0,0 +1,16 @@ +package ai.docling.serve.api; + +import ai.docling.serve.api.health.HealthCheckResponse; + +/** + * Interface for performing health checks on the Docling service API. + * This API is designed to verify and report the operational status of the service. + */ +public interface DoclingServeHealthApi { + /** + * Executes a health check for the API and retrieves the health status of the service. + * + * @return a {@link HealthCheckResponse} object containing the health status of the API. + */ + HealthCheckResponse health(); +} diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeTaskApi.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeTaskApi.java new file mode 100644 index 0000000..b410995 --- /dev/null +++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeTaskApi.java @@ -0,0 +1,56 @@ +package ai.docling.serve.api; + +import ai.docling.serve.api.chunk.response.ChunkDocumentResponse; +import ai.docling.serve.api.convert.response.ConvertDocumentResponse; +import ai.docling.serve.api.task.request.TaskResultRequest; +import ai.docling.serve.api.task.request.TaskStatusPollRequest; +import ai.docling.serve.api.task.response.TaskStatusPollResponse; + +/** + * Defines the interface for the Docling Task API, which provides operations for + * managing and querying the status of asynchronous tasks. + * + * This interface supports task status polling with configurable wait durations + * and a default polling mechanism. 
It serves as the base for specific implementations + * such as {@link DoclingServeApi}. + */ +public interface DoclingServeTaskApi { + /** + * Polls the status of a task using the provided request object. + * This method allows querying the current status of an asynchronous task + * in progress or completed, based on its unique identifier and other + * optional parameters such as wait time. + * + * @param request the {@link TaskStatusPollRequest} containing the details + * for polling, including the task identifier and optional wait duration. + * @return a {@link TaskStatusPollResponse} containing the task's current status, + * including progress, position in the queue, and other metadata, + * if available. + */ + TaskStatusPollResponse pollTaskStatus(TaskStatusPollRequest request); + + /** + * Converts the task result from a completed process into a document conversion response. + * This method processes the provided task result request, retrieves conversion data, and + * returns the resulting document conversion details. + * + * @param request the {@link TaskResultRequest} containing the task identifier for which + * the result is being retrieved and converted. + * @return a {@link ConvertDocumentResponse} containing the details of the converted document, + * such as the document data, processing time, status, and any associated errors. + */ + ConvertDocumentResponse convertTaskResult(TaskResultRequest request); + + /** + * Processes the result of a completed task request by transforming the task data into + * a chunked document response. This method retrieves conversion data specific to the + * requested task and generates a response containing the chunked document details. + * + * @param request the {@link TaskResultRequest} containing the unique task identifier + * for which the result is being processed into chunks. 
+ * @return a {@link ChunkDocumentResponse} containing the details of the chunked document, + * including the generated chunks, associated documents, processing time, and any + * relevant metadata. + */ + ChunkDocumentResponse chunkTaskResult(TaskResultRequest request); +} diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/package-info.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/package-info.java new file mode 100644 index 0000000..11a1edd --- /dev/null +++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/package-info.java @@ -0,0 +1,4 @@ +@NullMarked +package ai.docling.serve.api.clear; + +import org.jspecify.annotations.NullMarked; diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/request/ClearResultsRequest.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/request/ClearResultsRequest.java new file mode 100644 index 0000000..def34e5 --- /dev/null +++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/request/ClearResultsRequest.java @@ -0,0 +1,34 @@ +package ai.docling.serve.api.clear.request; + +import java.time.Duration; + +/** + * Represents a request to clear stale results or data in the Docling Serve Clear API. + * This class stores the parameters required for defining the threshold duration + * beyond which results are considered stale and subject to cleanup. + * + * The main parameter, {@code olderThan}, indicates the threshold duration, + * while a default duration of 1 hour (3600 seconds) is provided through {@code DEFAULT_OLDER_THAN}. + * + * This class is designed to be immutable and uses the Builder pattern + * to facilitate the creation of instances with customized configurations. + * + * Annotations are used to support JSON serialization and the deserialization process. 
+ */ +@lombok.Builder(toBuilder = true) +@lombok.Getter +@lombok.ToString +public class ClearResultsRequest { + /** + * Represents the default duration used as a threshold for clearing stale results + * or data in the Docling Serve Clear API. Results older than this duration + * are considered stale and may be subject to cleanup. + * + * The value is predefined as 1 hour (3600 seconds). + */ + public static final Duration DEFAULT_OLDER_THAN = Duration.ofSeconds(3600); + + @lombok.Builder.Default + @lombok.NonNull + private Duration olderThan = DEFAULT_OLDER_THAN; +} diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/request/package-info.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/request/package-info.java new file mode 100644 index 0000000..66ba63c --- /dev/null +++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/request/package-info.java @@ -0,0 +1,4 @@ +@NullMarked +package ai.docling.serve.api.clear.request; + +import org.jspecify.annotations.NullMarked; diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/response/ClearResponse.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/response/ClearResponse.java new file mode 100644 index 0000000..8d8711c --- /dev/null +++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/response/ClearResponse.java @@ -0,0 +1,33 @@ +package ai.docling.serve.api.clear.response; + +import org.jspecify.annotations.Nullable; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; + +/** + * Represents the response returned when performing a "clear" operation + * within the Docling API. This response typically communicates the status + * of the clear operation, indicating success, failure, or other relevant + * information. 
+ * + * The response includes a status field, which may provide additional details + * about the operation's outcome. The status field is optional and may be + * null or empty if no specific status message is provided. + * + * This class is immutable and is constructed using the {@link Builder}. + */ +@JsonInclude(JsonInclude.Include.NON_EMPTY) +@tools.jackson.databind.annotation.JsonDeserialize(builder = ClearResponse.Builder.class) +@lombok.extern.jackson.Jacksonized +@lombok.Builder(toBuilder = true) +@lombok.Getter +@lombok.ToString +public class ClearResponse { + @JsonProperty("status") + @Nullable + private String status; + + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") + public static class Builder { } +} diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/response/package-info.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/response/package-info.java new file mode 100644 index 0000000..a9cd244 --- /dev/null +++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/response/package-info.java @@ -0,0 +1,4 @@ +@NullMarked +package ai.docling.serve.api.clear.response; + +import org.jspecify.annotations.NullMarked; diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/serialization/DurationSerializationFormat.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/serialization/DurationSerializationFormat.java index 0c86ca0..a65c02f 100644 --- a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/serialization/DurationSerializationFormat.java +++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/serialization/DurationSerializationFormat.java @@ -23,7 +23,6 @@ * private Duration timeout; * } * - *

* @see Jackson2DurationSerializer * @see Jackson3DurationSerializer */ diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/serialization/package-info.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/serialization/package-info.java new file mode 100644 index 0000000..4518de5 --- /dev/null +++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/serialization/package-info.java @@ -0,0 +1,7 @@ +/** + * Serialization helpers + */ +@NullMarked +package ai.docling.serve.api.serialization; + +import org.jspecify.annotations.NullMarked; diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/package-info.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/package-info.java new file mode 100644 index 0000000..55746da --- /dev/null +++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/package-info.java @@ -0,0 +1,7 @@ +/** + * The Docling task api + */ +@NullMarked +package ai.docling.serve.api.task; + +import org.jspecify.annotations.NullMarked; diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/TaskResultRequest.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/TaskResultRequest.java new file mode 100644 index 0000000..3b62f12 --- /dev/null +++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/TaskResultRequest.java @@ -0,0 +1,9 @@ +package ai.docling.serve.api.task.request; + +@lombok.Builder(toBuilder = true) +@lombok.Getter +@lombok.ToString +public class TaskResultRequest { + @lombok.NonNull + private String taskId; +} diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/TaskStatusPollRequest.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/TaskStatusPollRequest.java new file mode 100644 index 0000000..54bef7c --- /dev/null +++ 
b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/TaskStatusPollRequest.java @@ -0,0 +1,46 @@ +package ai.docling.serve.api.task.request; + +import java.time.Duration; + +/** + * Represents a request to poll the status of a task within the Docling Task API. + * This class encapsulates the information needed to query the status of an + * asynchronous task, including task identification and poll wait time. + * + * The {@code TaskStatusPollRequest} supports serialization and deserialization + * using Jackson, ensuring compatibility with JSON-based APIs. It also provides + * a builder for constructing instances. + * + * Key attributes include: + * - {@code taskId}: A unique identifier for the task whose status is to be polled. + * - {@code waitTime}: An optional duration indicating the wait interval between + * poll attempts. Defaults to {@link #DEFAULT_STATUS_POLL_WAIT_TIME} if not specified. + * + * This class is designed to be immutable, with all properties being final and + * accessible through getter methods. It supports features such as pretty-printing + * via {@code toString} and JSON-specific annotations for seamless integration + * with APIs consuming or producing JSON. + * + * Thread Safety: The class is thread-safe as it is immutable. + */ +@lombok.Builder(toBuilder = true) +@lombok.Getter +@lombok.ToString +public class TaskStatusPollRequest { + /** + * The default wait time between status polling attempts for a task. + *

<p> + * This value is used when no explicit wait time is specified in a + * {@code TaskStatusPollRequest} instance. It is set to {@link Duration#ZERO}, + * meaning there is no delay by default between consecutive polling attempts. + * </p>

+ */ + public static final Duration DEFAULT_STATUS_POLL_WAIT_TIME = Duration.ZERO; + + @lombok.NonNull + private String taskId; + + @lombok.Builder.Default + @lombok.NonNull + private Duration waitTime = DEFAULT_STATUS_POLL_WAIT_TIME; +} diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/package-info.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/package-info.java new file mode 100644 index 0000000..701fb4c --- /dev/null +++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/package-info.java @@ -0,0 +1,7 @@ +/** + * The Docling task api + */ +@NullMarked +package ai.docling.serve.api.task.request; + +import org.jspecify.annotations.NullMarked; diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/TaskStatus.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/TaskStatus.java new file mode 100644 index 0000000..b9da683 --- /dev/null +++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/TaskStatus.java @@ -0,0 +1,21 @@ +package ai.docling.serve.api.task.response; + +import com.fasterxml.jackson.annotation.JsonProperty; + +/** + * Represents the possible statuses of a Task within the system. + * Each status is mapped to a specific JSON property for serialization and deserialization. + * + *
<ul>
+ *   <li>{@code PENDING}: Indicates that the task has been created but has not yet started execution.</li>
+ *   <li>{@code STARTED}: Indicates that the task is currently in progress.</li>
+ *   <li>{@code SUCCESS}: Indicates that the task has completed successfully.</li>
+ *   <li>{@code FAILURE}: Indicates that the task has failed to complete successfully.</li>
+ * </ul>
+ */ +public enum TaskStatus { + @JsonProperty("pending") PENDING, + @JsonProperty("started") STARTED, + @JsonProperty("success") SUCCESS, + @JsonProperty("failure") FAILURE; +} diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/TaskStatusMetadata.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/TaskStatusMetadata.java new file mode 100644 index 0000000..014ae4e --- /dev/null +++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/TaskStatusMetadata.java @@ -0,0 +1,47 @@ +package ai.docling.serve.api.task.response; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; + +/** + * Represents metadata for the status of a task, providing detailed information + * about the task's progress and outcomes. + * + * This class is used to encapsulate the statistical data related to the execution + * of a task, such as the number of documents processed, succeeded, or failed. It + * is intended for use in tracking task processing and is serialized to JSON for + * communication between system components. + * + * Features: + * - Serialization and deserialization using Jackson annotations. + * - All fields are included in the JSON only if they are non-empty. + * - Immutable structure with a builder pattern for object creation. + * + * Fields: + * - {@code numDocs}: The total number of documents associated with the task. + * - {@code numProcessed}: The number of documents that have been processed. + * - {@code numSucceeded}: The number of documents that were successfully processed. + * - {@code numFailed}: The number of documents that failed processing. 
+ */ +@JsonInclude(JsonInclude.Include.NON_EMPTY) +@tools.jackson.databind.annotation.JsonDeserialize(builder = TaskStatusMetadata.Builder.class) +@lombok.extern.jackson.Jacksonized +@lombok.Builder(toBuilder = true) +@lombok.Getter +@lombok.ToString +public class TaskStatusMetadata { + @JsonProperty("num_docs") + private Long numDocs; + + @JsonProperty("num_processed") + private Long numProcessed; + + @JsonProperty("num_succeeded") + private Long numSucceeded; + + @JsonProperty("num_failed") + private Long numFailed; + + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") + public static class Builder { } +} diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/TaskStatusPollResponse.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/TaskStatusPollResponse.java new file mode 100644 index 0000000..ab853ba --- /dev/null +++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/TaskStatusPollResponse.java @@ -0,0 +1,55 @@ +package ai.docling.serve.api.task.response; + +import org.jspecify.annotations.Nullable; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; + +/** + * Represents the response for polling the status of a task. + * + * This class encapsulates details about a specific task's current status, + * including its identifier, type, position in a queue, and metadata regarding + * its progress and outcomes. The object is serialized and deserialized using + * Jackson annotations, ensuring compatibility with JSON-based APIs. + * + * Features: + * - Uses a builder pattern with {@code lombok.Builder} for creating immutable objects. + * - Serialized to JSON only if fields are non-empty. + * - Metadata field for detailed task status is optional. + * + * Fields: + * - {@code taskId}: The unique identifier for the task. + * - {@code taskType}: The type of task being processed (optional). 
+ * - {@code taskStatus}: The current status of the task, as defined in the {@code TaskStatus} enum. + * - {@code taskPosition}: A numerical representation of the task's position in the processing queue. + * - {@code taskStatusMetadata}: Metadata providing detailed statistics and progress information (optional). + */ +@JsonInclude(JsonInclude.Include.NON_EMPTY) +@tools.jackson.databind.annotation.JsonDeserialize(builder = TaskStatusPollResponse.Builder.class) +@lombok.extern.jackson.Jacksonized +@lombok.Builder(toBuilder = true) +@lombok.Getter +@lombok.ToString +public class TaskStatusPollResponse { + @JsonProperty("task_id") + private String taskId; + + @JsonProperty("task_type") + @Nullable + private String taskType; + + @JsonProperty("task_status") + private TaskStatus taskStatus; + + @JsonProperty("task_position") + @Nullable + private Long taskPosition; + + @JsonProperty("task_meta") + @Nullable + private TaskStatusMetadata taskStatusMetadata; + + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") + public static class Builder { } +} diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/package-info.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/package-info.java new file mode 100644 index 0000000..96512c7 --- /dev/null +++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/package-info.java @@ -0,0 +1,7 @@ +/** + * The Docling task api + */ +@NullMarked +package ai.docling.serve.api.task.response; + +import org.jspecify.annotations.NullMarked; diff --git a/docling-serve/docling-serve-api/src/main/java/module-info.java b/docling-serve/docling-serve-api/src/main/java/module-info.java index 38a784a..eb58960 100644 --- a/docling-serve/docling-serve-api/src/main/java/module-info.java +++ b/docling-serve/docling-serve-api/src/main/java/module-info.java @@ -26,6 +26,14 @@ exports ai.docling.serve.api.convert.request.target; exports 
ai.docling.serve.api.convert.response; + // Clear API + exports ai.docling.serve.api.clear.request; + exports ai.docling.serve.api.clear.response; + + // Task API + exports ai.docling.serve.api.task.request; + exports ai.docling.serve.api.task.response; + // Serialization helpers exports ai.docling.serve.api.serialization; } diff --git a/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/clear/request/ClearResultsRequestTests.java b/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/clear/request/ClearResultsRequestTests.java new file mode 100644 index 0000000..d2acd27 --- /dev/null +++ b/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/clear/request/ClearResultsRequestTests.java @@ -0,0 +1,22 @@ +package ai.docling.serve.api.clear.request; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import org.junit.jupiter.api.Test; + +class ClearResultsRequestTests { + @Test + void whenOlderThanIsNullThenThrow() { + assertThatThrownBy(() -> ClearResultsRequest.builder().olderThan(null).build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("olderThan is marked non-null but is null"); + } + + @Test + void defaultOlderThan() { + assertThat(ClearResultsRequest.builder().build().getOlderThan()) + .isNotNull() + .isEqualByComparingTo(ClearResultsRequest.DEFAULT_OLDER_THAN); + } +} diff --git a/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/clear/response/ClearResponseTests.java b/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/clear/response/ClearResponseTests.java new file mode 100644 index 0000000..1901bd2 --- /dev/null +++ b/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/clear/response/ClearResponseTests.java @@ -0,0 +1,36 @@ +package ai.docling.serve.api.clear.response; + +import static org.assertj.core.api.Assertions.assertThat; + +import 
org.junit.jupiter.api.Test; + +class ClearResponseTests { + @Test + void whenValidParametersThenCreateClearResponse() { + String status = "healthy"; + + ClearResponse response = ClearResponse.builder() + .status(status) + .build(); + + assertThat(response.getStatus()).isEqualTo(status); + } + + @Test + void whenStatusIsNullThenCreateClearResponse() { + ClearResponse response = ClearResponse.builder().build(); + + assertThat(response.getStatus()).isNull(); + } + + @Test + void whenEmptyStatusThenCreateClearResponse() { + String status = ""; + + ClearResponse response = ClearResponse.builder() + .status(status) + .build(); + + assertThat(response.getStatus()).isEqualTo(status); + } +} diff --git a/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/task/request/TaskResultRequestTests.java b/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/task/request/TaskResultRequestTests.java new file mode 100644 index 0000000..92bfb58 --- /dev/null +++ b/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/task/request/TaskResultRequestTests.java @@ -0,0 +1,14 @@ +package ai.docling.serve.api.task.request; + +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import org.junit.jupiter.api.Test; + +class TaskResultRequestTests { + @Test + void whenTaskIdIsNullThenThrow() { + assertThatThrownBy(() -> TaskResultRequest.builder().build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("taskId is marked non-null but is null"); + } +} diff --git a/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/task/request/TaskStatusPollRequestTests.java b/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/task/request/TaskStatusPollRequestTests.java new file mode 100644 index 0000000..f4f75aa --- /dev/null +++ b/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/task/request/TaskStatusPollRequestTests.java @@ -0,0 +1,36 @@ +package 
ai.docling.serve.api.task.request; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import java.time.Duration; + +import org.junit.jupiter.api.Test; + +class TaskStatusPollRequestTests { + @Test + void whenTaskIdIsNullThenThrow() { + assertThatThrownBy(() -> TaskStatusPollRequest.builder().waitTime(Duration.ofMinutes(1)).build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("taskId is marked non-null but is null"); + } + + @Test + void whenWaitTimeIsNullThenThrow() { + assertThatThrownBy(() -> TaskStatusPollRequest.builder().taskId("1234").waitTime(null).build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("waitTime is marked non-null but is null"); + } + + @Test + void defaults() { + var request = TaskStatusPollRequest.builder().taskId("1234").build(); + + assertThat(request.getWaitTime()) + .isNotNull() + .isEqualByComparingTo(TaskStatusPollRequest.DEFAULT_STATUS_POLL_WAIT_TIME); + + assertThat(request.getTaskId()) + .isEqualTo("1234"); + } +} diff --git a/docling-serve/docling-serve-client/build.gradle.kts b/docling-serve/docling-serve-client/build.gradle.kts index f8a3910..68fb0b9 100644 --- a/docling-serve/docling-serve-client/build.gradle.kts +++ b/docling-serve/docling-serve-client/build.gradle.kts @@ -20,4 +20,6 @@ dependencies { testImplementation(platform(libs.jackson.bom)) testImplementation(libs.jackson.databind) testImplementation(libs.jackson2.databind) + + testImplementation(libs.awaitility) } diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ChunkOperations.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ChunkOperations.java new file mode 100644 index 0000000..180c426 --- /dev/null +++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ChunkOperations.java @@ -0,0 +1,34 @@ +package ai.docling.serve.client; + +import 
ai.docling.serve.api.DoclingServeChunkApi; +import ai.docling.serve.api.chunk.request.HierarchicalChunkDocumentRequest; +import ai.docling.serve.api.chunk.request.HybridChunkDocumentRequest; +import ai.docling.serve.api.chunk.response.ChunkDocumentResponse; + +/** + * Base class for document chunking API operations. Provides access to document chunking + * functionality with both hierarchical and hybrid strategies. + */ +final class ChunkOperations implements DoclingServeChunkApi { + private final HttpOperations httpOperations; + + ChunkOperations(HttpOperations httpOperations) { + this.httpOperations = httpOperations; + } + + /** + * Converts and chunks the provided document source(s) into a processed document based on the specified options + * and using a hierarchical chunker for splitting the document into smaller chunks. + */ + public ChunkDocumentResponse chunkSourceWithHierarchicalChunker(HierarchicalChunkDocumentRequest request) { + return this.httpOperations.executePost("/v1/chunk/hierarchical/source", request, ChunkDocumentResponse.class); + } + + /** + * Converts and chunks the provided document source(s) into a processed document based on the specified options + * and using a hybrid chunker for splitting the document into smaller chunks. 
+ */ + public ChunkDocumentResponse chunkSourceWithHybridChunker(HybridChunkDocumentRequest request) { + return this.httpOperations.executePost("/v1/chunk/hybrid/source", request, ChunkDocumentResponse.class); + } +} diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ClearOperations.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ClearOperations.java new file mode 100644 index 0000000..3c2e5c4 --- /dev/null +++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ClearOperations.java @@ -0,0 +1,34 @@ +package ai.docling.serve.client; + +import ai.docling.serve.api.DoclingServeClearApi; +import ai.docling.serve.api.clear.request.ClearResultsRequest; +import ai.docling.serve.api.clear.response.ClearResponse; + +/** + * Base class for clear API operations. Provides functionality for managing and cleaning up + * converters and stale data retained by the service. + */ +final class ClearOperations implements DoclingServeClearApi { + private final HttpOperations httpOperations; + + ClearOperations(HttpOperations httpOperations) { + this.httpOperations = httpOperations; + } + + /** + * Clears all registered converters associated with the API. + */ + public ClearResponse clearConverters() { + return this.httpOperations.executeGet("/v1/clear/converters", ClearResponse.class); + } + + /** + * Clears previously stored results based on the criteria provided in the request. + * + * @param request the {@link ClearResultsRequest} containing the criteria for clearing results. + * @return a {@link ClearResponse} indicating the outcome of the clear operation. 
+ */ + public ClearResponse clearResults(ClearResultsRequest request) { + return this.httpOperations.executeGet("/v1/clear/results?older_then=%d".formatted(request.getOlderThan().toSeconds()), ClearResponse.class); + } +} diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ConvertOperations.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ConvertOperations.java new file mode 100644 index 0000000..a5fe5af --- /dev/null +++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ConvertOperations.java @@ -0,0 +1,27 @@ +package ai.docling.serve.client; + +import ai.docling.serve.api.DoclingServeConvertApi; +import ai.docling.serve.api.convert.request.ConvertDocumentRequest; +import ai.docling.serve.api.convert.response.ConvertDocumentResponse; + +/** + * Base class for document conversion API operations. Provides access to document + * conversion functionality. + */ +final class ConvertOperations implements DoclingServeConvertApi { + private final HttpOperations httpOperations; + + ConvertOperations(HttpOperations httpOperations) { + this.httpOperations = httpOperations; + } + + /** + * Converts the provided document source(s) into a processed document based on the specified options. + * + * @param request the {@link ConvertDocumentRequest} containing the source(s), conversion options, and optional target. + * @return a {@link ConvertDocumentResponse} containing the processed document data, processing details, and any errors. 
+ */ + public ConvertDocumentResponse convertSource(ConvertDocumentRequest request) { + return this.httpOperations.executePost("/v1/convert/source", request, ConvertDocumentResponse.class); + } +} diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeClient.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeClient.java index 245e7d2..929bac3 100644 --- a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeClient.java +++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeClient.java @@ -5,6 +5,7 @@ import java.net.URI; import java.net.http.HttpClient; +import java.net.http.HttpClient.Redirect; import java.net.http.HttpRequest; import java.net.http.HttpRequest.BodyPublisher; import java.net.http.HttpRequest.BodyPublishers; @@ -19,12 +20,22 @@ import org.slf4j.LoggerFactory; import ai.docling.serve.api.DoclingServeApi; +import ai.docling.serve.api.DoclingServeChunkApi; +import ai.docling.serve.api.DoclingServeClearApi; +import ai.docling.serve.api.DoclingServeConvertApi; +import ai.docling.serve.api.DoclingServeHealthApi; +import ai.docling.serve.api.DoclingServeTaskApi; import ai.docling.serve.api.chunk.request.HierarchicalChunkDocumentRequest; import ai.docling.serve.api.chunk.request.HybridChunkDocumentRequest; import ai.docling.serve.api.chunk.response.ChunkDocumentResponse; +import ai.docling.serve.api.clear.request.ClearResultsRequest; +import ai.docling.serve.api.clear.response.ClearResponse; import ai.docling.serve.api.convert.request.ConvertDocumentRequest; import ai.docling.serve.api.convert.response.ConvertDocumentResponse; import ai.docling.serve.api.health.HealthCheckResponse; +import ai.docling.serve.api.task.request.TaskResultRequest; +import ai.docling.serve.api.task.request.TaskStatusPollRequest; +import ai.docling.serve.api.task.response.TaskStatusPollResponse; /** * Abstract class representing a 
client for interacting with the Docling API. @@ -34,10 +45,15 @@ * configurations. It provides abstract methods for JSON serialization and * deserialization, allowing implementation-specific customization. * + *

The client is composed of separate package-private delegate implementations, one + * for each API interface ({@link DoclingServeHealthApi}, {@link DoclingServeConvertApi}, + * {@link DoclingServeChunkApi}, {@link DoclingServeClearApi}, {@link DoclingServeTaskApi}). + * These implementations share common HTTP execution logic and configuration. + * *

Concrete subclasses must implement {@link #readValue(String, Class)} and * {@link #writeValueAsString(Object)} for serialization and deserialization behavior. */ -public abstract class DoclingServeClient implements DoclingServeApi { +public abstract class DoclingServeClient extends HttpOperations implements DoclingServeApi { private static final Logger LOG = LoggerFactory.getLogger(DoclingServeClient.class); protected static final URI DEFAULT_BASE_URL = URI.create("http://localhost:5001"); @@ -45,6 +61,13 @@ public abstract class DoclingServeClient implements DoclingServeApi { private final HttpClient httpClient; private final boolean logRequests; private final boolean logResponses; + private final boolean prettyPrintJson; + + private final HealthOperations healthOps = new HealthOperations(this); + private final ConvertOperations convertOps = new ConvertOperations(this); + private final ChunkOperations chunkOps = new ChunkOperations(this); + private final ClearOperations clearOps = new ClearOperations(this); + private final TaskOperations taskOps = new TaskOperations(this); protected DoclingServeClient(DoclingServeClientBuilder builder) { this.baseUrl = ensureNotNull(builder.baseUrl, "baseUrl"); @@ -59,6 +82,7 @@ protected DoclingServeClient(DoclingServeClientBuilder builder) { this.httpClient = ensureNotNull(builder.httpClientBuilder, "httpClientBuilder").build(); this.logRequests = builder.logRequests; this.logResponses = builder.logResponses; + this.prettyPrintJson = builder.prettyPrintJson; } /** @@ -82,6 +106,10 @@ protected DoclingServeClient(DoclingServeClientBuilder builder) { */ protected abstract String writeValueAsString(T value); + protected boolean prettyPrintJson() { + return this.prettyPrintJson; + } + protected void logRequest(HttpRequest request) { if (LOG.isInfoEnabled()) { var stringBuilder = new StringBuilder(); @@ -106,7 +134,9 @@ protected void logResponse(HttpResponse response, Optional respo stringBuilder.append(" %s: %s\n".formatted(key, 
String.join(", ", values))) ); - responseBody.ifPresent(body -> stringBuilder.append(" BODY:\n%s".formatted(body))); + responseBody + .map(body -> this.prettyPrintJson ? writeValueAsString(readValue(body, Object.class)) : body) + .ifPresent(body -> stringBuilder.append(" BODY:\n%s".formatted(body))); LOG.info(stringBuilder.toString()); } } @@ -138,7 +168,8 @@ protected O executePost(String uri, I request, Class expectedReturnTyp return execute(httpRequest, expectedReturnType); } - protected O executeGet(String uri, Class expectedReturnType) { + @Override + protected O executeGet(String uri, Class expectedReturnType) { var httpRequest = createRequestBuilder(uri) .GET() .build(); @@ -159,27 +190,60 @@ protected T getResponse(HttpResponse response, Class expectedRetu logResponse(response, Optional.ofNullable(body)); } + var statusCode = response.statusCode(); + + if (statusCode >= 400) { + // Handle errors + // The Java HTTPClient doesn't throw exceptions on error codes + throw new DoclingServeClientException("An error occurred: %s".formatted(body), statusCode, body); + } + return readValue(body, expectedReturnType); } @Override public HealthCheckResponse health() { - return executeGet("/health", HealthCheckResponse.class); + return this.healthOps.health(); } @Override public ConvertDocumentResponse convertSource(ConvertDocumentRequest request) { - return executePost("/v1/convert/source", request, ConvertDocumentResponse.class); + return this.convertOps.convertSource(request); } @Override public ChunkDocumentResponse chunkSourceWithHierarchicalChunker(HierarchicalChunkDocumentRequest request) { - return executePost("/v1/chunk/hierarchical/source", request, ChunkDocumentResponse.class); + return this.chunkOps.chunkSourceWithHierarchicalChunker(request); } @Override public ChunkDocumentResponse chunkSourceWithHybridChunker(HybridChunkDocumentRequest request) { - return executePost("/v1/chunk/hybrid/source", request, ChunkDocumentResponse.class); + return 
this.chunkOps.chunkSourceWithHybridChunker(request); + } + + @Override + public TaskStatusPollResponse pollTaskStatus(TaskStatusPollRequest request) { + return this.taskOps.pollTaskStatus(request); + } + + @Override + public ConvertDocumentResponse convertTaskResult(TaskResultRequest request) { + return this.taskOps.convertTaskResult(request); + } + + @Override + public ChunkDocumentResponse chunkTaskResult(TaskResultRequest request) { + return this.taskOps.chunkTaskResult(request); + } + + @Override + public ClearResponse clearConverters() { + return this.clearOps.clearConverters(); + } + + @Override + public ClearResponse clearResults(ClearResultsRequest request) { + return this.clearOps.clearResults(request); } private class LoggingBodyPublisher implements BodyPublisher { @@ -199,7 +263,7 @@ public long contentLength() { @Override public void subscribe(Subscriber subscriber) { if (logRequests) { - LOG.info("→ REQUEST BODY: {}", this.stringContent); + LOG.info("→ REQUEST BODY: \n{}", this.stringContent); } this.delegate.subscribe(subscriber); @@ -219,9 +283,10 @@ public void subscribe(Subscriber subscriber) { @SuppressWarnings("unchecked") public abstract static class DoclingServeClientBuilder> implements DoclingApiBuilder { private URI baseUrl = DEFAULT_BASE_URL; - private HttpClient.Builder httpClientBuilder = HttpClient.newBuilder(); + private HttpClient.Builder httpClientBuilder = HttpClient.newBuilder().followRedirects(Redirect.NORMAL); private boolean logRequests = false; private boolean logResponses = false; + private boolean prettyPrintJson = false; /** * Protected constructor for use by subclasses of {@link DoclingServeClientBuilder}. 
@@ -300,5 +365,11 @@ public B logResponses(boolean logResponses) { this.logResponses = logResponses; return (B) this; } + + @Override + public B prettyPrint(boolean prettyPrint) { + this.prettyPrintJson = prettyPrint; + return (B) this; + } } } diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeClientException.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeClientException.java new file mode 100644 index 0000000..d20286b --- /dev/null +++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeClientException.java @@ -0,0 +1,35 @@ +package ai.docling.serve.client; + +/** + * Exception thrown when an error occurs during a client request to the Docling Serve API. + * This exception encapsulates additional details about the HTTP status code and the response + * body returned by the server, providing more context for the error. + */ +public class DoclingServeClientException extends RuntimeException { + private final int statusCode; + private final String responseBody; + + public DoclingServeClientException(String message, int statusCode, String responseBody) { + super(message); + this.statusCode = statusCode; + this.responseBody = responseBody; + } + + /** + * Retrieves the HTTP status code associated with this exception. + * + * @return the HTTP status code that was returned by the server. + */ + public int getStatusCode() { + return this.statusCode; + } + + /** + * Retrieves the response body associated with this exception. + * + * @return the response body returned by the server as a string. 
+ */ + public String getResponseBody() { + return this.responseBody; + } +} diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeJackson2Client.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeJackson2Client.java index 86b1554..5614278 100644 --- a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeJackson2Client.java +++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeJackson2Client.java @@ -55,7 +55,9 @@ protected T readValue(String json, Class valueType) { @Override protected String writeValueAsString(T value) { try { - return this.jsonMapper.writeValueAsString(value); + return prettyPrintJson() ? + this.jsonMapper.writerWithDefaultPrettyPrinter().writeValueAsString(value) : + this.jsonMapper.writeValueAsString(value); } catch (JsonProcessingException e) { throw new RuntimeException(e); } diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeJackson3Client.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeJackson3Client.java index 918f31e..9863331 100644 --- a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeJackson3Client.java +++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeJackson3Client.java @@ -42,7 +42,9 @@ protected T readValue(String json, Class valueType) { @Override protected String writeValueAsString(T value) { - return this.jsonMapper.writeValueAsString(value); + return prettyPrintJson() ? 
+ this.jsonMapper.writerWithDefaultPrettyPrinter().writeValueAsString(value) : + this.jsonMapper.writeValueAsString(value); } /** diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/HealthOperations.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/HealthOperations.java new file mode 100644 index 0000000..a8bbf91 --- /dev/null +++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/HealthOperations.java @@ -0,0 +1,25 @@ +package ai.docling.serve.client; + +import ai.docling.serve.api.DoclingServeHealthApi; +import ai.docling.serve.api.health.HealthCheckResponse; + +/** + * Base class for health API operations. Provides access to health check functionality + * of the Docling service. + */ +final class HealthOperations implements DoclingServeHealthApi { + private final HttpOperations httpOperations; + + HealthOperations(HttpOperations httpOperations) { + this.httpOperations = httpOperations; + } + + /** + * Executes a health check for the API and retrieves the health status of the service. + * + * @return a {@link HealthCheckResponse} object containing the health status of the API. + */ + public HealthCheckResponse health() { + return this.httpOperations.executeGet("/health", HealthCheckResponse.class); + } +} diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/HttpOperations.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/HttpOperations.java new file mode 100644 index 0000000..880bd20 --- /dev/null +++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/HttpOperations.java @@ -0,0 +1,31 @@ +package ai.docling.serve.client; + +/** + * Abstract base class for HTTP operations. Provides methods for executing HTTP requests such as GET and POST + * and deserializing responses into specified types. 
This class is intended to be extended by subclasses that + * implement these operations for specific use cases. + */ +public abstract class HttpOperations { + /** + * Executes an HTTP GET request to the specified URI and deserializes the response into the given type. + * + * @param the expected return type for the deserialized response. + * @param uri the URI to send the GET request to. + * @param expectedReturnType the class representing the type to which the response should be deserialized. + * @return an instance of the specified type containing the deserialized response data. + */ + protected abstract O executeGet(String uri, Class expectedReturnType); + + /** + * Executes an HTTP POST request to the specified URI with the given request payload and deserializes + * the response into the specified return type. + * + * @param the type of the request payload sent in the POST request. + * @param the expected return type for the deserialized response. + * @param uri the URI to send the POST request to. + * @param request the payload to be sent in the POST request. + * @param expectedReturnType the class representing the type to which the response should be deserialized. + * @return an instance of the specified type containing the deserialized response data. 
+ */ + protected abstract O executePost(String uri, I request, Class expectedReturnType); +} diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/TaskOperations.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/TaskOperations.java new file mode 100644 index 0000000..13bcdc0 --- /dev/null +++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/TaskOperations.java @@ -0,0 +1,51 @@ +package ai.docling.serve.client; + +import ai.docling.serve.api.DoclingServeTaskApi; +import ai.docling.serve.api.chunk.response.ChunkDocumentResponse; +import ai.docling.serve.api.convert.response.ConvertDocumentResponse; +import ai.docling.serve.api.task.request.TaskResultRequest; +import ai.docling.serve.api.task.request.TaskStatusPollRequest; +import ai.docling.serve.api.task.response.TaskStatusPollResponse; + +/** + * Base class for task API operations. Provides operations for managing and querying + * the status of asynchronous tasks. + */ +final class TaskOperations implements DoclingServeTaskApi { + private final HttpOperations httpOperations; + + TaskOperations(HttpOperations httpOperations) { + this.httpOperations = httpOperations; + } + + /** + * Polls the status of a task using the provided request object. + * + * @param request the {@link TaskStatusPollRequest} containing the details for polling. + * @return a {@link TaskStatusPollResponse} containing the task's current status. + */ + public TaskStatusPollResponse pollTaskStatus(TaskStatusPollRequest request) { + return this.httpOperations.executeGet("/v1/status/poll/%s?wait=%d".formatted(request.getTaskId(), request.getWaitTime().toSeconds()), TaskStatusPollResponse.class); + } + + /** + * Converts the task result from a completed process into a document conversion response. + * + * @param request the {@link TaskResultRequest} containing the task identifier. 
+ * @return a {@link ConvertDocumentResponse} containing the details of the converted document. + */ + public ConvertDocumentResponse convertTaskResult(TaskResultRequest request) { + return this.httpOperations.executeGet("/v1/result/%s".formatted(request.getTaskId()), ConvertDocumentResponse.class); + } + + /** + * Processes the result of a completed task request by transforming the task data into + * a chunked document response. + * + * @param request the {@link TaskResultRequest} containing the unique task identifier. + * @return a {@link ChunkDocumentResponse} containing the details of the chunked document. + */ + public ChunkDocumentResponse chunkTaskResult(TaskResultRequest request) { + return this.httpOperations.executeGet("/v1/result/%s".formatted(request.getTaskId()), ChunkDocumentResponse.class); + } +} diff --git a/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/AbstractDoclingServeClientTests.java b/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/AbstractDoclingServeClientTests.java index 4192cc0..c71054d 100644 --- a/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/AbstractDoclingServeClientTests.java +++ b/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/AbstractDoclingServeClientTests.java @@ -1,15 +1,38 @@ package ai.docling.serve.client; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.awaitility.Awaitility.await; import java.io.IOException; import java.io.InputStream; +import java.lang.reflect.Method; import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpClient.Redirect; +import java.net.http.HttpClient.Version; +import java.net.http.HttpRequest; +import java.net.http.HttpRequest.BodyPublisher; +import java.net.http.HttpRequest.BodyPublishers; +import java.net.http.HttpResponse; +import java.net.http.HttpResponse.BodyHandlers; 
+import java.nio.ByteBuffer; import java.time.Duration; import java.util.Base64; import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.Flow.Subscriber; +import java.util.concurrent.atomic.AtomicReference; +import org.jspecify.annotations.Nullable; +import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtensionContext; +import org.junit.jupiter.api.extension.RegisterExtension; +import org.junit.jupiter.api.extension.TestWatcher; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import ai.docling.core.DoclingDocument; import ai.docling.core.DoclingDocument.DocItemLabel; @@ -20,6 +43,8 @@ import ai.docling.serve.api.chunk.request.options.HybridChunkerOptions; import ai.docling.serve.api.chunk.response.Chunk; import ai.docling.serve.api.chunk.response.ChunkDocumentResponse; +import ai.docling.serve.api.clear.request.ClearResultsRequest; +import ai.docling.serve.api.clear.response.ClearResponse; import ai.docling.serve.api.convert.request.ConvertDocumentRequest; import ai.docling.serve.api.convert.request.options.ConvertDocumentOptions; import ai.docling.serve.api.convert.request.options.OutputFormat; @@ -28,178 +53,423 @@ import ai.docling.serve.api.convert.request.source.HttpSource; import ai.docling.serve.api.convert.response.ConvertDocumentResponse; import ai.docling.serve.api.health.HealthCheckResponse; +import ai.docling.serve.api.task.request.TaskResultRequest; +import ai.docling.serve.api.task.request.TaskStatusPollRequest; +import ai.docling.serve.api.task.response.TaskStatus; +import ai.docling.serve.api.task.response.TaskStatusPollResponse; import ai.docling.testcontainers.serve.DoclingServeContainer; import ai.docling.testcontainers.serve.config.DoclingServeContainerConfig; abstract class AbstractDoclingServeClientTests { + private static final Logger LOG = LoggerFactory.getLogger(AbstractDoclingServeClientTests.class); + protected static 
final DoclingServeContainer doclingContainer = new DoclingServeContainer( - DoclingServeContainerConfig.builder() - .image(DoclingServeContainerConfig.DOCLING_IMAGE) - .enableUi(true) - .build() + DoclingServeContainerConfig.builder() + .image(DoclingServeContainerConfig.DOCLING_IMAGE) + .enableUi(true) + .build() ); + // Used to output the container logs on a test failure + // This could be useful when debugging + @RegisterExtension + TestWatcher watcher = new TestWatcher() { + @Override + public void testFailed(ExtensionContext context, @Nullable Throwable cause) { + var message = """ + Test %s.%s failed with message: %s + Container logs: + %s + """.formatted( + getClass().getName(), + context.getTestMethod().map(Method::getName).orElse(""), + Optional.ofNullable(cause).map(Throwable::getMessage).orElse(""), + doclingContainer.getLogs()); + + LOG.error(message); + } + }; + static { doclingContainer.start(); } protected abstract DoclingServeApi getDoclingClient(); - @Test - void shouldSuccessfullyCallHealthEndpoint() { - HealthCheckResponse response = getDoclingClient().health(); + private <T> T readValue(String json, Class<T> valueType) { + return ((DoclingServeClient) getDoclingClient()).readValue(json, valueType); + } - assertThat(response) - .isNotNull() - .extracting(HealthCheckResponse::getStatus) - .isEqualTo("ok"); + private <T> String writeValueAsString(T value) { + return ((DoclingServeClient) getDoclingClient()).writeValueAsString(value); } - @Test - void shouldConvertHttpSourceSuccessfully() { - ConvertDocumentRequest request = ConvertDocumentRequest.builder() - .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) - .build(); + @Nested + class ClearTests { + @Test + void shouldClearConvertersSuccessfully() { + var response = getDoclingClient().clearConverters(); - ConvertDocumentResponse response = getDoclingClient().convertSource(request); + assertThat(response) + .isNotNull() +
.extracting(ClearResponse::getStatus) + .isEqualTo("ok"); + } - assertThat(response).isNotNull(); + @Test + void shouldClearResultsSuccessfully() { + var request = ClearResultsRequest.builder().build(); + var response = getDoclingClient().clearResults(request); - assertThat(response.getStatus()).isNotEmpty(); - assertThat(response.getDocument()).isNotNull(); - assertThat(response.getDocument().getFilename()).isNotEmpty(); + assertThat(response) + .isNotNull() + .extracting(ClearResponse::getStatus) + .isEqualTo("ok"); + } + } - if (response.getProcessingTime() != null) { - assertThat(response.getProcessingTime()).isPositive(); + @Nested + class TaskTests { + @Test + void pollInvalidTaskId() { + var request = TaskStatusPollRequest.builder() + .taskId("someInvalidTaskId") + .build(); + + assertThatThrownBy(() -> getDoclingClient().pollTaskStatus(request)) + .hasRootCauseInstanceOf(DoclingServeClientException.class) + .hasRootCauseMessage("An error occurred: {\"detail\":\"Task not found.\"}"); } - assertThat(response.getDocument().getMarkdownContent()).isNotEmpty(); - } + @Test + void convertUrlTaskResult() throws IOException, InterruptedException { + var pollResponse = doPollForTaskCompletion(); + var request = TaskResultRequest.builder() + .taskId(pollResponse.getTaskId()) + .build(); + + var result = getDoclingClient().convertTaskResult(request); + ConvertTests.assertConvertHttpSource(result); + } + + @Test + void pollConvertUrlTask() throws IOException, InterruptedException { + doPollForTaskCompletion(); + } + + private TaskStatusPollResponse doPollForTaskCompletion() throws IOException, InterruptedException { + var response = submitTask(); + var pollRequest = TaskStatusPollRequest.builder() + .taskId(response.getTaskId()) + .build(); + + var doclingClient = getDoclingClient(); + var taskPollResponse = new AtomicReference<>(doclingClient.pollTaskStatus(pollRequest)); + + assertThat(taskPollResponse).isNotNull(); + assertThat(taskPollResponse.get()) + .isNotNull() 
+ .extracting( + TaskStatusPollResponse::getTaskId, + TaskStatusPollResponse::getTaskStatus, + TaskStatusPollResponse::getTaskType + ) + .allMatch(Objects::nonNull); + + assertThat(taskPollResponse.get()) + .extracting( + TaskStatusPollResponse::getTaskId, + TaskStatusPollResponse::getTaskType + ) + .containsExactly( + response.getTaskId(), + response.getTaskType() + ); + + if (taskPollResponse.get().getTaskStatus() != TaskStatus.SUCCESS) { + await() + .atMost(Duration.ofMinutes(1)) + .pollDelay(Duration.ofSeconds(5)) + .pollInterval(Duration.ofSeconds(5)) + .logging(LoggerFactory.getLogger("org.awaitility")::info) + .until(() -> { + taskPollResponse.set(doclingClient.pollTaskStatus(pollRequest)); + return taskPollResponse.get().getTaskStatus() == TaskStatus.SUCCESS; + }); + } + + assertThat(taskPollResponse.get().getTaskStatus()).isEqualTo(TaskStatus.SUCCESS); + + return taskPollResponse.get(); + } + + // @TODO The async api isn't here yet, so we have to do something on our own for now in these tests. 
+ // Once https://github.com/docling-project/docling-java/issues/127 is implemented then these methods below + // Can be switched to use that API for making the calls + private TaskStatusPollResponse submitTask() throws IOException, InterruptedException { + var request = ConvertDocumentRequest.builder() + .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) + .build(); + + var httpRequest = HttpRequest.newBuilder() + .uri(URI.create("%s/v1/convert/source/async".formatted(doclingContainer.getApiUrl()))) + .header("Content-Type", "application/json") + .header("Accept", "application/json") + .POST(new LoggingBodyPublisher<>(request)) + .build(); + + logRequest(httpRequest); + + var httpClient = HttpClient.newBuilder() + .followRedirects(Redirect.NORMAL) + .version(Version.HTTP_1_1) + .build(); + + var httpResponse = httpClient.send(httpRequest, BodyHandlers.ofString()); + var body = httpResponse.body(); + + logResponse(httpResponse, Optional.ofNullable(body)); + + var statusCode = httpResponse.statusCode(); + + if (statusCode >= 400) { + // Handle errors + // The Java HTTPClient doesn't throw exceptions on error codes + throw new DoclingServeClientException("An error occurred: %s".formatted(body), statusCode, body); + } + + var response = readValue(body, TaskStatusPollResponse.class); + + assertThat(response) + .isNotNull() + .extracting( + TaskStatusPollResponse::getTaskId, + TaskStatusPollResponse::getTaskPosition, + TaskStatusPollResponse::getTaskStatus, + TaskStatusPollResponse::getTaskType + ) + .allMatch(Objects::nonNull); + + assertThat(response.getTaskType()).isEqualTo("convert"); + + return response; + } + + private static void logRequest(HttpRequest request) { + var stringBuilder = new StringBuilder(); + stringBuilder.append("\n→ REQUEST: %s %s\n".formatted(request.method(), request.uri())); + stringBuilder.append(" HEADERS:\n"); - @Test - void shouldConvertFileSourceSuccessfully() throws 
IOException { - var fileResource = readFileFromClasspath("story.pdf"); - ConvertDocumentRequest request = ConvertDocumentRequest.builder() - .source(FileSource.builder() - .filename("story.pdf") - .base64String(Base64.getEncoder().encodeToString(fileResource)) - .build() - ) + request.headers().map().forEach((key, values) -> + stringBuilder.append(" %s: %s\n".formatted(key, String.join(", ", values))) + ); - .build(); + LOG.info(stringBuilder.toString()); + } - ConvertDocumentResponse response = getDoclingClient().convertSource(request); + private void logResponse(HttpResponse<String> response, Optional<String> responseBody) { + var stringBuilder = new StringBuilder(); + stringBuilder.append("\n← RESPONSE: %s\n".formatted(response.statusCode())); + stringBuilder.append(" HEADERS:\n"); - assertThat(response).isNotNull(); - assertThat(response.getStatus()).isNotEmpty(); - assertThat(response.getDocument()).isNotNull(); - assertThat(response.getDocument().getFilename()).isEqualTo("story.pdf"); + response.headers().map().forEach((key, values) -> + stringBuilder.append(" %s: %s\n".formatted(key, String.join(", ", values))) + ); - if (response.getProcessingTime() != null) { - assertThat(response.getProcessingTime()).isPositive(); + responseBody + .map(body -> writeValueAsString(readValue(body, Object.class))) + .ifPresent(body -> stringBuilder.append(" BODY:\n%s".formatted(body))); + LOG.info(stringBuilder.toString()); } - assertThat(response.getDocument().getMarkdownContent()).isNotEmpty(); + private class LoggingBodyPublisher<T> implements BodyPublisher { + private final BodyPublisher delegate; + private final String stringContent; + + private LoggingBodyPublisher(T content) { + this.stringContent = writeValueAsString(content); + this.delegate = BodyPublishers.ofString(this.stringContent); + } + + @Override + public long contentLength() { + return this.delegate.contentLength(); + } + + @Override + public void subscribe(Subscriber<? super ByteBuffer> subscriber) { + LOG.info("→ REQUEST BODY: \n{}", 
this.stringContent); + this.delegate.subscribe(subscriber); + } + } } - @Test - void shouldHandleConversionWithDifferentDocumentOptions() { - ConvertDocumentOptions options = ConvertDocumentOptions.builder() - .doOcr(true) - .includeImages(true) - .tableMode(TableFormerMode.FAST) - .documentTimeout(Duration.ofMinutes(1)) - .build(); - - ConvertDocumentRequest request = ConvertDocumentRequest.builder() - .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) - .options(options) - .build(); - - ConvertDocumentResponse response = getDoclingClient().convertSource(request); - - assertThat(response).isNotNull(); - assertThat(response.getStatus()).isNotEmpty(); - assertThat(response.getDocument()).isNotNull(); + @Nested + class HealthTests { + @Test + void shouldSuccessfullyCallHealthEndpoint() { + HealthCheckResponse response = getDoclingClient().health(); + + assertThat(response) + .isNotNull() + .extracting(HealthCheckResponse::getStatus) + .isEqualTo("ok"); + } } - @Test - void shouldHandleResponseWithDoclingDocument() { - ConvertDocumentOptions options = ConvertDocumentOptions.builder() - .toFormat(OutputFormat.JSON) - .build(); + @Nested + class ConvertTests { + static void assertConvertHttpSource(ConvertDocumentResponse response) { + assertThat(response).isNotNull(); + assertThat(response.getStatus()).isNotEmpty(); + assertThat(response.getDocument()).isNotNull(); + assertThat(response.getDocument().getFilename()).isNotEmpty(); + + if (response.getProcessingTime() != null) { + assertThat(response.getProcessingTime()).isPositive(); + } + + assertThat(response.getDocument().getMarkdownContent()).isNotEmpty(); + } - ConvertDocumentRequest request = ConvertDocumentRequest.builder() - .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) - .options(options) - .build(); + @Test + void shouldConvertHttpSourceSuccessfully() { + ConvertDocumentRequest 
request = ConvertDocumentRequest.builder() + .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) + .build(); - ConvertDocumentResponse response = getDoclingClient().convertSource(request); + ConvertDocumentResponse response = getDoclingClient().convertSource(request); + assertConvertHttpSource(response); + } - assertThat(response).isNotNull(); - assertThat(response.getStatus()).isNotEmpty(); - assertThat(response.getDocument()).isNotNull(); + @Test + void shouldConvertFileSourceSuccessfully() throws IOException { + var fileResource = readFileFromClasspath("story.pdf"); + ConvertDocumentRequest request = ConvertDocumentRequest.builder() + .source(FileSource.builder() + .filename("story.pdf") + .base64String(Base64.getEncoder().encodeToString(fileResource)) + .build() + ) - DoclingDocument doclingDocument = response.getDocument().getJsonContent(); - assertThat(doclingDocument).isNotNull(); - assertThat(doclingDocument.getName()).isNotEmpty(); - assertThat(doclingDocument.getTexts().get(0).getLabel()).isEqualTo(DocItemLabel.TITLE); - } + .build(); - @Test - void shouldChunkDocumentWithHierarchicalChunker() { - ConvertDocumentOptions options = ConvertDocumentOptions.builder() - .toFormat(OutputFormat.JSON) - .build(); - - HierarchicalChunkDocumentRequest request = HierarchicalChunkDocumentRequest.builder() - .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) - .options(options) - .includeConvertedDoc(true) - .chunkingOptions(HierarchicalChunkerOptions.builder() - .includeRawText(true) - .useMarkdownTables(true) - .build()) - .build(); - - ChunkDocumentResponse response = getDoclingClient().chunkSourceWithHierarchicalChunker(request); - - assertThat(response).isNotNull(); - assertThat(response.getChunks()).isNotEmpty(); - assertThat(response.getDocuments()).isNotEmpty(); - assertThat(response.getProcessingTime()).isNotNull(); - - List chunks 
= response.getChunks(); - assertThat(chunks).allMatch(chunk -> !chunk.getText().isEmpty()); + ConvertDocumentResponse response = getDoclingClient().convertSource(request); + + assertThat(response).isNotNull(); + assertThat(response.getStatus()).isNotEmpty(); + assertThat(response.getDocument()).isNotNull(); + assertThat(response.getDocument().getFilename()).isEqualTo("story.pdf"); + + if (response.getProcessingTime()!=null) { + assertThat(response.getProcessingTime()).isPositive(); + } + + assertThat(response.getDocument().getMarkdownContent()).isNotEmpty(); + } + + @Test + void shouldHandleConversionWithDifferentDocumentOptions() { + ConvertDocumentOptions options = ConvertDocumentOptions.builder() + .doOcr(true) + .includeImages(true) + .tableMode(TableFormerMode.FAST) + .documentTimeout(Duration.ofMinutes(1)) + .build(); + + ConvertDocumentRequest request = ConvertDocumentRequest.builder() + .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) + .options(options) + .build(); + + ConvertDocumentResponse response = getDoclingClient().convertSource(request); + + assertThat(response).isNotNull(); + assertThat(response.getStatus()).isNotEmpty(); + assertThat(response.getDocument()).isNotNull(); + } + + @Test + void shouldHandleResponseWithDoclingDocument() { + ConvertDocumentOptions options = ConvertDocumentOptions.builder() + .toFormat(OutputFormat.JSON) + .build(); + + ConvertDocumentRequest request = ConvertDocumentRequest.builder() + .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) + .options(options) + .build(); + + ConvertDocumentResponse response = getDoclingClient().convertSource(request); + + assertThat(response).isNotNull(); + assertThat(response.getStatus()).isNotEmpty(); + assertThat(response.getDocument()).isNotNull(); + + DoclingDocument doclingDocument = response.getDocument().getJsonContent(); + 
assertThat(doclingDocument).isNotNull(); + assertThat(doclingDocument.getName()).isNotEmpty(); + assertThat(doclingDocument.getTexts().get(0).getLabel()).isEqualTo(DocItemLabel.TITLE); + } } - @Test - void shouldChunkDocumentWithHybridChunker() { - ConvertDocumentOptions options = ConvertDocumentOptions.builder() - .toFormat(OutputFormat.JSON) - .build(); - - HybridChunkDocumentRequest request = HybridChunkDocumentRequest.builder() - .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) - .options(options) - .includeConvertedDoc(true) - .chunkingOptions(HybridChunkerOptions.builder() - .includeRawText(true) - .useMarkdownTables(true) - .maxTokens(10000) - .tokenizer("sentence-transformers/all-MiniLM-L6-v2") - .build()) - .build(); - - ChunkDocumentResponse response = getDoclingClient().chunkSourceWithHybridChunker(request); - - assertThat(response).isNotNull(); - assertThat(response.getChunks()).isNotEmpty(); - assertThat(response.getDocuments()).isNotEmpty(); - assertThat(response.getProcessingTime()).isNotNull(); - - List chunks = response.getChunks(); - assertThat(chunks).allMatch(chunk -> !chunk.getText().isEmpty()); + @Nested + class ChunkTests { + @Test + void shouldChunkDocumentWithHierarchicalChunker() { + ConvertDocumentOptions options = ConvertDocumentOptions.builder() + .toFormat(OutputFormat.JSON) + .build(); + + HierarchicalChunkDocumentRequest request = HierarchicalChunkDocumentRequest.builder() + .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) + .options(options) + .includeConvertedDoc(true) + .chunkingOptions(HierarchicalChunkerOptions.builder() + .includeRawText(true) + .useMarkdownTables(true) + .build()) + .build(); + + ChunkDocumentResponse response = getDoclingClient().chunkSourceWithHierarchicalChunker(request); + + assertThat(response).isNotNull(); + assertThat(response.getChunks()).isNotEmpty(); + 
assertThat(response.getDocuments()).isNotEmpty(); + assertThat(response.getProcessingTime()).isNotNull(); + + List<Chunk> chunks = response.getChunks(); + assertThat(chunks).allMatch(chunk -> !chunk.getText().isEmpty()); + } + + @Test + void shouldChunkDocumentWithHybridChunker() { + ConvertDocumentOptions options = ConvertDocumentOptions.builder() + .toFormat(OutputFormat.JSON) + .build(); + + HybridChunkDocumentRequest request = HybridChunkDocumentRequest.builder() + .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build()) + .options(options) + .includeConvertedDoc(true) + .chunkingOptions(HybridChunkerOptions.builder() + .includeRawText(true) + .useMarkdownTables(true) + .maxTokens(10000) + .tokenizer("sentence-transformers/all-MiniLM-L6-v2") + .build()) + .build(); + + ChunkDocumentResponse response = getDoclingClient().chunkSourceWithHybridChunker(request); + + assertThat(response).isNotNull(); + assertThat(response.getChunks()).isNotEmpty(); + assertThat(response.getDocuments()).isNotEmpty(); + assertThat(response.getProcessingTime()).isNotNull(); + + List<Chunk> chunks = response.getChunks(); + assertThat(chunks).allMatch(chunk -> !chunk.getText().isEmpty()); + } } private static byte[] readFileFromClasspath(String filePath) throws IOException { diff --git a/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/DoclingServeJackson2ClientTests.java b/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/DoclingServeJackson2ClientTests.java index 9ad3742..a84ebaf 100644 --- a/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/DoclingServeJackson2ClientTests.java +++ b/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/DoclingServeJackson2ClientTests.java @@ -16,6 +16,7 @@ static void setUp() { .baseUrl(doclingContainer.getApiUrl()) .logRequests() .logResponses() + .prettyPrint() .build(); } diff --git
a/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/DoclingServeJackson3ClientTests.java b/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/DoclingServeJackson3ClientTests.java index be832a5..97bc9f9 100644 --- a/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/DoclingServeJackson3ClientTests.java +++ b/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/DoclingServeJackson3ClientTests.java @@ -15,6 +15,7 @@ static void setUp() { doclingClient = DoclingServeJackson3Client.builder() .logRequests() .logResponses() + .prettyPrint() .baseUrl(doclingContainer.getApiUrl()) .build(); } diff --git a/docling-testing/docling-version-tests/src/main/resources/application.yml b/docling-testing/docling-version-tests/src/main/resources/application.yml index 8418a9d..818d115 100644 --- a/docling-testing/docling-version-tests/src/main/resources/application.yml +++ b/docling-testing/docling-version-tests/src/main/resources/application.yml @@ -1,4 +1,6 @@ quarkus: + http: + test-port: -1 rest-client: github-container-registry: url: https://ghcr.io diff --git a/docs/src/doc/docs/core.md b/docs/src/doc/docs/core.md index 2b8b896..0673f82 100644 --- a/docs/src/doc/docs/core.md +++ b/docs/src/doc/docs/core.md @@ -1,5 +1,7 @@ # Docling Core +[![docling-core version](https://img.shields.io/badge/docling--core_v{{ gradle.project_version }}-orange)](https://docling-project.github.io/docling-java/{{ gradle.project_version }}/core) + The `docling-core` module provides the core data types used by Docling for document representation. It defines the `DoclingDocument` model, which captures the structure and content of documents across various formats, along with utilities for working with these types. The base Java version is 17. This module has no external dependencies, making it lightweight and easy to integrate into your projects. 
It represents the foundational building block for the other Docling Java modules. diff --git a/docs/src/doc/docs/docling-serve/serve-api.md b/docs/src/doc/docs/docling-serve/serve-api.md index b9fe5f5..9e2a65f 100644 --- a/docs/src/doc/docs/docling-serve/serve-api.md +++ b/docs/src/doc/docs/docling-serve/serve-api.md @@ -1,5 +1,7 @@ # Docling Serve API +[![docling-serve-api version](https://img.shields.io/badge/docling--serve--api_v{{ gradle.project_version }}-orange)](https://docling-project.github.io/docling-java/{{ gradle.project_version }}/docling-serve/serve-api) + The `docling-serve-api` module defines the core, framework-agnostic Java API used to communicate with a [Docling Serve](https://github.com/docling-project/docling-serve) backend. It provides the request/response model and the main `DoclingServeApi` interface. You can use any implementation of this interface to talk to a running diff --git a/docs/src/doc/docs/docling-serve/serve-client.md b/docs/src/doc/docs/docling-serve/serve-client.md index 6fea4d9..bf21455 100644 --- a/docs/src/doc/docs/docling-serve/serve-client.md +++ b/docs/src/doc/docs/docling-serve/serve-client.md @@ -1,5 +1,7 @@ # Docling Serve Client +[![docling-serve-client version](https://img.shields.io/badge/docling--serve--client_v{{ gradle.project_version }}-orange)](https://docling-project.github.io/docling-java/{{ gradle.project_version }}/docling-serve/serve-client) + The `docling-serve-client` module is the reference HTTP client for talking to a [Docling Serve](https://github.com/docling-project/docling-serve) backend. 
diff --git a/docs/src/doc/docs/testcontainers.md b/docs/src/doc/docs/testcontainers.md index af0a191..f9532dc 100644 --- a/docs/src/doc/docs/testcontainers.md +++ b/docs/src/doc/docs/testcontainers.md @@ -1,5 +1,7 @@ # Testcontainers +[![docling-testcontainers version](https://img.shields.io/badge/docling--testcontainers_v{{ gradle.project_version }}-orange)](https://docling-project.github.io/docling-java/{{ gradle.project_version }}/testcontainers) + The `docling-testcontainers` module provides a ready-to-use [Testcontainers](https://testcontainers.com/) integration for running a [Docling Serve](https://github.com/docling-project/docling-serve) instance in your tests. It wraps the official container image and exposes a simple Java API so you can spin up Docling as part of your JUnit test lifecycle and exercise client code against a real server. If you need to talk to a running server from your application code, pair this module with the reference HTTP client: diff --git a/docs/src/doc/docs/whats-new.md b/docs/src/doc/docs/whats-new.md index a8544e1..e254821 100644 --- a/docs/src/doc/docs/whats-new.md +++ b/docs/src/doc/docs/whats-new.md @@ -1,9 +1,15 @@ -# What's New in Docling Java {{ gradle.project_version }} +# What's New in Docling Java Docling Java {{ gradle.project_version }} provides a number of new features, enhancements, and bug fixes. This page includes the highlights of the release, but you can also check out the full [release notes](https://github.com/docling-project/docling-java/releases) for more details about each new feature and bug fix. ## Docling Serve +### {{ gradle.project_version }} + +* Implementation of the Docling Serve clear and task APIs in `docling-serve-api` and `docling-serve-client`. + +### 0.3.0 + * The classes in the `docling-serve-api` module have been moved from the `ai.docling.api.serve` package to the `ai.docling.serve.api` package. 
* The classes in the `docling-serve-client` module have been moved from the `ai.docling.client.serve` package to the `ai.docling.serve.client` package. * The classes in the `docling-core` module have been moved from the `ai.docling.api.core` package to the `ai.docling.core` package. diff --git a/gradle.properties b/gradle.properties index b18fb65..de9400a 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1 +1,4 @@ java.version=17 +org.gradle.parallel=true +org.gradle.caching=true +org.gradle.configuration-cache=true diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index b8b7219..771e462 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -1,5 +1,6 @@ [versions] assertj = "3.27.6" +awaitility = "4.3.0" jackson2 = "2.20.1" jackson3 = "3.0.3" jreleaser = "1.20.0" @@ -18,6 +19,9 @@ quarkus-wiremock = "1.5.2" # assertj assertj-core = { group = "org.assertj", name = "assertj-core", version.ref = "assertj" } +# awaitility +awaitility = { group = "org.awaitility", name = "awaitility", version.ref = "awaitility" } + # Jackson jackson-annotations = { group = "com.fasterxml.jackson.core", name = "jackson-annotations" } jackson-bom = { group = "tools.jackson", name = "jackson-bom", version.ref = "jackson3" } From 2e6f5623955ba715a9736ada43f74034c81903f1 Mon Sep 17 00:00:00 2001 From: Eric Deandrea Date: Tue, 9 Dec 2025 15:32:14 -0500 Subject: [PATCH 2/4] feat: Implement the clear and task APIs Also enabled parallel builds for performance Fixes #188 Signed-off-by: Eric Deandrea --- docs/src/doc/docs/whats-new.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/src/doc/docs/whats-new.md b/docs/src/doc/docs/whats-new.md index e254821..5c51fe6 100644 --- a/docs/src/doc/docs/whats-new.md +++ b/docs/src/doc/docs/whats-new.md @@ -7,6 +7,7 @@ Docling Java {{ gradle.project_version }} provides a number of new features, enh ### {{ gradle.project_version }} * Implementation of the Docling Serve clear and task APIs in 
`docling-serve-api` and `docling-serve-client`. +* Adding `pretty-print` configuration option to `DoclingServeClient` to enable pretty printing of JSON requests and responses. ### 0.3.0 From db31add50f448462b0b159a24888c80c2b92c5af Mon Sep 17 00:00:00 2001 From: Eric Deandrea Date: Wed, 10 Dec 2025 14:42:50 -0500 Subject: [PATCH 3/4] feat: Implement the clear and task APIs Also enabled parallel builds for performance Fixes #188 Signed-off-by: Eric Deandrea --- .../serve/api/DoclingServeClearApi.java | 39 +++++++--- .../serve/api/DoclingServeTaskApi.java | 78 ++++++++++++------- .../clear/request/ClearResultsRequest.java | 34 -------- .../serve/api/clear/request/package-info.java | 4 - .../api/task/request/TaskResultRequest.java | 9 --- .../task/request/TaskStatusPollRequest.java | 46 ----------- .../serve/api/task/request/package-info.java | 7 -- .../src/main/java/module-info.java | 2 - .../request/ClearResultsRequestTests.java | 22 ------ .../task/request/TaskResultRequestTests.java | 14 ---- .../request/TaskStatusPollRequestTests.java | 36 --------- .../docling/serve/client/ClearOperations.java | 21 +++-- .../serve/client/DoclingServeClient.java | 21 +++-- .../docling/serve/client/TaskOperations.java | 70 ++++++++++++----- .../AbstractDoclingServeClientTests.java | 26 ++----- 15 files changed, 160 insertions(+), 269 deletions(-) delete mode 100644 docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/request/ClearResultsRequest.java delete mode 100644 docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/request/package-info.java delete mode 100644 docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/TaskResultRequest.java delete mode 100644 docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/TaskStatusPollRequest.java delete mode 100644 docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/package-info.java delete mode 100644 
docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/clear/request/ClearResultsRequestTests.java delete mode 100644 docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/task/request/TaskResultRequestTests.java delete mode 100644 docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/task/request/TaskStatusPollRequestTests.java diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeClearApi.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeClearApi.java index 36fb3e3..b1a85b0 100644 --- a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeClearApi.java +++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeClearApi.java @@ -1,6 +1,9 @@ package ai.docling.serve.api; -import ai.docling.serve.api.clear.request.ClearResultsRequest; +import java.time.Duration; + +import org.jspecify.annotations.Nullable; + import ai.docling.serve.api.clear.response.ClearResponse; /** @@ -10,6 +13,15 @@ * on specified thresholds or default configurations. */ public interface DoclingServeClearApi { + /** + * Represents the default duration used as a threshold for clearing stale results + * or data in the Docling Serve Clear API. Results older than this duration + * are considered stale and may be subject to cleanup. + * + * The value is predefined as 1 hour (3600 seconds). + */ + Duration DEFAULT_OLDER_THAN = Duration.ofSeconds(3600); + /** * Clears all registered converters associated with the API. * This method removes any previously configured or cached converters, @@ -19,15 +31,22 @@ public interface DoclingServeClearApi { ClearResponse clearConverters(); /** - * Clears previously stored results based on the criteria provided in the request. - * This method removes stale results or data that meet the threshold specified - * in the {@link ClearResultsRequest}. 
It is typically used to clean up older - * or unused data retained by the service. + * Clears stored results that are older than the specified duration threshold. + * This method is used for housekeeping to remove stale or outdated data from the system. + * + * @param olderThen the duration threshold; only results older than this duration will be cleared. If {@code null}, {@link #DEFAULT_OLDER_THAN} is used. + * @return a {@link ClearResponse} object containing the status of the clear operation.
*/ - ClearResponse clearResults(ClearResultsRequest request); + default ClearResponse clearResults() { + return clearResults(DEFAULT_OLDER_THAN); + } } diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeTaskApi.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeTaskApi.java index b410995..da25a98 100644 --- a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeTaskApi.java +++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeTaskApi.java @@ -1,9 +1,11 @@ package ai.docling.serve.api; +import java.time.Duration; + +import org.jspecify.annotations.Nullable; + import ai.docling.serve.api.chunk.response.ChunkDocumentResponse; import ai.docling.serve.api.convert.response.ConvertDocumentResponse; -import ai.docling.serve.api.task.request.TaskResultRequest; -import ai.docling.serve.api.task.request.TaskStatusPollRequest; import ai.docling.serve.api.task.response.TaskStatusPollResponse; /** @@ -16,41 +18,59 @@ */ public interface DoclingServeTaskApi { /** - * Polls the status of a task using the provided request object. - * This method allows querying the current status of an asynchronous task - * in progress or completed, based on its unique identifier and other - * optional parameters such as wait time. + * The default wait time between status polling attempts for a task. + *

+ * This value is used when no explicit wait time is passed to + * {@link #pollTaskStatus(String, Duration)}. It is set to {@link Duration#ZERO}, + * meaning there is no delay by default between consecutive polling attempts. + *

+ */ + Duration DEFAULT_STATUS_POLL_WAIT_TIME = Duration.ZERO; + + /** + * Polls the status of a task asynchronously and retrieves its current state. + * Allows for configurable wait time between polling attempts. + * If the wait time is {@code null}, the default wait time ({@link #DEFAULT_STATUS_POLL_WAIT_TIME}) is used. + * + * @param taskId the unique identifier of the task whose status is being polled + * @param waitTime the duration to wait before polling the status, or null to use the default polling interval + * @return a {@link TaskStatusPollResponse} containing the current status of the task and associated metadata + */ + TaskStatusPollResponse pollTaskStatus(String taskId, @Nullable Duration waitTime); + + /** + * Polls the status of a task asynchronously using the default wait time. + * This convenience method delegates to {@link #pollTaskStatus(String, Duration)} + * with {@code DEFAULT_STATUS_POLL_WAIT_TIME} as the wait time. * - * @param request the {@link TaskStatusPollRequest} containing the details - * for polling, including the task identifier and optional wait duration. - * @return a {@link TaskStatusPollResponse} containing the task's current status, - * including progress, position in the queue, and other metadata, - * if available. + * @param taskId the unique identifier of the task whose status is being polled + * @return a {@link TaskStatusPollResponse} containing the current status of the task + * and associated metadata */ - TaskStatusPollResponse pollTaskStatus(TaskStatusPollRequest request); + default TaskStatusPollResponse pollTaskStatus(String taskId) { + return pollTaskStatus(taskId, DEFAULT_STATUS_POLL_WAIT_TIME); + } /** - * Converts the task result from a completed process into a document conversion response. - * This method processes the provided task result request, retrieves conversion data, and - * returns the resulting document conversion details.
+ * Converts the completed task result identified by the provided task ID into a document response. + * This method processes the task data associated with the given ID and generates a response + * encapsulating the converted document details. * - * @param request the {@link TaskResultRequest} containing the task identifier for which - * the result is being retrieved and converted. - * @return a {@link ConvertDocumentResponse} containing the details of the converted document, - * such as the document data, processing time, status, and any associated errors. + * @param taskId the unique identifier of the task whose result needs to be converted into a document response + * @return a {@link ConvertDocumentResponse} containing the details of the converted document, processing metadata, + * errors (if any), and other relevant information */ - ConvertDocumentResponse convertTaskResult(TaskResultRequest request); + ConvertDocumentResponse convertTaskResult(String taskId); /** - * Processes the result of a completed task request by transforming the task data into - * a chunked document response. This method retrieves conversion data specific to the - * requested task and generates a response containing the chunked document details. + * Processes the results of a completed task identified by the given task ID and generates a + * response containing chunked document details. This method is used to break down the document + * associated with the task into manageable chunks, making it suitable for subsequent processing + * or analysis. * - * @param request the {@link TaskResultRequest} containing the unique task identifier - * for which the result is being processed into chunks. - * @return a {@link ChunkDocumentResponse} containing the details of the chunked document, - * including the generated chunks, associated documents, processing time, and any - * relevant metadata. 
+ * @param taskId the unique identifier of the task whose result is to be processed and chunked into + * a {@link ChunkDocumentResponse} + * @return a {@link ChunkDocumentResponse} containing the chunked document details and related metadata */ - ChunkDocumentResponse chunkTaskResult(TaskResultRequest request); + ChunkDocumentResponse chunkTaskResult(String taskId); } diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/request/ClearResultsRequest.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/request/ClearResultsRequest.java deleted file mode 100644 index def34e5..0000000 --- a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/request/ClearResultsRequest.java +++ /dev/null @@ -1,34 +0,0 @@ -package ai.docling.serve.api.clear.request; - -import java.time.Duration; - -/** - * Represents a request to clear stale results or data in the Docling Serve Clear API. - * This class stores the parameters required for defining the threshold duration - * beyond which results are considered stale and subject to cleanup. - * - * The main parameter, {@code olderThan}, indicates the threshold duration, - * while a default duration of 1 hour (3600 seconds) is provided through {@code DEFAULT_OLDER_THAN}. - * - * This class is designed to be immutable and uses the Builder pattern - * to facilitate the creation of instances with customized configurations. - * - * Annotations are used to support JSON serialization and the deserialization process. - */ -@lombok.Builder(toBuilder = true) -@lombok.Getter -@lombok.ToString -public class ClearResultsRequest { - /** - * Represents the default duration used as a threshold for clearing stale results - * or data in the Docling Serve Clear API. Results older than this duration - * are considered stale and may be subject to cleanup. - * - * The value is predefined as 1 hour (3600 seconds). 
- */ - public static final Duration DEFAULT_OLDER_THAN = Duration.ofSeconds(3600); - - @lombok.Builder.Default - @lombok.NonNull - private Duration olderThan = DEFAULT_OLDER_THAN; -} diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/request/package-info.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/request/package-info.java deleted file mode 100644 index 66ba63c..0000000 --- a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/request/package-info.java +++ /dev/null @@ -1,4 +0,0 @@ -@NullMarked -package ai.docling.serve.api.clear.request; - -import org.jspecify.annotations.NullMarked; diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/TaskResultRequest.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/TaskResultRequest.java deleted file mode 100644 index 3b62f12..0000000 --- a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/TaskResultRequest.java +++ /dev/null @@ -1,9 +0,0 @@ -package ai.docling.serve.api.task.request; - -@lombok.Builder(toBuilder = true) -@lombok.Getter -@lombok.ToString -public class TaskResultRequest { - @lombok.NonNull - private String taskId; -} diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/TaskStatusPollRequest.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/TaskStatusPollRequest.java deleted file mode 100644 index 54bef7c..0000000 --- a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/TaskStatusPollRequest.java +++ /dev/null @@ -1,46 +0,0 @@ -package ai.docling.serve.api.task.request; - -import java.time.Duration; - -/** - * Represents a request to poll the status of a task within the Docling Task API. 
- * This class encapsulates the information needed to query the status of an - * asynchronous task, including task identification and poll wait time. - * - * The {@code TaskStatusPollRequest} supports serialization and deserialization - * using Jackson, ensuring compatibility with JSON-based APIs. It also provides - * a builder for constructing instances. - * - * Key attributes include: - * - {@code taskId}: A unique identifier for the task whose status is to be polled. - * - {@code waitTime}: An optional duration indicating the wait interval between - * poll attempts. Defaults to {@link #DEFAULT_STATUS_POLL_WAIT_TIME} if not specified. - * - * This class is designed to be immutable, with all properties being final and - * accessible through getter methods. It supports features such as pretty-printing - * via {@code toString} and JSON-specific annotations for seamless integration - * with APIs consuming or producing JSON. - * - * Thread Safety: The class is thread-safe as it is immutable. - */ -@lombok.Builder(toBuilder = true) -@lombok.Getter -@lombok.ToString -public class TaskStatusPollRequest { - /** - * The default wait time between status polling attempts for a task. - *

- * This value is used when no explicit wait time is specified in a - * {@code TaskStatusPollRequest} instance. It is set to {@link Duration#ZERO}, - * meaning there is no delay by default between consecutive polling attempts. - *

- */ - public static final Duration DEFAULT_STATUS_POLL_WAIT_TIME = Duration.ZERO; - - @lombok.NonNull - private String taskId; - - @lombok.Builder.Default - @lombok.NonNull - private Duration waitTime = DEFAULT_STATUS_POLL_WAIT_TIME; -} diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/package-info.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/package-info.java deleted file mode 100644 index 701fb4c..0000000 --- a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/request/package-info.java +++ /dev/null @@ -1,7 +0,0 @@ -/** - * The Docling task api - */ -@NullMarked -package ai.docling.serve.api.task.request; - -import org.jspecify.annotations.NullMarked; diff --git a/docling-serve/docling-serve-api/src/main/java/module-info.java b/docling-serve/docling-serve-api/src/main/java/module-info.java index eb58960..c58f773 100644 --- a/docling-serve/docling-serve-api/src/main/java/module-info.java +++ b/docling-serve/docling-serve-api/src/main/java/module-info.java @@ -27,11 +27,9 @@ exports ai.docling.serve.api.convert.response; // Clear API - exports ai.docling.serve.api.clear.request; exports ai.docling.serve.api.clear.response; // Task API - exports ai.docling.serve.api.task.request; exports ai.docling.serve.api.task.response; // Serialization helpers diff --git a/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/clear/request/ClearResultsRequestTests.java b/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/clear/request/ClearResultsRequestTests.java deleted file mode 100644 index d2acd27..0000000 --- a/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/clear/request/ClearResultsRequestTests.java +++ /dev/null @@ -1,22 +0,0 @@ -package ai.docling.serve.api.clear.request; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -import 
org.junit.jupiter.api.Test; - -class ClearResultsRequestTests { - @Test - void whenOlderThanIsNullThenThrow() { - assertThatThrownBy(() -> ClearResultsRequest.builder().olderThan(null).build()) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("olderThan is marked non-null but is null"); - } - - @Test - void defaultOlderThan() { - assertThat(ClearResultsRequest.builder().build().getOlderThan()) - .isNotNull() - .isEqualByComparingTo(ClearResultsRequest.DEFAULT_OLDER_THAN); - } -} diff --git a/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/task/request/TaskResultRequestTests.java b/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/task/request/TaskResultRequestTests.java deleted file mode 100644 index 92bfb58..0000000 --- a/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/task/request/TaskResultRequestTests.java +++ /dev/null @@ -1,14 +0,0 @@ -package ai.docling.serve.api.task.request; - -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -import org.junit.jupiter.api.Test; - -class TaskResultRequestTests { - @Test - void whenTaskIdIsNullThenThrow() { - assertThatThrownBy(() -> TaskResultRequest.builder().build()) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("taskId is marked non-null but is null"); - } -} diff --git a/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/task/request/TaskStatusPollRequestTests.java b/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/task/request/TaskStatusPollRequestTests.java deleted file mode 100644 index f4f75aa..0000000 --- a/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/task/request/TaskStatusPollRequestTests.java +++ /dev/null @@ -1,36 +0,0 @@ -package ai.docling.serve.api.task.request; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -import java.time.Duration; - -import 
org.junit.jupiter.api.Test; - -class TaskStatusPollRequestTests { - @Test - void whenTaskIdIsNullThenThrow() { - assertThatThrownBy(() -> TaskStatusPollRequest.builder().waitTime(Duration.ofMinutes(1)).build()) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("taskId is marked non-null but is null"); - } - - @Test - void whenWaitTimeIsNullThenThrow() { - assertThatThrownBy(() -> TaskStatusPollRequest.builder().taskId("1234").waitTime(null).build()) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("waitTime is marked non-null but is null"); - } - - @Test - void defaults() { - var request = TaskStatusPollRequest.builder().taskId("1234").build(); - - assertThat(request.getWaitTime()) - .isNotNull() - .isEqualByComparingTo(TaskStatusPollRequest.DEFAULT_STATUS_POLL_WAIT_TIME); - - assertThat(request.getTaskId()) - .isEqualTo("1234"); - } -} diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ClearOperations.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ClearOperations.java index 3c2e5c4..43b2dde 100644 --- a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ClearOperations.java +++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ClearOperations.java @@ -1,7 +1,11 @@ package ai.docling.serve.client; +import java.time.Duration; +import java.util.Optional; + +import org.jspecify.annotations.Nullable; + import ai.docling.serve.api.DoclingServeClearApi; -import ai.docling.serve.api.clear.request.ClearResultsRequest; import ai.docling.serve.api.clear.response.ClearResponse; /** @@ -23,12 +27,17 @@ public ClearResponse clearConverters() { } /** - * Clears previously stored results based on the criteria provided in the request. + * Clears the results stored by the service that are older than the specified duration. * - * @param request the {@link ClearResultsRequest} containing the criteria for clearing results. 
- * @return a {@link ClearResponse} indicating the outcome of the clear operation. + * @param olderThen the {@link Duration} indicating the age threshold. Results older than + * this duration will be cleared. + * @return a {@link ClearResponse} containing information about the outcome of the clear operation. */ - public ClearResponse clearResults(ClearResultsRequest request) { - return this.httpOperations.executeGet("/v1/clear/results?older_then=%d".formatted(request.getOlderThan().toSeconds()), ClearResponse.class); + public ClearResponse clearResults(@Nullable Duration olderThen) { + var olderThenSeconds = Optional.ofNullable(olderThen) + .orElse(DEFAULT_OLDER_THAN) + .toSeconds(); + + return this.httpOperations.executeGet("/v1/clear/results?older_then=%d".formatted(olderThenSeconds), ClearResponse.class); } } diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeClient.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeClient.java index 929bac3..5a9bd7e 100644 --- a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeClient.java +++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeClient.java @@ -12,10 +12,12 @@ import java.net.http.HttpResponse; import java.net.http.HttpResponse.BodyHandlers; import java.nio.ByteBuffer; +import java.time.Duration; import java.util.Objects; import java.util.Optional; import java.util.concurrent.Flow.Subscriber; +import org.jspecify.annotations.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -28,13 +30,10 @@ import ai.docling.serve.api.chunk.request.HierarchicalChunkDocumentRequest; import ai.docling.serve.api.chunk.request.HybridChunkDocumentRequest; import ai.docling.serve.api.chunk.response.ChunkDocumentResponse; -import ai.docling.serve.api.clear.request.ClearResultsRequest; import ai.docling.serve.api.clear.response.ClearResponse; import 
ai.docling.serve.api.convert.request.ConvertDocumentRequest; import ai.docling.serve.api.convert.response.ConvertDocumentResponse; import ai.docling.serve.api.health.HealthCheckResponse; -import ai.docling.serve.api.task.request.TaskResultRequest; -import ai.docling.serve.api.task.request.TaskStatusPollRequest; import ai.docling.serve.api.task.response.TaskStatusPollResponse; /** @@ -222,18 +221,18 @@ public ChunkDocumentResponse chunkSourceWithHybridChunker(HybridChunkDocumentReq } @Override - public TaskStatusPollResponse pollTaskStatus(TaskStatusPollRequest request) { - return this.taskOps.pollTaskStatus(request); + public TaskStatusPollResponse pollTaskStatus(String taskId, @Nullable Duration waitTime) { + return this.taskOps.pollTaskStatus(taskId, waitTime); } @Override - public ConvertDocumentResponse convertTaskResult(TaskResultRequest request) { - return this.taskOps.convertTaskResult(request); + public ConvertDocumentResponse convertTaskResult(String taskId) { + return this.taskOps.convertTaskResult(taskId); } @Override - public ChunkDocumentResponse chunkTaskResult(TaskResultRequest request) { - return this.taskOps.chunkTaskResult(request); + public ChunkDocumentResponse chunkTaskResult(String taskId) { + return this.taskOps.chunkTaskResult(taskId); } @Override @@ -242,8 +241,8 @@ public ClearResponse clearConverters() { } @Override - public ClearResponse clearResults(ClearResultsRequest request) { - return this.clearOps.clearResults(request); + public ClearResponse clearResults(Duration olderThen) { + return this.clearOps.clearResults(olderThen); } private class LoggingBodyPublisher implements BodyPublisher { diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/TaskOperations.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/TaskOperations.java index 13bcdc0..fc39420 100644 --- a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/TaskOperations.java +++ 
b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/TaskOperations.java @@ -1,11 +1,15 @@ package ai.docling.serve.client; +import java.time.Duration; +import java.util.Optional; + +import org.jspecify.annotations.Nullable; + import ai.docling.serve.api.DoclingServeTaskApi; import ai.docling.serve.api.chunk.response.ChunkDocumentResponse; import ai.docling.serve.api.convert.response.ConvertDocumentResponse; -import ai.docling.serve.api.task.request.TaskResultRequest; -import ai.docling.serve.api.task.request.TaskStatusPollRequest; import ai.docling.serve.api.task.response.TaskStatusPollResponse; +import ai.docling.serve.api.util.ValidationUtils; /** * Base class for task API operations. Provides operations for managing and querying @@ -19,33 +23,63 @@ final class TaskOperations implements DoclingServeTaskApi { } /** - * Polls the status of a task using the provided request object. + * Polls the current status of a specified task. * - * @param request the {@link TaskStatusPollRequest} containing the details for polling. - * @return a {@link TaskStatusPollResponse} containing the task's current status. + * This method sends a request to the server to retrieve the current status + * of the task identified by the given {@code taskId}. Optionally, a {@code waitTime} + * can be specified to indicate how long the server should wait for a status change + * before responding. + * + * @param taskId the unique identifier of the task whose status is being polled. + * Must not be blank or null. + * @param waitTime an optional {@link Duration} specifying how long the server should + * wait for a status change before responding. If null, no wait is applied. + * @return a {@link TaskStatusPollResponse} containing the current status details of the task. + * @throws IllegalArgumentException if {@code taskId} is blank or null. 
*/ - public TaskStatusPollResponse pollTaskStatus(TaskStatusPollRequest request) { - return this.httpOperations.executeGet("/v1/status/poll/%s?wait=%d".formatted(request.getTaskId(), request.getWaitTime().toSeconds()), TaskStatusPollResponse.class); + public TaskStatusPollResponse pollTaskStatus(String taskId, @Nullable Duration waitTime) { + ValidationUtils.ensureNotBlank(taskId, "taskId"); + + var waitTimeSeconds = Optional.ofNullable(waitTime) + .orElse(DEFAULT_STATUS_POLL_WAIT_TIME) + .toSeconds(); + + return this.httpOperations.executeGet("/v1/status/poll/%s?wait=%d".formatted(taskId, waitTimeSeconds), TaskStatusPollResponse.class); } /** - * Converts the task result from a completed process into a document conversion response. + * Retrieves the result of a completed task identified by the specified task ID. + * + * This method sends a GET request to the server to fetch the result of the task. + * The returned response includes details about the converted document and any + * potential errors or processing metadata associated with the task. * - * @param request the {@link TaskResultRequest} containing the task identifier. - * @return a {@link ConvertDocumentResponse} containing the details of the converted document. + * @param taskId the unique identifier of the task whose result is being fetched. + * Must not be blank or null. + * @return a {@link ConvertDocumentResponse} containing the details of the converted document, + * processing time, status, and any errors encountered during processing. + * @throws IllegalArgumentException if {@code taskId} is blank or null. 
*/ - public ConvertDocumentResponse convertTaskResult(TaskResultRequest request) { - return this.httpOperations.executeGet("/v1/result/%s".formatted(request.getTaskId()), ConvertDocumentResponse.class); + public ConvertDocumentResponse convertTaskResult(String taskId) { + ValidationUtils.ensureNotBlank(taskId, "taskId"); + return this.httpOperations.executeGet("/v1/result/%s".formatted(taskId), ConvertDocumentResponse.class); } /** - * Processes the result of a completed task request by transforming the task data into - * a chunked document response. + * Retrieves the result of a completed task in chunks, identified by the specified task ID. + * + * This method sends a GET request to fetch the result of the task, providing the output + * in a chunked format. The response includes details about the chunks, related documents, + * processing time, and other metadata related to task completion. * - * @param request the {@link TaskResultRequest} containing the unique task identifier. - * @return a {@link ChunkDocumentResponse} containing the details of the chunked document. + * @param taskId the unique identifier of the task whose chunked result is being fetched. + * Must not be blank or null. + * @return a {@link ChunkDocumentResponse} containing information about the chunks, + * related documents, processing time, and any additional task metadata. + * @throws IllegalArgumentException if {@code taskId} is blank or null. 
*/ - public ChunkDocumentResponse chunkTaskResult(TaskResultRequest request) { - return this.httpOperations.executeGet("/v1/result/%s".formatted(request.getTaskId()), ChunkDocumentResponse.class); + public ChunkDocumentResponse chunkTaskResult(String taskId) { + ValidationUtils.ensureNotBlank(taskId, "taskId"); + return this.httpOperations.executeGet("/v1/result/%s".formatted(taskId), ChunkDocumentResponse.class); } } diff --git a/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/AbstractDoclingServeClientTests.java b/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/AbstractDoclingServeClientTests.java index c71054d..40ac119 100644 --- a/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/AbstractDoclingServeClientTests.java +++ b/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/AbstractDoclingServeClientTests.java @@ -43,7 +43,6 @@ import ai.docling.serve.api.chunk.request.options.HybridChunkerOptions; import ai.docling.serve.api.chunk.response.Chunk; import ai.docling.serve.api.chunk.response.ChunkDocumentResponse; -import ai.docling.serve.api.clear.request.ClearResultsRequest; import ai.docling.serve.api.clear.response.ClearResponse; import ai.docling.serve.api.convert.request.ConvertDocumentRequest; import ai.docling.serve.api.convert.request.options.ConvertDocumentOptions; @@ -53,8 +52,6 @@ import ai.docling.serve.api.convert.request.source.HttpSource; import ai.docling.serve.api.convert.response.ConvertDocumentResponse; import ai.docling.serve.api.health.HealthCheckResponse; -import ai.docling.serve.api.task.request.TaskResultRequest; -import ai.docling.serve.api.task.request.TaskStatusPollRequest; import ai.docling.serve.api.task.response.TaskStatus; import ai.docling.serve.api.task.response.TaskStatusPollResponse; import ai.docling.testcontainers.serve.DoclingServeContainer; @@ -118,8 +115,7 @@ void shouldClearConvertersSuccessfully() { @Test void 
shouldClearResultsSuccessfully() { - var request = ClearResultsRequest.builder().build(); - var response = getDoclingClient().clearResults(request); + var response = getDoclingClient().clearResults(); assertThat(response) .isNotNull() @@ -132,11 +128,7 @@ void shouldClearResultsSuccessfully() { class TaskTests { @Test void pollInvalidTaskId() { - var request = TaskStatusPollRequest.builder() - .taskId("someInvalidTaskId") - .build(); - - assertThatThrownBy(() -> getDoclingClient().pollTaskStatus(request)) + assertThatThrownBy(() -> getDoclingClient().pollTaskStatus("someInvalidTaskId")) .hasRootCauseInstanceOf(DoclingServeClientException.class) .hasRootCauseMessage("An error occurred: {\"detail\":\"Task not found.\"}"); } @@ -144,11 +136,7 @@ void pollInvalidTaskId() { @Test void convertUrlTaskResult() throws IOException, InterruptedException { var pollResponse = doPollForTaskCompletion(); - var request = TaskResultRequest.builder() - .taskId(pollResponse.getTaskId()) - .build(); - - var result = getDoclingClient().convertTaskResult(request); + var result = getDoclingClient().convertTaskResult(pollResponse.getTaskId()); ConvertTests.assertConvertHttpSource(result); } @@ -159,12 +147,8 @@ void pollConvertUrlTask() throws IOException, InterruptedException { private TaskStatusPollResponse doPollForTaskCompletion() throws IOException, InterruptedException { var response = submitTask(); - var pollRequest = TaskStatusPollRequest.builder() - .taskId(response.getTaskId()) - .build(); - var doclingClient = getDoclingClient(); - var taskPollResponse = new AtomicReference<>(doclingClient.pollTaskStatus(pollRequest)); + var taskPollResponse = new AtomicReference<>(doclingClient.pollTaskStatus(response.getTaskId())); assertThat(taskPollResponse).isNotNull(); assertThat(taskPollResponse.get()) @@ -193,7 +177,7 @@ private TaskStatusPollResponse doPollForTaskCompletion() throws IOException, Int .pollInterval(Duration.ofSeconds(5)) 
.logging(LoggerFactory.getLogger("org.awaitility")::info) .until(() -> { - taskPollResponse.set(doclingClient.pollTaskStatus(pollRequest)); + taskPollResponse.set(doclingClient.pollTaskStatus(response.getTaskId())); return taskPollResponse.get().getTaskStatus() == TaskStatus.SUCCESS; }); } From d8610d2cefb479b3ddccbc865301e1605609c364 Mon Sep 17 00:00:00 2001 From: Eric Deandrea Date: Wed, 10 Dec 2025 14:47:05 -0500 Subject: [PATCH 4/4] feat: Implement the clear and task APIs Also enabled parallel builds for performance Fixes #188 Signed-off-by: Eric Deandrea --- .../src/main/java/ai/docling/serve/client/ChunkOperations.java | 3 +++ .../main/java/ai/docling/serve/client/ConvertOperations.java | 2 ++ 2 files changed, 5 insertions(+) diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ChunkOperations.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ChunkOperations.java index 180c426..06d6ff7 100644 --- a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ChunkOperations.java +++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ChunkOperations.java @@ -4,6 +4,7 @@ import ai.docling.serve.api.chunk.request.HierarchicalChunkDocumentRequest; import ai.docling.serve.api.chunk.request.HybridChunkDocumentRequest; import ai.docling.serve.api.chunk.response.ChunkDocumentResponse; +import ai.docling.serve.api.util.ValidationUtils; /** * Base class for document chunking API operations. Provides access to document chunking @@ -21,6 +22,7 @@ final class ChunkOperations implements DoclingServeChunkApi { * and using a hierarchical chunker for splitting the document into smaller chunks. 
*/ public ChunkDocumentResponse chunkSourceWithHierarchicalChunker(HierarchicalChunkDocumentRequest request) { + ValidationUtils.ensureNotNull(request, "request"); return this.httpOperations.executePost("/v1/chunk/hierarchical/source", request, ChunkDocumentResponse.class); } @@ -29,6 +31,7 @@ public ChunkDocumentResponse chunkSourceWithHierarchicalChunker(HierarchicalChun * and using a hybrid chunker for splitting the document into smaller chunks. */ public ChunkDocumentResponse chunkSourceWithHybridChunker(HybridChunkDocumentRequest request) { + ValidationUtils.ensureNotNull(request, "request"); return this.httpOperations.executePost("/v1/chunk/hybrid/source", request, ChunkDocumentResponse.class); } } diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ConvertOperations.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ConvertOperations.java index a5fe5af..4693928 100644 --- a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ConvertOperations.java +++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ConvertOperations.java @@ -3,6 +3,7 @@ import ai.docling.serve.api.DoclingServeConvertApi; import ai.docling.serve.api.convert.request.ConvertDocumentRequest; import ai.docling.serve.api.convert.response.ConvertDocumentResponse; +import ai.docling.serve.api.util.ValidationUtils; /** * Base class for document conversion API operations. Provides access to document @@ -22,6 +23,7 @@ final class ConvertOperations implements DoclingServeConvertApi { * @return a {@link ConvertDocumentResponse} containing the processed document data, processing details, and any errors. */ public ConvertDocumentResponse convertSource(ConvertDocumentRequest request) { + ValidationUtils.ensureNotNull(request, "request"); return this.httpOperations.executePost("/v1/convert/source", request, ConvertDocumentResponse.class); } }