diff --git a/README.md b/README.md
index bcf861a..2e4e607 100644
--- a/README.md
+++ b/README.md
@@ -3,6 +3,8 @@
This is the repository for Docling Java, a Java API for using [Docling](https://github.com/docling-project).
[](https://docling-project.github.io/docling-java/)
+[](https://docling-project.github.io/docling-java/dev/core)
[](https://docling-project.github.io/docling-java/dev/docling-serve/serve-api/)
[](https://docling-project.github.io/docling-java/dev/docling-serve/serve-client/)
diff --git a/buildSrc/src/main/kotlin/docling-java-shared.gradle.kts b/buildSrc/src/main/kotlin/docling-java-shared.gradle.kts
index 1ec6d95..868a392 100644
--- a/buildSrc/src/main/kotlin/docling-java-shared.gradle.kts
+++ b/buildSrc/src/main/kotlin/docling-java-shared.gradle.kts
@@ -41,10 +41,13 @@ jacoco {
toolVersion = libs.findVersion("jacoco").get().toString()
}
-tasks.withType {
+tasks.withType().configureEach {
// Use JUnit Platform for unit tests.
useJUnitPlatform()
+ maxParallelForks = (Runtime.getRuntime().availableProcessors() / 2).coerceAtLeast(1)
+ forkEvery = 100
+
finalizedBy(tasks.named("jacocoTestReport"))
testLogging {
@@ -72,6 +75,10 @@ tasks.withType {
}
}
+tasks.withType().configureEach {
+ options.isFork = true
+}
+
tasks.withType {
isFailOnError = false
diff --git a/docling-serve/docling-serve-api/build.gradle.kts b/docling-serve/docling-serve-api/build.gradle.kts
index 8645420..1612ae7 100644
--- a/docling-serve/docling-serve-api/build.gradle.kts
+++ b/docling-serve/docling-serve-api/build.gradle.kts
@@ -20,5 +20,6 @@ dependencies {
}
tasks.withType {
+ source = sourceSets["main"].allJava
exclude("**/lombok.config")
}
diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeApi.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeApi.java
index 7fc57fa..67d76fd 100644
--- a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeApi.java
+++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeApi.java
@@ -1,44 +1,10 @@
package ai.docling.serve.api;
-import ai.docling.serve.api.chunk.request.HierarchicalChunkDocumentRequest;
-import ai.docling.serve.api.chunk.request.HybridChunkDocumentRequest;
-import ai.docling.serve.api.chunk.response.ChunkDocumentResponse;
-import ai.docling.serve.api.convert.request.ConvertDocumentRequest;
-import ai.docling.serve.api.convert.response.ConvertDocumentResponse;
-import ai.docling.serve.api.health.HealthCheckResponse;
-
/**
* Docling Serve API interface.
*/
-public interface DoclingServeApi {
-
- /**
- * Executes a health check for the API and retrieves the health status of the service.
- *
- * @return a {@link HealthCheckResponse} object containing the health status of the API.
- */
- HealthCheckResponse health();
-
- /**
- * Converts the provided document source(s) into a processed document based on the specified options.
- *
- * @param request the {@link ConvertDocumentRequest} containing the source(s), conversion options, and optional target.
- * @return a {@link ConvertDocumentResponse} containing the processed document data, processing details, and any errors.
- */
- ConvertDocumentResponse convertSource(ConvertDocumentRequest request);
-
- /**
- * Converts and chunks the provided document source(s) into a processed document based on the specified options
- * and using a hierarchical chunker for splitting the document into smaller chunks.
- */
- ChunkDocumentResponse chunkSourceWithHierarchicalChunker(HierarchicalChunkDocumentRequest request);
-
- /**
- * Converts and chunks the provided document source(s) into a processed document based on the specified options
- * and using a hybrid chunker for splitting the document into smaller chunks.
- */
- ChunkDocumentResponse chunkSourceWithHybridChunker(HybridChunkDocumentRequest request);
-
+public interface DoclingServeApi
+ extends DoclingServeHealthApi, DoclingServeConvertApi, DoclingServeChunkApi, DoclingServeClearApi, DoclingServeTaskApi {
/**
* Creates and returns a builder instance capable of constructing a duplicate or modified
* version of the current API instance. The builder provides a customizable way to adjust
@@ -98,6 +64,30 @@ default B logResponses() {
*/
B logResponses(boolean logResponses);
+ /**
+ * Configures whether the API client should format JSON requests and responses in a "pretty" format.
+ * Pretty formatting organizes the response data to improve readability,
+ * typically by adding spacing and line breaks.
+ *
+ * This setting does not affect the functional content of the response but can
+ * assist with debugging or human-readable output for development purposes.
+ *
+ * @param prettyPrint {@code true} to enable pretty-printing of JSON requests and responses;
+ * {@code false} to use compact formatting.
+ * @return {@code this} builder instance for fluent API usage.
+ */
+ B prettyPrint(boolean prettyPrint);
+
+ /**
+ * Configures the API client to format JSON requests and responses in a "pretty" format.
+ * Pretty formatting improves readability by including spacing and line breaks.
+ *
+ * @return {@code this} builder instance for fluent API usage.
+ */
+ default B prettyPrint() {
+ return prettyPrint(true);
+ }
+
/**
* Builds and returns an instance of the specified type, representing the completed configuration
* of the builder. The returned instance is typically an implementation of the Docling API.
diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeChunkApi.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeChunkApi.java
new file mode 100644
index 0000000..f8b4af3
--- /dev/null
+++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeChunkApi.java
@@ -0,0 +1,24 @@
+package ai.docling.serve.api;
+
+import ai.docling.serve.api.chunk.request.HierarchicalChunkDocumentRequest;
+import ai.docling.serve.api.chunk.request.HybridChunkDocumentRequest;
+import ai.docling.serve.api.chunk.response.ChunkDocumentResponse;
+
+/**
+ * Represents the Docling Serve Chunk API, providing methods for processing document sources
+ * by splitting them into smaller chunks using various chunking strategies. This interface
+ * ensures flexibility by supporting both hierarchical and hybrid chunking mechanisms.
+ */
+public interface DoclingServeChunkApi {
+ /**
+ * Converts and chunks the provided document source(s) into a processed document based on the specified options
+ * and using a hierarchical chunker for splitting the document into smaller chunks.
+ */
+ ChunkDocumentResponse chunkSourceWithHierarchicalChunker(HierarchicalChunkDocumentRequest request);
+
+ /**
+ * Converts and chunks the provided document source(s) into a processed document based on the specified options
+ * and using a hybrid chunker for splitting the document into smaller chunks.
+ */
+ ChunkDocumentResponse chunkSourceWithHybridChunker(HybridChunkDocumentRequest request);
+}
diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeClearApi.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeClearApi.java
new file mode 100644
index 0000000..b1a85b0
--- /dev/null
+++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeClearApi.java
@@ -0,0 +1,52 @@
+package ai.docling.serve.api;
+
+import java.time.Duration;
+
+import org.jspecify.annotations.Nullable;
+
+import ai.docling.serve.api.clear.response.ClearResponse;
+
+/**
+ * Interface representing the Docling Serve Clear API. This API provides functionality
+ * for managing and cleaning up converters and stale data retained by the service.
+ * It includes methods for clearing registered converters and stored results based
+ * on specified thresholds or default configurations.
+ */
+public interface DoclingServeClearApi {
+ /**
+ * Represents the default duration used as a threshold for clearing stale results
+ * or data in the Docling Serve Clear API. Results older than this duration
+ * are considered stale and may be subject to cleanup.
+ *
+ * The value is predefined as 1 hour (3600 seconds).
+ */
+ Duration DEFAULT_OLDER_THAN = Duration.ofSeconds(3600);
+
+ /**
+ * Clears all registered converters associated with the API.
+ * This method removes any previously configured or cached converters,
+ * effectively resetting the converter state to an uninitialized state.
+ * After invoking this method, no converters will be available until new ones are added or configured.
+ */
+ ClearResponse clearConverters();
+
+ /**
+ * Clears stored results that are older than the specified duration threshold.
+ * This method is used for housekeeping to remove stale or outdated data from the system.
+ *
+ * @param olderThen the duration threshold; only results older than this duration will be cleared. May be {@code null}, in which case the provided client implementation uses {@link #DEFAULT_OLDER_THAN}.
+ * @return a {@link ClearResponse} object containing the status of the clear operation.
+ */
+ ClearResponse clearResults(@Nullable Duration olderThen);
+
+ /**
+ * Clears stored results that are older than the default duration threshold.
+ * This method passes {@link #DEFAULT_OLDER_THAN} to {@link #clearResults(Duration)} as the threshold
+ * to determine which results are considered stale and should be removed.
+ *
+ * @return a {@link ClearResponse} object containing the status of the clear operation.
+ */
+ default ClearResponse clearResults() {
+ return clearResults(DEFAULT_OLDER_THAN);
+ }
+}
diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeConvertApi.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeConvertApi.java
new file mode 100644
index 0000000..6c28631
--- /dev/null
+++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeConvertApi.java
@@ -0,0 +1,19 @@
+package ai.docling.serve.api;
+
+import ai.docling.serve.api.convert.request.ConvertDocumentRequest;
+import ai.docling.serve.api.convert.response.ConvertDocumentResponse;
+
+/**
+ * Interface representing the Docling Serve Convert API.
+ * This API is responsible for processing and converting document source(s) into
+ * a structured or processed document format based on the specified conversion options.
+ */
+public interface DoclingServeConvertApi {
+ /**
+ * Converts the provided document source(s) into a processed document based on the specified options.
+ *
+ * @param request the {@link ConvertDocumentRequest} containing the source(s), conversion options, and optional target.
+ * @return a {@link ConvertDocumentResponse} containing the processed document data, processing details, and any errors.
+ */
+ ConvertDocumentResponse convertSource(ConvertDocumentRequest request);
+}
diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeHealthApi.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeHealthApi.java
new file mode 100644
index 0000000..e90ff96
--- /dev/null
+++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeHealthApi.java
@@ -0,0 +1,16 @@
+package ai.docling.serve.api;
+
+import ai.docling.serve.api.health.HealthCheckResponse;
+
+/**
+ * Interface for performing health checks on the Docling service API.
+ * This API is designed to verify and report the operational status of the service.
+ */
+public interface DoclingServeHealthApi {
+ /**
+ * Executes a health check for the API and retrieves the health status of the service.
+ *
+ * @return a {@link HealthCheckResponse} object containing the health status of the API.
+ */
+ HealthCheckResponse health();
+}
diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeTaskApi.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeTaskApi.java
new file mode 100644
index 0000000..da25a98
--- /dev/null
+++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/DoclingServeTaskApi.java
@@ -0,0 +1,76 @@
+package ai.docling.serve.api;
+
+import java.time.Duration;
+
+import org.jspecify.annotations.Nullable;
+
+import ai.docling.serve.api.chunk.response.ChunkDocumentResponse;
+import ai.docling.serve.api.convert.response.ConvertDocumentResponse;
+import ai.docling.serve.api.task.response.TaskStatusPollResponse;
+
+/**
+ * Defines the interface for the Docling Task API, which provides operations for
+ * managing and querying the status of asynchronous tasks.
+ *
+ * This interface supports task status polling with configurable wait durations
+ * and a default polling mechanism. It is one of the capability interfaces that
+ * {@link DoclingServeApi} extends.
+ */
+public interface DoclingServeTaskApi {
+ /**
+ * The default wait time between status polling attempts for a task.
+ *
+ * This value is used when no explicit wait time is specified in a
+ * {@code TaskStatusPollRequest} instance. It is set to {@link Duration#ZERO},
+ * meaning there is no delay by default between consecutive polling attempts.
+ *
+ */
+ Duration DEFAULT_STATUS_POLL_WAIT_TIME = Duration.ZERO;
+
+ /**
+ * Polls the status of an asynchronous task and retrieves its current state.
+ * Allows for configurable wait time between polling attempts.
+ * If the wait time is {@code null}, the default wait time ({@link #DEFAULT_STATUS_POLL_WAIT_TIME}) is used.
+ *
+ * @param taskId the unique identifier of the task whose status is being polled
+ * @param waitTime the duration to wait before polling the status, or null to use the default polling interval
+ * @return a {@link TaskStatusPollResponse} containing the current status of the task and associated metadata
+ */
+ TaskStatusPollResponse pollTaskStatus(String taskId, @Nullable Duration waitTime);
+
+ /**
+ * Polls the status of an asynchronous task using the default wait time.
+ * This convenience method delegates to {@link #pollTaskStatus(String, Duration)}
+ * with {@link #DEFAULT_STATUS_POLL_WAIT_TIME} as the wait time.
+ *
+ * @param taskId the unique identifier of the task whose status is being polled
+ * @return a {@link TaskStatusPollResponse} containing the current status of the task
+ * and associated metadata
+ */
+ default TaskStatusPollResponse pollTaskStatus(String taskId) {
+ return pollTaskStatus(taskId, DEFAULT_STATUS_POLL_WAIT_TIME);
+ }
+
+ /**
+ * Converts the completed task result identified by the provided task ID into a document response.
+ * This method processes the task data associated with the given ID and generates a response
+ * encapsulating the converted document details.
+ *
+ * @param taskId the unique identifier of the task whose result needs to be converted into a document response
+ * @return a {@link ConvertDocumentResponse} containing the details of the converted document, processing metadata,
+ * errors (if any), and other relevant information
+ */
+ ConvertDocumentResponse convertTaskResult(String taskId);
+
+ /**
+ * Processes the results of a completed task identified by the given task ID and generates a
+ * response containing chunked document details. This method is used to break down the document
+ * associated with the task into manageable chunks, making it suitable for subsequent processing
+ * or analysis.
+ *
+ * @param taskId the unique identifier of the task whose result is to be processed and chunked into
+ * a {@link ChunkDocumentResponse}
+ * @return a {@link ChunkDocumentResponse} containing the chunked document details and related metadata
+ */
+ ChunkDocumentResponse chunkTaskResult(String taskId);
+}
diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/package-info.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/package-info.java
new file mode 100644
index 0000000..11a1edd
--- /dev/null
+++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/package-info.java
@@ -0,0 +1,4 @@
+@NullMarked
+package ai.docling.serve.api.clear;
+
+import org.jspecify.annotations.NullMarked;
diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/response/ClearResponse.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/response/ClearResponse.java
new file mode 100644
index 0000000..8d8711c
--- /dev/null
+++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/response/ClearResponse.java
@@ -0,0 +1,33 @@
+package ai.docling.serve.api.clear.response;
+
+import org.jspecify.annotations.Nullable;
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * Represents the response returned when performing a "clear" operation
+ * within the Docling API. This response typically communicates the status
+ * of the clear operation, indicating success, failure, or other relevant
+ * information.
+ *
+ * The response includes a status field, which may provide additional details
+ * about the operation's outcome. The status field is optional and may be
+ * null or empty if no specific status message is provided.
+ *
+ * This class is immutable and is constructed using the {@link Builder}.
+ */
+@JsonInclude(JsonInclude.Include.NON_EMPTY)
+@tools.jackson.databind.annotation.JsonDeserialize(builder = ClearResponse.Builder.class)
+@lombok.extern.jackson.Jacksonized
+@lombok.Builder(toBuilder = true)
+@lombok.Getter
+@lombok.ToString
+public class ClearResponse {
+ @JsonProperty("status")
+ @Nullable
+ private String status;
+
+ @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "")
+ public static class Builder { }
+}
diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/response/package-info.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/response/package-info.java
new file mode 100644
index 0000000..a9cd244
--- /dev/null
+++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/clear/response/package-info.java
@@ -0,0 +1,4 @@
+@NullMarked
+package ai.docling.serve.api.clear.response;
+
+import org.jspecify.annotations.NullMarked;
diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/serialization/DurationSerializationFormat.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/serialization/DurationSerializationFormat.java
index 0c86ca0..a65c02f 100644
--- a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/serialization/DurationSerializationFormat.java
+++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/serialization/DurationSerializationFormat.java
@@ -23,7 +23,6 @@
* private Duration timeout;
* }
*
- *
* @see Jackson2DurationSerializer
* @see Jackson3DurationSerializer
*/
diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/serialization/package-info.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/serialization/package-info.java
new file mode 100644
index 0000000..4518de5
--- /dev/null
+++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/serialization/package-info.java
@@ -0,0 +1,7 @@
+/**
+ * Serialization helpers
+ */
+@NullMarked
+package ai.docling.serve.api.serialization;
+
+import org.jspecify.annotations.NullMarked;
diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/package-info.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/package-info.java
new file mode 100644
index 0000000..55746da
--- /dev/null
+++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/package-info.java
@@ -0,0 +1,7 @@
+/**
+ * The Docling task api
+ */
+@NullMarked
+package ai.docling.serve.api.task;
+
+import org.jspecify.annotations.NullMarked;
diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/TaskStatus.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/TaskStatus.java
new file mode 100644
index 0000000..b9da683
--- /dev/null
+++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/TaskStatus.java
@@ -0,0 +1,21 @@
+package ai.docling.serve.api.task.response;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * Represents the possible statuses of a Task within the system.
+ * Each status is mapped to a specific JSON property for serialization and deserialization.
+ *
+ *
+ * - {@code PENDING}: Indicates that the task has been created but has not yet started execution.
+ * - {@code STARTED}: Indicates that the task is currently in progress.
+ * - {@code SUCCESS}: Indicates that the task has completed successfully.
+ * - {@code FAILURE}: Indicates that the task has failed to complete successfully.
+ *
+ */
+public enum TaskStatus {
+ @JsonProperty("pending") PENDING,
+ @JsonProperty("started") STARTED,
+ @JsonProperty("success") SUCCESS,
+ @JsonProperty("failure") FAILURE;
+}
diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/TaskStatusMetadata.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/TaskStatusMetadata.java
new file mode 100644
index 0000000..014ae4e
--- /dev/null
+++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/TaskStatusMetadata.java
@@ -0,0 +1,47 @@
+package ai.docling.serve.api.task.response;
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * Represents metadata for the status of a task, providing detailed information
+ * about the task's progress and outcomes.
+ *
+ * This class is used to encapsulate the statistical data related to the execution
+ * of a task, such as the number of documents processed, succeeded, or failed. It
+ * is intended for use in tracking task processing and is serialized to JSON for
+ * communication between system components.
+ *
+ * Features:
+ * - Serialization and deserialization using Jackson annotations.
+ * - All fields are included in the JSON only if they are non-empty.
+ * - Immutable structure with a builder pattern for object creation.
+ *
+ * Fields:
+ * - {@code numDocs}: The total number of documents associated with the task.
+ * - {@code numProcessed}: The number of documents that have been processed.
+ * - {@code numSucceeded}: The number of documents that were successfully processed.
+ * - {@code numFailed}: The number of documents that failed processing.
+ */
+@JsonInclude(JsonInclude.Include.NON_EMPTY)
+@tools.jackson.databind.annotation.JsonDeserialize(builder = TaskStatusMetadata.Builder.class)
+@lombok.extern.jackson.Jacksonized
+@lombok.Builder(toBuilder = true)
+@lombok.Getter
+@lombok.ToString
+public class TaskStatusMetadata {
+ @JsonProperty("num_docs")
+ private Long numDocs;
+
+ @JsonProperty("num_processed")
+ private Long numProcessed;
+
+ @JsonProperty("num_succeeded")
+ private Long numSucceeded;
+
+ @JsonProperty("num_failed")
+ private Long numFailed;
+
+ @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "")
+ public static class Builder { }
+}
diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/TaskStatusPollResponse.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/TaskStatusPollResponse.java
new file mode 100644
index 0000000..ab853ba
--- /dev/null
+++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/TaskStatusPollResponse.java
@@ -0,0 +1,55 @@
+package ai.docling.serve.api.task.response;
+
+import org.jspecify.annotations.Nullable;
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * Represents the response for polling the status of a task.
+ *
+ * This class encapsulates details about a specific task's current status,
+ * including its identifier, type, position in a queue, and metadata regarding
+ * its progress and outcomes. The object is serialized and deserialized using
+ * Jackson annotations, ensuring compatibility with JSON-based APIs.
+ *
+ * Features:
+ * - Uses a builder pattern with {@code lombok.Builder} for creating immutable objects.
+ * - Serialized to JSON only if fields are non-empty.
+ * - Metadata field for detailed task status is optional.
+ *
+ * Fields:
+ * - {@code taskId}: The unique identifier for the task.
+ * - {@code taskType}: The type of task being processed (optional).
+ * - {@code taskStatus}: The current status of the task, as defined in the {@code TaskStatus} enum.
+ * - {@code taskPosition}: A numerical representation of the task's position in the processing queue.
+ * - {@code taskStatusMetadata}: Metadata providing detailed statistics and progress information (optional).
+ */
+@JsonInclude(JsonInclude.Include.NON_EMPTY)
+@tools.jackson.databind.annotation.JsonDeserialize(builder = TaskStatusPollResponse.Builder.class)
+@lombok.extern.jackson.Jacksonized
+@lombok.Builder(toBuilder = true)
+@lombok.Getter
+@lombok.ToString
+public class TaskStatusPollResponse {
+ @JsonProperty("task_id")
+ private String taskId;
+
+ @JsonProperty("task_type")
+ @Nullable
+ private String taskType;
+
+ @JsonProperty("task_status")
+ private TaskStatus taskStatus;
+
+ @JsonProperty("task_position")
+ @Nullable
+ private Long taskPosition;
+
+ @JsonProperty("task_meta")
+ @Nullable
+ private TaskStatusMetadata taskStatusMetadata;
+
+ @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "")
+ public static class Builder { }
+}
diff --git a/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/package-info.java b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/package-info.java
new file mode 100644
index 0000000..96512c7
--- /dev/null
+++ b/docling-serve/docling-serve-api/src/main/java/ai/docling/serve/api/task/response/package-info.java
@@ -0,0 +1,7 @@
+/**
+ * The Docling task api
+ */
+@NullMarked
+package ai.docling.serve.api.task.response;
+
+import org.jspecify.annotations.NullMarked;
diff --git a/docling-serve/docling-serve-api/src/main/java/module-info.java b/docling-serve/docling-serve-api/src/main/java/module-info.java
index 38a784a..c58f773 100644
--- a/docling-serve/docling-serve-api/src/main/java/module-info.java
+++ b/docling-serve/docling-serve-api/src/main/java/module-info.java
@@ -26,6 +26,12 @@
exports ai.docling.serve.api.convert.request.target;
exports ai.docling.serve.api.convert.response;
+ // Clear API
+ exports ai.docling.serve.api.clear.response;
+
+ // Task API
+ exports ai.docling.serve.api.task.response;
+
// Serialization helpers
exports ai.docling.serve.api.serialization;
}
diff --git a/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/clear/response/ClearResponseTests.java b/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/clear/response/ClearResponseTests.java
new file mode 100644
index 0000000..1901bd2
--- /dev/null
+++ b/docling-serve/docling-serve-api/src/test/java/ai/docling/serve/api/clear/response/ClearResponseTests.java
@@ -0,0 +1,36 @@
+package ai.docling.serve.api.clear.response;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import org.junit.jupiter.api.Test;
+
+class ClearResponseTests {
+ @Test
+ void whenValidParametersThenCreateClearResponse() {
+ String status = "healthy";
+
+ ClearResponse response = ClearResponse.builder()
+ .status(status)
+ .build();
+
+ assertThat(response.getStatus()).isEqualTo(status);
+ }
+
+ @Test
+ void whenStatusIsNullThenCreateClearResponse() {
+ ClearResponse response = ClearResponse.builder().build();
+
+ assertThat(response.getStatus()).isNull();
+ }
+
+ @Test
+ void whenEmptyStatusThenCreateClearResponse() {
+ String status = "";
+
+ ClearResponse response = ClearResponse.builder()
+ .status(status)
+ .build();
+
+ assertThat(response.getStatus()).isEqualTo(status);
+ }
+}
diff --git a/docling-serve/docling-serve-client/build.gradle.kts b/docling-serve/docling-serve-client/build.gradle.kts
index f8a3910..68fb0b9 100644
--- a/docling-serve/docling-serve-client/build.gradle.kts
+++ b/docling-serve/docling-serve-client/build.gradle.kts
@@ -20,4 +20,6 @@ dependencies {
testImplementation(platform(libs.jackson.bom))
testImplementation(libs.jackson.databind)
testImplementation(libs.jackson2.databind)
+
+ testImplementation(libs.awaitility)
}
diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ChunkOperations.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ChunkOperations.java
new file mode 100644
index 0000000..06d6ff7
--- /dev/null
+++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ChunkOperations.java
@@ -0,0 +1,45 @@
+package ai.docling.serve.client;
+
+import ai.docling.serve.api.DoclingServeChunkApi;
+import ai.docling.serve.api.chunk.request.HierarchicalChunkDocumentRequest;
+import ai.docling.serve.api.chunk.request.HybridChunkDocumentRequest;
+import ai.docling.serve.api.chunk.response.ChunkDocumentResponse;
+import ai.docling.serve.api.util.ValidationUtils;
+
+/**
+ * Package-private, final implementation of {@link DoclingServeChunkApi} that delegates both
+ * chunking calls (hierarchical and hybrid) to the supplied {@link HttpOperations}.
+ */
+final class ChunkOperations implements DoclingServeChunkApi {
+  private final HttpOperations httpOperations;
+
+  ChunkOperations(HttpOperations httpOperations) {
+    this.httpOperations = httpOperations;
+  }
+
+  /**
+   * Converts and chunks the provided document source(s) into a processed document based on the specified options
+   * and using a hierarchical chunker for splitting the document into smaller chunks.
+   *
+   * @param request the chunking request; passed to {@code ValidationUtils.ensureNotNull} before it is sent
+   * @return the {@link ChunkDocumentResponse} returned by {@code HttpOperations.executePost} for {@code /v1/chunk/hierarchical/source}
+   */
+  @Override
+  public ChunkDocumentResponse chunkSourceWithHierarchicalChunker(HierarchicalChunkDocumentRequest request) {
+    ValidationUtils.ensureNotNull(request, "request");
+    return this.httpOperations.executePost("/v1/chunk/hierarchical/source", request, ChunkDocumentResponse.class);
+  }
+
+  /**
+   * Converts and chunks the provided document source(s) into a processed document based on the specified options
+   * and using a hybrid chunker for splitting the document into smaller chunks.
+   *
+   * @param request the chunking request; passed to {@code ValidationUtils.ensureNotNull} before it is sent
+   * @return the {@link ChunkDocumentResponse} returned by {@code HttpOperations.executePost} for {@code /v1/chunk/hybrid/source}
+   */
+  @Override
+  public ChunkDocumentResponse chunkSourceWithHybridChunker(HybridChunkDocumentRequest request) {
+    ValidationUtils.ensureNotNull(request, "request");
+    return this.httpOperations.executePost("/v1/chunk/hybrid/source", request, ChunkDocumentResponse.class);
+  }
+}
diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ClearOperations.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ClearOperations.java
new file mode 100644
index 0000000..43b2dde
--- /dev/null
+++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ClearOperations.java
@@ -0,0 +1,49 @@
+package ai.docling.serve.client;
+
+import java.time.Duration;
+import java.util.Optional;
+
+import org.jspecify.annotations.Nullable;
+
+import ai.docling.serve.api.DoclingServeClearApi;
+import ai.docling.serve.api.clear.response.ClearResponse;
+
+/**
+ * Package-private, final implementation of {@link DoclingServeClearApi} that issues the
+ * converter and result clean-up requests through the supplied {@link HttpOperations}.
+ */
+final class ClearOperations implements DoclingServeClearApi {
+  private final HttpOperations httpOperations;
+
+  ClearOperations(HttpOperations httpOperations) {
+    this.httpOperations = httpOperations;
+  }
+
+  /**
+   * Clears all registered converters associated with the API.
+   *
+   * @return the {@link ClearResponse} returned by {@code HttpOperations.executeGet} for {@code /v1/clear/converters}
+   */
+  @Override
+  public ClearResponse clearConverters() {
+    return this.httpOperations.executeGet("/v1/clear/converters", ClearResponse.class);
+  }
+
+  /**
+   * Clears the results stored by the service that are older than the specified duration.
+   *
+   * @param olderThen the {@link Duration} indicating the age threshold. Results older than
+   *                  this duration will be cleared. When {@code null}, {@link #DEFAULT_OLDER_THAN} is used.
+   *                  The threshold is sent in whole seconds (see {@link Duration#toSeconds()}).
+   * @return a {@link ClearResponse} containing information about the outcome of the clear operation.
+   */
+  @Override
+  public ClearResponse clearResults(@Nullable Duration olderThen) {
+    var olderThenSeconds = Optional.ofNullable(olderThen)
+        .orElse(DEFAULT_OLDER_THAN)
+        .toSeconds();
+
+    // NOTE(review): the query parameter is spelled "older_then"; confirm against the docling-serve endpoint before changing it.
+    return this.httpOperations.executeGet("/v1/clear/results?older_then=%d".formatted(olderThenSeconds), ClearResponse.class);
+  }
+}
diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ConvertOperations.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ConvertOperations.java
new file mode 100644
index 0000000..4693928
--- /dev/null
+++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/ConvertOperations.java
@@ -0,0 +1,29 @@
+package ai.docling.serve.client;
+
+import ai.docling.serve.api.DoclingServeConvertApi;
+import ai.docling.serve.api.convert.request.ConvertDocumentRequest;
+import ai.docling.serve.api.convert.response.ConvertDocumentResponse;
+import ai.docling.serve.api.util.ValidationUtils;
+
+/**
+ * Document conversion API operations. Provides access to document conversion functionality.
+ */
+final class ConvertOperations implements DoclingServeConvertApi {
+  private final HttpOperations httpOperations;
+
+  ConvertOperations(HttpOperations httpOperations) {
+    this.httpOperations = httpOperations;
+  }
+
+  /**
+   * Converts the provided document source(s) into a processed document based on the specified options.
+   *
+   * @param request the {@link ConvertDocumentRequest} containing the source(s), conversion options, and optional target.
+   * @return a {@link ConvertDocumentResponse} containing the processed document data, processing details, and any errors.
+   */
+  @Override
+  public ConvertDocumentResponse convertSource(ConvertDocumentRequest request) {
+    ValidationUtils.ensureNotNull(request, "request");
+    return this.httpOperations.executePost("/v1/convert/source", request, ConvertDocumentResponse.class);
+  }
+}
diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeClient.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeClient.java
index 245e7d2..5a9bd7e 100644
--- a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeClient.java
+++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeClient.java
@@ -5,26 +5,36 @@
import java.net.URI;
import java.net.http.HttpClient;
+import java.net.http.HttpClient.Redirect;
import java.net.http.HttpRequest;
import java.net.http.HttpRequest.BodyPublisher;
import java.net.http.HttpRequest.BodyPublishers;
import java.net.http.HttpResponse;
import java.net.http.HttpResponse.BodyHandlers;
import java.nio.ByteBuffer;
+import java.time.Duration;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.Flow.Subscriber;
+import org.jspecify.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ai.docling.serve.api.DoclingServeApi;
+import ai.docling.serve.api.DoclingServeChunkApi;
+import ai.docling.serve.api.DoclingServeClearApi;
+import ai.docling.serve.api.DoclingServeConvertApi;
+import ai.docling.serve.api.DoclingServeHealthApi;
+import ai.docling.serve.api.DoclingServeTaskApi;
import ai.docling.serve.api.chunk.request.HierarchicalChunkDocumentRequest;
import ai.docling.serve.api.chunk.request.HybridChunkDocumentRequest;
import ai.docling.serve.api.chunk.response.ChunkDocumentResponse;
+import ai.docling.serve.api.clear.response.ClearResponse;
import ai.docling.serve.api.convert.request.ConvertDocumentRequest;
import ai.docling.serve.api.convert.response.ConvertDocumentResponse;
import ai.docling.serve.api.health.HealthCheckResponse;
+import ai.docling.serve.api.task.response.TaskStatusPollResponse;
/**
* Abstract class representing a client for interacting with the Docling API.
@@ -34,10 +44,15 @@
* configurations. It provides abstract methods for JSON serialization and
* deserialization, allowing implementation-specific customization.
*
+ * The client is structured hierarchically, with separate nested implementations
+ * for each API interface ({@link DoclingServeHealthApi}, {@link DoclingServeConvertApi},
+ * {@link DoclingServeChunkApi}, {@link DoclingServeClearApi}, {@link DoclingServeTaskApi}).
+ * These implementations share common HTTP execution logic and configuration.
+ *
*
Concrete subclasses must implement {@link #readValue(String, Class)} and
* {@link #writeValueAsString(Object)} for serialization and deserialization behavior.
*/
-public abstract class DoclingServeClient implements DoclingServeApi {
+public abstract class DoclingServeClient extends HttpOperations implements DoclingServeApi {
private static final Logger LOG = LoggerFactory.getLogger(DoclingServeClient.class);
protected static final URI DEFAULT_BASE_URL = URI.create("http://localhost:5001");
@@ -45,6 +60,13 @@ public abstract class DoclingServeClient implements DoclingServeApi {
private final HttpClient httpClient;
private final boolean logRequests;
private final boolean logResponses;
+ private final boolean prettyPrintJson;
+
+ private final HealthOperations healthOps = new HealthOperations(this);
+ private final ConvertOperations convertOps = new ConvertOperations(this);
+ private final ChunkOperations chunkOps = new ChunkOperations(this);
+ private final ClearOperations clearOps = new ClearOperations(this);
+ private final TaskOperations taskOps = new TaskOperations(this);
protected DoclingServeClient(DoclingServeClientBuilder builder) {
this.baseUrl = ensureNotNull(builder.baseUrl, "baseUrl");
@@ -59,6 +81,7 @@ protected DoclingServeClient(DoclingServeClientBuilder builder) {
this.httpClient = ensureNotNull(builder.httpClientBuilder, "httpClientBuilder").build();
this.logRequests = builder.logRequests;
this.logResponses = builder.logResponses;
+ this.prettyPrintJson = builder.prettyPrintJson;
}
/**
@@ -82,6 +105,10 @@ protected DoclingServeClient(DoclingServeClientBuilder builder) {
*/
protected abstract String writeValueAsString(T value);
+ protected boolean prettyPrintJson() {
+ return this.prettyPrintJson;
+ }
+
protected void logRequest(HttpRequest request) {
if (LOG.isInfoEnabled()) {
var stringBuilder = new StringBuilder();
@@ -106,7 +133,9 @@ protected void logResponse(HttpResponse response, Optional respo
stringBuilder.append(" %s: %s\n".formatted(key, String.join(", ", values)))
);
- responseBody.ifPresent(body -> stringBuilder.append(" BODY:\n%s".formatted(body)));
+ responseBody
+ .map(body -> this.prettyPrintJson ? writeValueAsString(readValue(body, Object.class)) : body)
+ .ifPresent(body -> stringBuilder.append(" BODY:\n%s".formatted(body)));
LOG.info(stringBuilder.toString());
}
}
@@ -138,7 +167,8 @@ protected O executePost(String uri, I request, Class expectedReturnTyp
return execute(httpRequest, expectedReturnType);
}
- protected O executeGet(String uri, Class expectedReturnType) {
+ @Override
+ protected O executeGet(String uri, Class expectedReturnType) {
var httpRequest = createRequestBuilder(uri)
.GET()
.build();
@@ -159,27 +189,60 @@ protected T getResponse(HttpResponse response, Class expectedRetu
logResponse(response, Optional.ofNullable(body));
}
+ var statusCode = response.statusCode();
+
+ if (statusCode >= 400) {
+ // Handle errors
+ // The Java HTTPClient doesn't throw exceptions on error codes
+ throw new DoclingServeClientException("An error occurred: %s".formatted(body), statusCode, body);
+ }
+
return readValue(body, expectedReturnType);
}
@Override
public HealthCheckResponse health() {
- return executeGet("/health", HealthCheckResponse.class);
+ return this.healthOps.health();
}
@Override
public ConvertDocumentResponse convertSource(ConvertDocumentRequest request) {
- return executePost("/v1/convert/source", request, ConvertDocumentResponse.class);
+ return this.convertOps.convertSource(request);
}
@Override
public ChunkDocumentResponse chunkSourceWithHierarchicalChunker(HierarchicalChunkDocumentRequest request) {
- return executePost("/v1/chunk/hierarchical/source", request, ChunkDocumentResponse.class);
+ return this.chunkOps.chunkSourceWithHierarchicalChunker(request);
}
@Override
public ChunkDocumentResponse chunkSourceWithHybridChunker(HybridChunkDocumentRequest request) {
- return executePost("/v1/chunk/hybrid/source", request, ChunkDocumentResponse.class);
+ return this.chunkOps.chunkSourceWithHybridChunker(request);
+ }
+
+ @Override
+ public TaskStatusPollResponse pollTaskStatus(String taskId, @Nullable Duration waitTime) {
+ return this.taskOps.pollTaskStatus(taskId, waitTime);
+ }
+
+ @Override
+ public ConvertDocumentResponse convertTaskResult(String taskId) {
+ return this.taskOps.convertTaskResult(taskId);
+ }
+
+ @Override
+ public ChunkDocumentResponse chunkTaskResult(String taskId) {
+ return this.taskOps.chunkTaskResult(taskId);
+ }
+
+ @Override
+ public ClearResponse clearConverters() {
+ return this.clearOps.clearConverters();
+ }
+
+ @Override
+ public ClearResponse clearResults(Duration olderThen) {
+ return this.clearOps.clearResults(olderThen);
}
private class LoggingBodyPublisher implements BodyPublisher {
@@ -199,7 +262,7 @@ public long contentLength() {
@Override
public void subscribe(Subscriber super ByteBuffer> subscriber) {
if (logRequests) {
- LOG.info("→ REQUEST BODY: {}", this.stringContent);
+ LOG.info("→ REQUEST BODY: \n{}", this.stringContent);
}
this.delegate.subscribe(subscriber);
@@ -219,9 +282,10 @@ public void subscribe(Subscriber super ByteBuffer> subscriber) {
@SuppressWarnings("unchecked")
public abstract static class DoclingServeClientBuilder> implements DoclingApiBuilder {
private URI baseUrl = DEFAULT_BASE_URL;
- private HttpClient.Builder httpClientBuilder = HttpClient.newBuilder();
+ private HttpClient.Builder httpClientBuilder = HttpClient.newBuilder().followRedirects(Redirect.NORMAL);
private boolean logRequests = false;
private boolean logResponses = false;
+ private boolean prettyPrintJson = false;
/**
* Protected constructor for use by subclasses of {@link DoclingServeClientBuilder}.
@@ -300,5 +364,11 @@ public B logResponses(boolean logResponses) {
this.logResponses = logResponses;
return (B) this;
}
+
+ @Override
+ public B prettyPrint(boolean prettyPrint) {
+ this.prettyPrintJson = prettyPrint;
+ return (B) this;
+ }
}
}
diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeClientException.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeClientException.java
new file mode 100644
index 0000000..d20286b
--- /dev/null
+++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeClientException.java
@@ -0,0 +1,35 @@
+package ai.docling.serve.client;
+
+/**
+ * Signals that a request made by the client to the Docling Serve API failed.
+ * Carries the HTTP status code and the response body received from the server
+ * so that callers can inspect the failure in more detail.
+ * This is an unchecked exception.
+ */
+public class DoclingServeClientException extends RuntimeException {
+  private final int statusCode;
+  private final String responseBody;
+
+  /**
+   * Creates an exception describing a failed request.
+   *
+   * @param message the detail message.
+   * @param statusCode the HTTP status code returned by the server.
+   * @param responseBody the response body returned by the server.
+   */
+  public DoclingServeClientException(String message, int statusCode, String responseBody) {
+    super(message);
+    this.responseBody = responseBody;
+    this.statusCode = statusCode;
+  }
+
+  /** Returns the HTTP status code that was returned by the server. */
+  public int getStatusCode() {
+    return statusCode;
+  }
+
+  /** Returns the response body returned by the server as a string. */
+  public String getResponseBody() {
+    return responseBody;
+  }
+}
diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeJackson2Client.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeJackson2Client.java
index 86b1554..5614278 100644
--- a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeJackson2Client.java
+++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeJackson2Client.java
@@ -55,7 +55,9 @@ protected T readValue(String json, Class valueType) {
@Override
protected String writeValueAsString(T value) {
try {
- return this.jsonMapper.writeValueAsString(value);
+ return prettyPrintJson() ?
+ this.jsonMapper.writerWithDefaultPrettyPrinter().writeValueAsString(value) :
+ this.jsonMapper.writeValueAsString(value);
} catch (JsonProcessingException e) {
throw new RuntimeException(e);
}
diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeJackson3Client.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeJackson3Client.java
index 918f31e..9863331 100644
--- a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeJackson3Client.java
+++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/DoclingServeJackson3Client.java
@@ -42,7 +42,9 @@ protected T readValue(String json, Class valueType) {
@Override
protected String writeValueAsString(T value) {
- return this.jsonMapper.writeValueAsString(value);
+ return prettyPrintJson() ?
+ this.jsonMapper.writerWithDefaultPrettyPrinter().writeValueAsString(value) :
+ this.jsonMapper.writeValueAsString(value);
}
/**
diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/HealthOperations.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/HealthOperations.java
new file mode 100644
index 0000000..a8bbf91
--- /dev/null
+++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/HealthOperations.java
@@ -0,0 +1,25 @@
+package ai.docling.serve.client;
+
+import ai.docling.serve.api.DoclingServeHealthApi;
+import ai.docling.serve.api.health.HealthCheckResponse;
+
+/**
+ * Health API operations. Provides access to health check functionality of the Docling service.
+ */
+final class HealthOperations implements DoclingServeHealthApi {
+  private final HttpOperations httpOperations;
+
+  HealthOperations(HttpOperations httpOperations) {
+    this.httpOperations = httpOperations;
+  }
+
+  /**
+   * Executes a health check for the API and retrieves the health status of the service.
+   *
+   * @return a {@link HealthCheckResponse} object containing the health status of the API.
+   */
+  @Override
+  public HealthCheckResponse health() {
+    return this.httpOperations.executeGet("/health", HealthCheckResponse.class);
+  }
+}
diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/HttpOperations.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/HttpOperations.java
new file mode 100644
index 0000000..880bd20
--- /dev/null
+++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/HttpOperations.java
@@ -0,0 +1,31 @@
+package ai.docling.serve.client;
+
+/**
+ * Abstract base class for HTTP operations. Provides methods for executing HTTP requests such as GET and POST
+ * and deserializing responses into specified types. This class is intended to be extended by subclasses that
+ * implement these operations for specific use cases.
+ */
+public abstract class HttpOperations {
+  /**
+   * Executes an HTTP GET request to the specified URI and deserializes the response into the given type.
+   *
+   * @param <O> the expected return type for the deserialized response.
+   * @param uri the URI to send the GET request to.
+   * @param expectedReturnType the class representing the type to which the response should be deserialized.
+   * @return an instance of the specified type containing the deserialized response data.
+   */
+  protected abstract <O> O executeGet(String uri, Class<O> expectedReturnType);
+
+  /**
+   * Executes an HTTP POST request to the specified URI with the given request payload and deserializes
+   * the response into the specified return type.
+   *
+   * @param <I> the type of the request payload sent in the POST request.
+   * @param <O> the expected return type for the deserialized response.
+   * @param uri the URI to send the POST request to.
+   * @param request the payload to be sent in the POST request.
+   * @param expectedReturnType the class representing the type to which the response should be deserialized.
+   * @return an instance of the specified type containing the deserialized response data.
+   */
+  protected abstract <I, O> O executePost(String uri, I request, Class<O> expectedReturnType);
+}
diff --git a/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/TaskOperations.java b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/TaskOperations.java
new file mode 100644
index 0000000..fc39420
--- /dev/null
+++ b/docling-serve/docling-serve-client/src/main/java/ai/docling/serve/client/TaskOperations.java
@@ -0,0 +1,85 @@
+package ai.docling.serve.client;
+
+import java.time.Duration;
+import java.util.Optional;
+
+import org.jspecify.annotations.Nullable;
+
+import ai.docling.serve.api.DoclingServeTaskApi;
+import ai.docling.serve.api.chunk.response.ChunkDocumentResponse;
+import ai.docling.serve.api.convert.response.ConvertDocumentResponse;
+import ai.docling.serve.api.task.response.TaskStatusPollResponse;
+import ai.docling.serve.api.util.ValidationUtils;
+
+/**
+ * Task API operations. Provides operations for managing and querying
+ * the status of asynchronous tasks.
+ */
+final class TaskOperations implements DoclingServeTaskApi {
+  private final HttpOperations httpOperations;
+
+  TaskOperations(HttpOperations httpOperations) {
+    this.httpOperations = httpOperations;
+  }
+
+  /**
+   * Polls the current status of a specified task.
+   *
+   * This method sends a request to the server to retrieve the current status
+   * of the task identified by the given {@code taskId}. Optionally, a {@code waitTime}
+   * can be specified to indicate how long the server should wait for a status change
+   * before responding.
+   *
+   * @param taskId the unique identifier of the task whose status is being polled.
+   *               Must not be blank or null.
+   * @param waitTime an optional {@link Duration}, sent in whole seconds, specifying how long the server should
+   *                 wait for a status change before responding. If null, {@code DEFAULT_STATUS_POLL_WAIT_TIME} is used.
+   * @return a {@link TaskStatusPollResponse} containing the current status details of the task.
+   * @throws IllegalArgumentException if {@code taskId} is blank or null.
+   */
+  @Override
+  public TaskStatusPollResponse pollTaskStatus(String taskId, @Nullable Duration waitTime) {
+    ValidationUtils.ensureNotBlank(taskId, "taskId");
+    var waitTimeSeconds = Optional.ofNullable(waitTime).orElse(DEFAULT_STATUS_POLL_WAIT_TIME).toSeconds();
+    // Concatenate instead of using %d: %d localizes digits with the default locale, which can corrupt the URL.
+    return this.httpOperations.executeGet("/v1/status/poll/" + taskId + "?wait=" + waitTimeSeconds, TaskStatusPollResponse.class);
+  }
+
+  /**
+   * Retrieves the result of a completed task identified by the specified task ID.
+   *
+   * This method sends a GET request to the server to fetch the result of the task.
+   * The returned response includes details about the converted document and any
+   * potential errors or processing metadata associated with the task.
+   *
+   * @param taskId the unique identifier of the task whose result is being fetched.
+   *               Must not be blank or null.
+   * @return a {@link ConvertDocumentResponse} containing the details of the converted document,
+   *         processing time, status, and any errors encountered during processing.
+   * @throws IllegalArgumentException if {@code taskId} is blank or null.
+   */
+  @Override
+  public ConvertDocumentResponse convertTaskResult(String taskId) {
+    ValidationUtils.ensureNotBlank(taskId, "taskId");
+    return this.httpOperations.executeGet("/v1/result/%s".formatted(taskId), ConvertDocumentResponse.class);
+  }
+
+  /**
+   * Retrieves the result of a completed task in chunks, identified by the specified task ID.
+   *
+   * This method sends a GET request to fetch the result of the task, providing the output
+   * in a chunked format. The response includes details about the chunks, related documents,
+   * processing time, and other metadata related to task completion.
+   *
+   * @param taskId the unique identifier of the task whose chunked result is being fetched.
+   *               Must not be blank or null.
+   * @return a {@link ChunkDocumentResponse} containing information about the chunks,
+   *         related documents, processing time, and any additional task metadata.
+   * @throws IllegalArgumentException if {@code taskId} is blank or null.
+   */
+  @Override
+  public ChunkDocumentResponse chunkTaskResult(String taskId) {
+    ValidationUtils.ensureNotBlank(taskId, "taskId");
+    return this.httpOperations.executeGet("/v1/result/%s".formatted(taskId), ChunkDocumentResponse.class);
+  }
+}
diff --git a/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/AbstractDoclingServeClientTests.java b/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/AbstractDoclingServeClientTests.java
index 4192cc0..40ac119 100644
--- a/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/AbstractDoclingServeClientTests.java
+++ b/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/AbstractDoclingServeClientTests.java
@@ -1,15 +1,38 @@
package ai.docling.serve.client;
import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+import static org.awaitility.Awaitility.await;
import java.io.IOException;
import java.io.InputStream;
+import java.lang.reflect.Method;
import java.net.URI;
+import java.net.http.HttpClient;
+import java.net.http.HttpClient.Redirect;
+import java.net.http.HttpClient.Version;
+import java.net.http.HttpRequest;
+import java.net.http.HttpRequest.BodyPublisher;
+import java.net.http.HttpRequest.BodyPublishers;
+import java.net.http.HttpResponse;
+import java.net.http.HttpResponse.BodyHandlers;
+import java.nio.ByteBuffer;
import java.time.Duration;
import java.util.Base64;
import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.concurrent.Flow.Subscriber;
+import java.util.concurrent.atomic.AtomicReference;
+import org.jspecify.annotations.Nullable;
+import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtensionContext;
+import org.junit.jupiter.api.extension.RegisterExtension;
+import org.junit.jupiter.api.extension.TestWatcher;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import ai.docling.core.DoclingDocument;
import ai.docling.core.DoclingDocument.DocItemLabel;
@@ -20,6 +43,7 @@
import ai.docling.serve.api.chunk.request.options.HybridChunkerOptions;
import ai.docling.serve.api.chunk.response.Chunk;
import ai.docling.serve.api.chunk.response.ChunkDocumentResponse;
+import ai.docling.serve.api.clear.response.ClearResponse;
import ai.docling.serve.api.convert.request.ConvertDocumentRequest;
import ai.docling.serve.api.convert.request.options.ConvertDocumentOptions;
import ai.docling.serve.api.convert.request.options.OutputFormat;
@@ -28,178 +52,408 @@
import ai.docling.serve.api.convert.request.source.HttpSource;
import ai.docling.serve.api.convert.response.ConvertDocumentResponse;
import ai.docling.serve.api.health.HealthCheckResponse;
+import ai.docling.serve.api.task.response.TaskStatus;
+import ai.docling.serve.api.task.response.TaskStatusPollResponse;
import ai.docling.testcontainers.serve.DoclingServeContainer;
import ai.docling.testcontainers.serve.config.DoclingServeContainerConfig;
abstract class AbstractDoclingServeClientTests {
+ private static final Logger LOG = LoggerFactory.getLogger(AbstractDoclingServeClientTests.class);
+
protected static final DoclingServeContainer doclingContainer = new DoclingServeContainer(
- DoclingServeContainerConfig.builder()
- .image(DoclingServeContainerConfig.DOCLING_IMAGE)
- .enableUi(true)
- .build()
+ DoclingServeContainerConfig.builder()
+ .image(DoclingServeContainerConfig.DOCLING_IMAGE)
+ .enableUi(true)
+ .build()
);
+  // Outputs the container logs on a test failure, which can be useful when debugging
+  @RegisterExtension
+  TestWatcher watcher = new TestWatcher() {
+    @Override
+    public void testFailed(ExtensionContext context, @Nullable Throwable cause) {
+      var message = """
+          Test %s.%s failed with message: %s
+          Container logs:
+          %s
+          """.formatted(
+              // Take the class from the context: getClass() here would name this anonymous TestWatcher
+              context.getTestClass().map(Class::getName).orElse(""),
+              context.getTestMethod().map(Method::getName).orElse(""),
+              Optional.ofNullable(cause).map(Throwable::getMessage).orElse(""),
+              doclingContainer.getLogs());
+
+      LOG.error(message);
+    }
+  };
+
static {
doclingContainer.start();
}
protected abstract DoclingServeApi getDoclingClient();
- @Test
- void shouldSuccessfullyCallHealthEndpoint() {
- HealthCheckResponse response = getDoclingClient().health();
+ private T readValue(String json, Class valueType) {
+ return ((DoclingServeClient) getDoclingClient()).readValue(json, valueType);
+ }
- assertThat(response)
- .isNotNull()
- .extracting(HealthCheckResponse::getStatus)
- .isEqualTo("ok");
+ private String writeValueAsString(T value) {
+ return ((DoclingServeClient) getDoclingClient()).writeValueAsString(value);
}
- @Test
- void shouldConvertHttpSourceSuccessfully() {
- ConvertDocumentRequest request = ConvertDocumentRequest.builder()
- .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
- .build();
+ @Nested
+ class ClearTests {
+ @Test
+ void shouldClearConvertersSuccessfully() {
+ var response = getDoclingClient().clearConverters();
- ConvertDocumentResponse response = getDoclingClient().convertSource(request);
+ assertThat(response)
+ .isNotNull()
+ .extracting(ClearResponse::getStatus)
+ .isEqualTo("ok");
+ }
- assertThat(response).isNotNull();
+ @Test
+ void shouldClearResultsSuccessfully() {
+ var response = getDoclingClient().clearResults();
- assertThat(response.getStatus()).isNotEmpty();
- assertThat(response.getDocument()).isNotNull();
- assertThat(response.getDocument().getFilename()).isNotEmpty();
+ assertThat(response)
+ .isNotNull()
+ .extracting(ClearResponse::getStatus)
+ .isEqualTo("ok");
+ }
+ }
- if (response.getProcessingTime() != null) {
- assertThat(response.getProcessingTime()).isPositive();
+ @Nested
+ class TaskTests {
+ @Test
+ void pollInvalidTaskId() {
+ assertThatThrownBy(() -> getDoclingClient().pollTaskStatus("someInvalidTaskId"))
+ .hasRootCauseInstanceOf(DoclingServeClientException.class)
+ .hasRootCauseMessage("An error occurred: {\"detail\":\"Task not found.\"}");
}
- assertThat(response.getDocument().getMarkdownContent()).isNotEmpty();
- }
+ @Test
+ void convertUrlTaskResult() throws IOException, InterruptedException {
+ var pollResponse = doPollForTaskCompletion();
+ var result = getDoclingClient().convertTaskResult(pollResponse.getTaskId());
+ ConvertTests.assertConvertHttpSource(result);
+ }
+
+ @Test
+ void pollConvertUrlTask() throws IOException, InterruptedException {
+ doPollForTaskCompletion();
+ }
+
+ private TaskStatusPollResponse doPollForTaskCompletion() throws IOException, InterruptedException {
+ var response = submitTask();
+ var doclingClient = getDoclingClient();
+ var taskPollResponse = new AtomicReference<>(doclingClient.pollTaskStatus(response.getTaskId()));
+
+ assertThat(taskPollResponse).isNotNull();
+ assertThat(taskPollResponse.get())
+ .isNotNull()
+ .extracting(
+ TaskStatusPollResponse::getTaskId,
+ TaskStatusPollResponse::getTaskStatus,
+ TaskStatusPollResponse::getTaskType
+ )
+ .allMatch(Objects::nonNull);
+
+ assertThat(taskPollResponse.get())
+ .extracting(
+ TaskStatusPollResponse::getTaskId,
+ TaskStatusPollResponse::getTaskType
+ )
+ .containsExactly(
+ response.getTaskId(),
+ response.getTaskType()
+ );
+
+ if (taskPollResponse.get().getTaskStatus() != TaskStatus.SUCCESS) {
+ await()
+ .atMost(Duration.ofMinutes(1))
+ .pollDelay(Duration.ofSeconds(5))
+ .pollInterval(Duration.ofSeconds(5))
+ .logging(LoggerFactory.getLogger("org.awaitility")::info)
+ .until(() -> {
+ taskPollResponse.set(doclingClient.pollTaskStatus(response.getTaskId()));
+ return taskPollResponse.get().getTaskStatus() == TaskStatus.SUCCESS;
+ });
+ }
+
+ assertThat(taskPollResponse.get().getTaskStatus()).isEqualTo(TaskStatus.SUCCESS);
+
+ return taskPollResponse.get();
+ }
- @Test
- void shouldConvertFileSourceSuccessfully() throws IOException {
- var fileResource = readFileFromClasspath("story.pdf");
- ConvertDocumentRequest request = ConvertDocumentRequest.builder()
- .source(FileSource.builder()
- .filename("story.pdf")
- .base64String(Base64.getEncoder().encodeToString(fileResource))
- .build()
- )
+ // @TODO The async api isn't here yet, so we have to do something on our own for now in these tests.
+ // Once https://github.com/docling-project/docling-java/issues/127 is implemented then these methods below
+ // Can be switched to use that API for making the calls
+ private TaskStatusPollResponse submitTask() throws IOException, InterruptedException {
+ var request = ConvertDocumentRequest.builder()
+ .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
+ .build();
- .build();
+ var httpRequest = HttpRequest.newBuilder()
+ .uri(URI.create("%s/v1/convert/source/async".formatted(doclingContainer.getApiUrl())))
+ .header("Content-Type", "application/json")
+ .header("Accept", "application/json")
+ .POST(new LoggingBodyPublisher<>(request))
+ .build();
+
+ logRequest(httpRequest);
+
+ var httpClient = HttpClient.newBuilder()
+ .followRedirects(Redirect.NORMAL)
+ .version(Version.HTTP_1_1)
+ .build();
+
+ var httpResponse = httpClient.send(httpRequest, BodyHandlers.ofString());
+ var body = httpResponse.body();
+
+ logResponse(httpResponse, Optional.ofNullable(body));
+
+ var statusCode = httpResponse.statusCode();
+
+ if (statusCode >= 400) {
+ // Handle errors
+ // The Java HTTPClient doesn't throw exceptions on error codes
+ throw new DoclingServeClientException("An error occurred: %s".formatted(body), statusCode, body);
+ }
+
+ var response = readValue(body, TaskStatusPollResponse.class);
+
+ assertThat(response)
+ .isNotNull()
+ .extracting(
+ TaskStatusPollResponse::getTaskId,
+ TaskStatusPollResponse::getTaskPosition,
+ TaskStatusPollResponse::getTaskStatus,
+ TaskStatusPollResponse::getTaskType
+ )
+ .allMatch(Objects::nonNull);
+
+ assertThat(response.getTaskType()).isEqualTo("convert");
+
+ return response;
+ }
+
+ private static void logRequest(HttpRequest request) {
+ var stringBuilder = new StringBuilder();
+ stringBuilder.append("\n→ REQUEST: %s %s\n".formatted(request.method(), request.uri()));
+ stringBuilder.append(" HEADERS:\n");
+
+ request.headers().map().forEach((key, values) ->
+ stringBuilder.append(" %s: %s\n".formatted(key, String.join(", ", values)))
+ );
+
+ LOG.info(stringBuilder.toString());
+ }
- ConvertDocumentResponse response = getDoclingClient().convertSource(request);
+ private void logResponse(HttpResponse response, Optional responseBody) {
+ var stringBuilder = new StringBuilder();
+ stringBuilder.append("\n← RESPONSE: %s\n".formatted(response.statusCode()));
+ stringBuilder.append(" HEADERS:\n");
- assertThat(response).isNotNull();
- assertThat(response.getStatus()).isNotEmpty();
- assertThat(response.getDocument()).isNotNull();
- assertThat(response.getDocument().getFilename()).isEqualTo("story.pdf");
+ response.headers().map().forEach((key, values) ->
+ stringBuilder.append(" %s: %s\n".formatted(key, String.join(", ", values)))
+ );
- if (response.getProcessingTime() != null) {
- assertThat(response.getProcessingTime()).isPositive();
+ responseBody
+ .map(body -> writeValueAsString(readValue(body, Object.class)))
+ .ifPresent(body -> stringBuilder.append(" BODY:\n%s".formatted(body)));
+ LOG.info(stringBuilder.toString());
}
- assertThat(response.getDocument().getMarkdownContent()).isNotEmpty();
+ private class LoggingBodyPublisher implements BodyPublisher {
+ private final BodyPublisher delegate;
+ private final String stringContent;
+
+ private LoggingBodyPublisher(T content) {
+ this.stringContent = writeValueAsString(content);
+ this.delegate = BodyPublishers.ofString(this.stringContent);
+ }
+
+ @Override
+ public long contentLength() {
+ return this.delegate.contentLength();
+ }
+
+ @Override
+ public void subscribe(Subscriber super ByteBuffer> subscriber) {
+ LOG.info("→ REQUEST BODY: \n{}", this.stringContent);
+ this.delegate.subscribe(subscriber);
+ }
+ }
}
- @Test
- void shouldHandleConversionWithDifferentDocumentOptions() {
- ConvertDocumentOptions options = ConvertDocumentOptions.builder()
- .doOcr(true)
- .includeImages(true)
- .tableMode(TableFormerMode.FAST)
- .documentTimeout(Duration.ofMinutes(1))
- .build();
-
- ConvertDocumentRequest request = ConvertDocumentRequest.builder()
- .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
- .options(options)
- .build();
-
- ConvertDocumentResponse response = getDoclingClient().convertSource(request);
-
- assertThat(response).isNotNull();
- assertThat(response.getStatus()).isNotEmpty();
- assertThat(response.getDocument()).isNotNull();
+ @Nested
+ class HealthTests {
+ @Test
+ void shouldSuccessfullyCallHealthEndpoint() {
+ HealthCheckResponse response = getDoclingClient().health();
+
+ assertThat(response)
+ .isNotNull()
+ .extracting(HealthCheckResponse::getStatus)
+ .isEqualTo("ok");
+ }
}
- @Test
- void shouldHandleResponseWithDoclingDocument() {
- ConvertDocumentOptions options = ConvertDocumentOptions.builder()
- .toFormat(OutputFormat.JSON)
- .build();
+ @Nested
+ class ConvertTests {
+ static void assertConvertHttpSource(ConvertDocumentResponse response) {
+ assertThat(response).isNotNull();
+ assertThat(response.getStatus()).isNotEmpty();
+ assertThat(response.getDocument()).isNotNull();
+ assertThat(response.getDocument().getFilename()).isNotEmpty();
- ConvertDocumentRequest request = ConvertDocumentRequest.builder()
- .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
- .options(options)
- .build();
+ if (response.getProcessingTime() != null) {
+ assertThat(response.getProcessingTime()).isPositive();
+ }
- ConvertDocumentResponse response = getDoclingClient().convertSource(request);
+ assertThat(response.getDocument().getMarkdownContent()).isNotEmpty();
+ }
- assertThat(response).isNotNull();
- assertThat(response.getStatus()).isNotEmpty();
- assertThat(response.getDocument()).isNotNull();
+ @Test
+ void shouldConvertHttpSourceSuccessfully() {
+ ConvertDocumentRequest request = ConvertDocumentRequest.builder()
+ .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
+ .build();
- DoclingDocument doclingDocument = response.getDocument().getJsonContent();
- assertThat(doclingDocument).isNotNull();
- assertThat(doclingDocument.getName()).isNotEmpty();
- assertThat(doclingDocument.getTexts().get(0).getLabel()).isEqualTo(DocItemLabel.TITLE);
- }
+ ConvertDocumentResponse response = getDoclingClient().convertSource(request);
+ assertConvertHttpSource(response);
+ }
+
+ @Test
+ void shouldConvertFileSourceSuccessfully() throws IOException {
+ var fileResource = readFileFromClasspath("story.pdf");
+ ConvertDocumentRequest request = ConvertDocumentRequest.builder()
+ .source(FileSource.builder()
+ .filename("story.pdf")
+ .base64String(Base64.getEncoder().encodeToString(fileResource))
+ .build()
+ )
+
+ .build();
+
+ ConvertDocumentResponse response = getDoclingClient().convertSource(request);
- @Test
- void shouldChunkDocumentWithHierarchicalChunker() {
- ConvertDocumentOptions options = ConvertDocumentOptions.builder()
- .toFormat(OutputFormat.JSON)
- .build();
-
- HierarchicalChunkDocumentRequest request = HierarchicalChunkDocumentRequest.builder()
- .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
- .options(options)
- .includeConvertedDoc(true)
- .chunkingOptions(HierarchicalChunkerOptions.builder()
- .includeRawText(true)
- .useMarkdownTables(true)
- .build())
- .build();
-
- ChunkDocumentResponse response = getDoclingClient().chunkSourceWithHierarchicalChunker(request);
-
- assertThat(response).isNotNull();
- assertThat(response.getChunks()).isNotEmpty();
- assertThat(response.getDocuments()).isNotEmpty();
- assertThat(response.getProcessingTime()).isNotNull();
-
- List chunks = response.getChunks();
- assertThat(chunks).allMatch(chunk -> !chunk.getText().isEmpty());
+ assertThat(response).isNotNull();
+ assertThat(response.getStatus()).isNotEmpty();
+ assertThat(response.getDocument()).isNotNull();
+ assertThat(response.getDocument().getFilename()).isEqualTo("story.pdf");
+
+ if (response.getProcessingTime()!=null) {
+ assertThat(response.getProcessingTime()).isPositive();
+ }
+
+ assertThat(response.getDocument().getMarkdownContent()).isNotEmpty();
+ }
+
+ @Test
+ void shouldHandleConversionWithDifferentDocumentOptions() {
+ ConvertDocumentOptions options = ConvertDocumentOptions.builder()
+ .doOcr(true)
+ .includeImages(true)
+ .tableMode(TableFormerMode.FAST)
+ .documentTimeout(Duration.ofMinutes(1))
+ .build();
+
+ ConvertDocumentRequest request = ConvertDocumentRequest.builder()
+ .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
+ .options(options)
+ .build();
+
+ ConvertDocumentResponse response = getDoclingClient().convertSource(request);
+
+ assertThat(response).isNotNull();
+ assertThat(response.getStatus()).isNotEmpty();
+ assertThat(response.getDocument()).isNotNull();
+ }
+
+ @Test
+ void shouldHandleResponseWithDoclingDocument() {
+ ConvertDocumentOptions options = ConvertDocumentOptions.builder()
+ .toFormat(OutputFormat.JSON)
+ .build();
+
+ ConvertDocumentRequest request = ConvertDocumentRequest.builder()
+ .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
+ .options(options)
+ .build();
+
+ ConvertDocumentResponse response = getDoclingClient().convertSource(request);
+
+ assertThat(response).isNotNull();
+ assertThat(response.getStatus()).isNotEmpty();
+ assertThat(response.getDocument()).isNotNull();
+
+ DoclingDocument doclingDocument = response.getDocument().getJsonContent();
+ assertThat(doclingDocument).isNotNull();
+ assertThat(doclingDocument.getName()).isNotEmpty();
+ assertThat(doclingDocument.getTexts().get(0).getLabel()).isEqualTo(DocItemLabel.TITLE);
+ }
}
- @Test
- void shouldChunkDocumentWithHybridChunker() {
- ConvertDocumentOptions options = ConvertDocumentOptions.builder()
- .toFormat(OutputFormat.JSON)
- .build();
-
- HybridChunkDocumentRequest request = HybridChunkDocumentRequest.builder()
- .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
- .options(options)
- .includeConvertedDoc(true)
- .chunkingOptions(HybridChunkerOptions.builder()
- .includeRawText(true)
- .useMarkdownTables(true)
- .maxTokens(10000)
- .tokenizer("sentence-transformers/all-MiniLM-L6-v2")
- .build())
- .build();
-
- ChunkDocumentResponse response = getDoclingClient().chunkSourceWithHybridChunker(request);
-
- assertThat(response).isNotNull();
- assertThat(response.getChunks()).isNotEmpty();
- assertThat(response.getDocuments()).isNotEmpty();
- assertThat(response.getProcessingTime()).isNotNull();
-
- List chunks = response.getChunks();
- assertThat(chunks).allMatch(chunk -> !chunk.getText().isEmpty());
+ @Nested
+ class ChunkTests {
+ @Test
+ void shouldChunkDocumentWithHierarchicalChunker() {
+ ConvertDocumentOptions options = ConvertDocumentOptions.builder()
+ .toFormat(OutputFormat.JSON)
+ .build();
+
+ HierarchicalChunkDocumentRequest request = HierarchicalChunkDocumentRequest.builder()
+ .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
+ .options(options)
+ .includeConvertedDoc(true)
+ .chunkingOptions(HierarchicalChunkerOptions.builder()
+ .includeRawText(true)
+ .useMarkdownTables(true)
+ .build())
+ .build();
+
+ ChunkDocumentResponse response = getDoclingClient().chunkSourceWithHierarchicalChunker(request);
+
+ assertThat(response).isNotNull();
+ assertThat(response.getChunks()).isNotEmpty();
+ assertThat(response.getDocuments()).isNotEmpty();
+ assertThat(response.getProcessingTime()).isNotNull();
+
+ List chunks = response.getChunks();
+ assertThat(chunks).allMatch(chunk -> !chunk.getText().isEmpty());
+ }
+
+ @Test
+ void shouldChunkDocumentWithHybridChunker() {
+ ConvertDocumentOptions options = ConvertDocumentOptions.builder()
+ .toFormat(OutputFormat.JSON)
+ .build();
+
+ HybridChunkDocumentRequest request = HybridChunkDocumentRequest.builder()
+ .source(HttpSource.builder().url(URI.create("https://docs.arconia.io/arconia-cli/latest/development/dev/")).build())
+ .options(options)
+ .includeConvertedDoc(true)
+ .chunkingOptions(HybridChunkerOptions.builder()
+ .includeRawText(true)
+ .useMarkdownTables(true)
+ .maxTokens(10000)
+ .tokenizer("sentence-transformers/all-MiniLM-L6-v2")
+ .build())
+ .build();
+
+ ChunkDocumentResponse response = getDoclingClient().chunkSourceWithHybridChunker(request);
+
+ assertThat(response).isNotNull();
+ assertThat(response.getChunks()).isNotEmpty();
+ assertThat(response.getDocuments()).isNotEmpty();
+ assertThat(response.getProcessingTime()).isNotNull();
+
+ List chunks = response.getChunks();
+ assertThat(chunks).allMatch(chunk -> !chunk.getText().isEmpty());
+ }
}
private static byte[] readFileFromClasspath(String filePath) throws IOException {
diff --git a/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/DoclingServeJackson2ClientTests.java b/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/DoclingServeJackson2ClientTests.java
index 9ad3742..a84ebaf 100644
--- a/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/DoclingServeJackson2ClientTests.java
+++ b/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/DoclingServeJackson2ClientTests.java
@@ -16,6 +16,7 @@ static void setUp() {
.baseUrl(doclingContainer.getApiUrl())
.logRequests()
.logResponses()
+ .prettyPrint()
.build();
}
diff --git a/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/DoclingServeJackson3ClientTests.java b/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/DoclingServeJackson3ClientTests.java
index be832a5..97bc9f9 100644
--- a/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/DoclingServeJackson3ClientTests.java
+++ b/docling-serve/docling-serve-client/src/test/java/ai/docling/serve/client/DoclingServeJackson3ClientTests.java
@@ -15,6 +15,7 @@ static void setUp() {
doclingClient = DoclingServeJackson3Client.builder()
.logRequests()
.logResponses()
+ .prettyPrint()
.baseUrl(doclingContainer.getApiUrl())
.build();
}
diff --git a/docling-testing/docling-version-tests/src/main/resources/application.yml b/docling-testing/docling-version-tests/src/main/resources/application.yml
index 8418a9d..818d115 100644
--- a/docling-testing/docling-version-tests/src/main/resources/application.yml
+++ b/docling-testing/docling-version-tests/src/main/resources/application.yml
@@ -1,4 +1,6 @@
quarkus:
+ http:
+ test-port: -1
rest-client:
github-container-registry:
url: https://ghcr.io
diff --git a/docs/src/doc/docs/core.md b/docs/src/doc/docs/core.md
index 2b8b896..0673f82 100644
--- a/docs/src/doc/docs/core.md
+++ b/docs/src/doc/docs/core.md
@@ -1,5 +1,7 @@
# Docling Core
+[](https://docling-project.github.io/docling-java/{{ gradle.project_version }}/core)
+
The `docling-core` module provides the core data types used by Docling for document representation. It defines the `DoclingDocument` model, which captures the structure and content of documents across various formats, along with utilities for working with these types.
The base Java version is 17. This module has no external dependencies, making it lightweight and easy to integrate into your projects. It represents the foundational building block for the other Docling Java modules.
diff --git a/docs/src/doc/docs/docling-serve/serve-api.md b/docs/src/doc/docs/docling-serve/serve-api.md
index b9fe5f5..9e2a65f 100644
--- a/docs/src/doc/docs/docling-serve/serve-api.md
+++ b/docs/src/doc/docs/docling-serve/serve-api.md
@@ -1,5 +1,7 @@
# Docling Serve API
+[](https://docling-project.github.io/docling-java/{{ gradle.project_version }}/docling-serve/serve-api)
+
The `docling-serve-api` module defines the core, framework-agnostic Java API used to communicate
with a [Docling Serve](https://github.com/docling-project/docling-serve) backend. It provides the request/response model and the main `DoclingServeApi`
interface. You can use any implementation of this interface to talk to a running
diff --git a/docs/src/doc/docs/docling-serve/serve-client.md b/docs/src/doc/docs/docling-serve/serve-client.md
index 6fea4d9..bf21455 100644
--- a/docs/src/doc/docs/docling-serve/serve-client.md
+++ b/docs/src/doc/docs/docling-serve/serve-client.md
@@ -1,5 +1,7 @@
# Docling Serve Client
+[](https://docling-project.github.io/docling-java/{{ gradle.project_version }}/docling-serve/serve-client)
+
The `docling-serve-client` module is the reference HTTP client for talking to a
[Docling Serve](https://github.com/docling-project/docling-serve) backend.
diff --git a/docs/src/doc/docs/testcontainers.md b/docs/src/doc/docs/testcontainers.md
index af0a191..f9532dc 100644
--- a/docs/src/doc/docs/testcontainers.md
+++ b/docs/src/doc/docs/testcontainers.md
@@ -1,5 +1,7 @@
# Testcontainers
+[](https://docling-project.github.io/docling-java/{{ gradle.project_version }}/testcontainers)
+
The `docling-testcontainers` module provides a ready-to-use [Testcontainers](https://testcontainers.com/) integration for running a [Docling Serve](https://github.com/docling-project/docling-serve) instance in your tests. It wraps the official container image and exposes a simple Java API so you can spin up Docling as part of your JUnit test lifecycle and exercise client code against a real server.
If you need to talk to a running server from your application code, pair this module with the reference HTTP client:
diff --git a/docs/src/doc/docs/whats-new.md b/docs/src/doc/docs/whats-new.md
index a8544e1..5c51fe6 100644
--- a/docs/src/doc/docs/whats-new.md
+++ b/docs/src/doc/docs/whats-new.md
@@ -1,9 +1,16 @@
-# What's New in Docling Java {{ gradle.project_version }}
+# What's New in Docling Java
Docling Java {{ gradle.project_version }} provides a number of new features, enhancements, and bug fixes. This page includes the highlights of the release, but you can also check out the full [release notes](https://github.com/docling-project/docling-java/releases) for more details about each new feature and bug fix.
## Docling Serve
+### {{ gradle.project_version }}
+
+* Implemented the Docling Serve clear and task APIs in `docling-serve-api` and `docling-serve-client`.
+* Added a `pretty-print` configuration option to `DoclingServeClient` to enable pretty-printing of JSON requests and responses.
+
+### 0.3.0
+
* The classes in the `docling-serve-api` module have been moved from the `ai.docling.api.serve` package to the `ai.docling.serve.api` package.
* The classes in the `docling-serve-client` module have been moved from the `ai.docling.client.serve` package to the `ai.docling.serve.client` package.
* The classes in the `docling-core` module have been moved from the `ai.docling.api.core` package to the `ai.docling.core` package.
diff --git a/gradle.properties b/gradle.properties
index b18fb65..de9400a 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -1 +1,4 @@
java.version=17
+org.gradle.parallel=true
+org.gradle.caching=true
+org.gradle.configuration-cache=true
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
index 5e93b46..eaf9943 100644
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -1,5 +1,6 @@
[versions]
assertj = "3.27.6"
+awaitility = "4.3.0"
jackson2 = "2.20.1"
jackson3 = "3.0.3"
jacoco = "0.8.14"
@@ -19,6 +20,9 @@ quarkus-wiremock = "1.5.2"
# assertj
assertj-core = { group = "org.assertj", name = "assertj-core", version.ref = "assertj" }
+# awaitility
+awaitility = { group = "org.awaitility", name = "awaitility", version.ref = "awaitility" }
+
# Jackson
jackson-annotations = { group = "com.fasterxml.jackson.core", name = "jackson-annotations" }
jackson-bom = { group = "tools.jackson", name = "jackson-bom", version.ref = "jackson3" }