Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
This is the repository for Docling Java, a Java API for using [Docling](https://github.com/docling-project).

[![Docs](https://img.shields.io/badge/docs-live-brightgreen)](https://docling-project.github.io/docling-java/)
[![docling-core version](https://img.shields.io/maven-central/v/ai.docling/docling-core?label=docling-core)](https://docling-project.github.io/docling-java/dev/core)
[![docling-serve-api version](https://img.shields.io/maven-central/v/ai.docling/docling-serve-api?label=docling-serve-api)](https://docling-project.github.io/docling-java/dev/docling-serve/serve-api/)
[![docling-serve-client version](https://img.shields.io/maven-central/v/ai.docling/docling-serve-client?label=docling-serve-client)](https://docling-project.github.io/docling-java/dev/docling-serve/serve-client/)
Expand Down
9 changes: 8 additions & 1 deletion buildSrc/src/main/kotlin/docling-java-shared.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,13 @@ jacoco {
toolVersion = libs.findVersion("jacoco").get().toString()
}

tasks.withType<Test> {
tasks.withType<Test>().configureEach {
// Use JUnit Platform for unit tests.
useJUnitPlatform()

maxParallelForks = (Runtime.getRuntime().availableProcessors() / 2).coerceAtLeast(1)
forkEvery = 100

finalizedBy(tasks.named("jacocoTestReport"))

testLogging {
Expand Down Expand Up @@ -72,6 +75,10 @@ tasks.withType<Test> {
}
}

// Run every Java compilation in a forked compiler process (options.isFork)
// instead of in-process; configureEach applies this lazily to each JavaCompile task.
tasks.withType<JavaCompile>().configureEach {
options.isFork = true
}

tasks.withType<Javadoc> {
isFailOnError = false

Expand Down
1 change: 1 addition & 0 deletions docling-serve/docling-serve-api/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,6 @@ dependencies {
}

tasks.withType<Javadoc> {
// Feed Javadoc all Java sources of the main source set...
source = sourceSets["main"].allJava
// ...but keep Lombok's configuration file out of the Javadoc inputs.
exclude("**/lombok.config")
}
Original file line number Diff line number Diff line change
@@ -1,44 +1,10 @@
package ai.docling.serve.api;

import ai.docling.serve.api.chunk.request.HierarchicalChunkDocumentRequest;
import ai.docling.serve.api.chunk.request.HybridChunkDocumentRequest;
import ai.docling.serve.api.chunk.response.ChunkDocumentResponse;
import ai.docling.serve.api.convert.request.ConvertDocumentRequest;
import ai.docling.serve.api.convert.response.ConvertDocumentResponse;
import ai.docling.serve.api.health.HealthCheckResponse;

/**
* Docling Serve API interface.
*/
public interface DoclingServeApi {

/**
* Executes a health check for the API and retrieves the health status of the service.
*
* @return a {@link HealthCheckResponse} object containing the health status of the API.
*/
HealthCheckResponse health();

/**
* Converts the provided document source(s) into a processed document based on the specified options.
*
* @param request the {@link ConvertDocumentRequest} containing the source(s), conversion options, and optional target.
* @return a {@link ConvertDocumentResponse} containing the processed document data, processing details, and any errors.
*/
ConvertDocumentResponse convertSource(ConvertDocumentRequest request);

/**
* Converts and chunks the provided document source(s) into a processed document based on the specified options
* and using a hierarchical chunker for splitting the document into smaller chunks.
*/
ChunkDocumentResponse chunkSourceWithHierarchicalChunker(HierarchicalChunkDocumentRequest request);

/**
* Converts and chunks the provided document source(s) into a processed document based on the specified options
* and using a hybrid chunker for splitting the document into smaller chunks.
*/
ChunkDocumentResponse chunkSourceWithHybridChunker(HybridChunkDocumentRequest request);

public interface DoclingServeApi
extends DoclingServeHealthApi, DoclingServeConvertApi, DoclingServeChunkApi, DoclingServeClearApi, DoclingServeTaskApi {
/**
* Creates and returns a builder instance capable of constructing a duplicate or modified
* version of the current API instance. The builder provides a customizable way to adjust
Expand Down Expand Up @@ -98,6 +64,30 @@ default B logResponses() {
*/
B logResponses(boolean logResponses);

/**
* Configures whether the API client should format JSON requests and responses in a "pretty" format.
* Pretty formatting organizes the response data to improve readability,
* typically by adding spacing and line breaks.
*
* This setting does not affect the functional content of the response but can
* assist with debugging or human-readable output for development purposes.
*
* @param prettyPrint {@code true} to enable pretty-printing of JSON requests and responses;
* {@code false} to use compact formatting.
* @return {@code this} builder instance for fluent API usage.
*/
B prettyPrint(boolean prettyPrint);

/**
* Configures the API client to format JSON requests and responses in a "pretty" format.
* Pretty formatting improves readability by including spacing and line breaks.
*
* @return {@code this} builder instance for fluent API usage.
*/
default B prettyPrint() {
return prettyPrint(true);
}

/**
* Builds and returns an instance of the specified type, representing the completed configuration
* of the builder. The returned instance is typically an implementation of the Docling API.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package ai.docling.serve.api;

import ai.docling.serve.api.chunk.request.HierarchicalChunkDocumentRequest;
import ai.docling.serve.api.chunk.request.HybridChunkDocumentRequest;
import ai.docling.serve.api.chunk.response.ChunkDocumentResponse;

/**
* Represents the Docling Serve Chunk API, providing methods for processing document sources
* by splitting them into smaller chunks using various chunking strategies. This interface
* ensures flexibility by supporting both hierarchical and hybrid chunking mechanisms.
*
* @see DoclingServeApi
*/
public interface DoclingServeChunkApi {
/**
* Converts and chunks the provided document source(s) into a processed document based on the specified options
* and using a hierarchical chunker for splitting the document into smaller chunks.
*
* @param request the {@link HierarchicalChunkDocumentRequest} describing what to convert and how to chunk it.
* @return a {@link ChunkDocumentResponse} holding the outcome of the chunking operation.
*/
ChunkDocumentResponse chunkSourceWithHierarchicalChunker(HierarchicalChunkDocumentRequest request);

/**
* Converts and chunks the provided document source(s) into a processed document based on the specified options
* and using a hybrid chunker for splitting the document into smaller chunks.
*
* @param request the {@link HybridChunkDocumentRequest} describing what to convert and how to chunk it.
* @return a {@link ChunkDocumentResponse} holding the outcome of the chunking operation.
*/
ChunkDocumentResponse chunkSourceWithHybridChunker(HybridChunkDocumentRequest request);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package ai.docling.serve.api;

import java.time.Duration;

import org.jspecify.annotations.Nullable;

import ai.docling.serve.api.clear.response.ClearResponse;

/**
 * Interface representing the Docling Serve Clear API. This API provides functionality
 * for managing and cleaning up converters and stale data retained by the service.
 * It includes methods for clearing registered converters and stored results based
 * on specified thresholds or default configurations.
 */
public interface DoclingServeClearApi {
  /**
   * Represents the default duration used as a threshold for clearing stale results
   * or data in the Docling Serve Clear API. Results older than this duration
   * are considered stale and may be subject to cleanup.
   *
   * <p>The value is predefined as 1 hour (3600 seconds).
   */
  Duration DEFAULT_OLDER_THAN = Duration.ofHours(1);

  /**
   * Clears all registered converters associated with the API.
   * This method removes any previously configured or cached converters,
   * effectively resetting the converter state to an uninitialized state.
   * After invoking this method, no converters will be available until new ones are added or configured.
   *
   * @return a {@link ClearResponse} object containing the status of the clear operation.
   */
  ClearResponse clearConverters();

  /**
   * Clears stored results that are older than the specified duration threshold.
   * This method is used for housekeeping to remove stale or outdated data from the system.
   *
   * @param olderThan the duration threshold; only results older than this duration will be cleared.
   *     May be {@code null}; how {@code null} is handled is left to the implementation
   *     (presumably it falls back to {@link #DEFAULT_OLDER_THAN} &mdash; confirm against the implementation).
   * @return a {@link ClearResponse} object containing the status of the clear operation.
   */
  ClearResponse clearResults(@Nullable Duration olderThan);

  /**
   * Clears stored results that are older than the default duration threshold.
   * This method uses the pre-defined {@link #DEFAULT_OLDER_THAN} as the threshold
   * to determine which results are considered stale and should be removed.
   *
   * @return a {@link ClearResponse} object containing the status of the clear operation.
   */
  default ClearResponse clearResults() {
    return clearResults(DEFAULT_OLDER_THAN);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package ai.docling.serve.api;

import ai.docling.serve.api.convert.request.ConvertDocumentRequest;
import ai.docling.serve.api.convert.response.ConvertDocumentResponse;

/**
* Interface representing the Docling Serve Convert API.
* This API is responsible for processing and converting document source(s) into
* a structured or processed document format based on the specified conversion options.
*
* @see DoclingServeApi
*/
public interface DoclingServeConvertApi {
/**
* Converts the provided document source(s) into a processed document based on the specified options.
*
* @param request the {@link ConvertDocumentRequest} containing the source(s), conversion options, and optional target.
* @return a {@link ConvertDocumentResponse} containing the processed document data, processing details, and any errors.
*/
ConvertDocumentResponse convertSource(ConvertDocumentRequest request);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package ai.docling.serve.api;

import ai.docling.serve.api.health.HealthCheckResponse;

/**
* Interface for performing health checks on the Docling service API.
* This API is designed to verify and report the operational status of the service.
*
* @see DoclingServeApi
*/
public interface DoclingServeHealthApi {
/**
* Executes a health check for the API and retrieves the health status of the service.
*
* @return a {@link HealthCheckResponse} object containing the health status of the API.
*/
HealthCheckResponse health();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package ai.docling.serve.api;

import java.time.Duration;

import org.jspecify.annotations.Nullable;

import ai.docling.serve.api.chunk.response.ChunkDocumentResponse;
import ai.docling.serve.api.convert.response.ConvertDocumentResponse;
import ai.docling.serve.api.task.response.TaskStatusPollResponse;

/**
* Defines the interface for the Docling Task API, which provides operations for
* managing and querying the status of asynchronous tasks.
*
* <p>This interface supports task status polling with configurable wait durations
* and a default polling mechanism. It is one of the interfaces extended by the
* aggregate {@link DoclingServeApi}.
*/
public interface DoclingServeTaskApi {
/**
* The default wait time between status polling attempts for a task.
* <p>
* This value is used when no explicit wait time is specified in a
* {@code TaskStatusPollRequest} instance. It is set to {@link Duration#ZERO},
* meaning there is no delay by default between consecutive polling attempts.
* </p>
*/
Duration DEFAULT_STATUS_POLL_WAIT_TIME = Duration.ZERO;

/**
* Polls the status of a task asynchronously and retrieves its current state.
* Allows for configurable wait time between polling attempts.
* If the wait time is {@code null}, the default wait time ({@link #DEFAULT_STATUS_POLL_WAIT_TIME}) is used.
*
* @param taskId the unique identifier of the task whose status is being polled
* @param waitTime the duration to wait before polling the status, or {@code null} to use the default polling interval
* @return a {@link TaskStatusPollResponse} containing the current status of the task and associated metadata
*/
TaskStatusPollResponse pollTaskStatus(String taskId, @Nullable Duration waitTime);

/**
* Polls the status of a task asynchronously using the default wait time.
* This convenience method delegates to {@link #pollTaskStatus(String, Duration)}
* with {@link #DEFAULT_STATUS_POLL_WAIT_TIME} as the wait time.
*
* @param taskId the unique identifier of the task whose status is being polled
* @return a {@link TaskStatusPollResponse} containing the current status of the task
* and associated metadata
*/
default TaskStatusPollResponse pollTaskStatus(String taskId) {
return pollTaskStatus(taskId, DEFAULT_STATUS_POLL_WAIT_TIME);
}

/**
* Converts the completed task result identified by the provided task ID into a document response.
* This method processes the task data associated with the given ID and generates a response
* encapsulating the converted document details.
*
* @param taskId the unique identifier of the task whose result needs to be converted into a document response
* @return a {@link ConvertDocumentResponse} containing the details of the converted document, processing metadata,
* errors (if any), and other relevant information
*/
ConvertDocumentResponse convertTaskResult(String taskId);

/**
* Processes the results of a completed task identified by the given task ID and generates a
* response containing chunked document details. This method is used to break down the document
* associated with the task into manageable chunks, making it suitable for subsequent processing
* or analysis.
*
* @param taskId the unique identifier of the task whose result is to be processed and chunked into
* a {@link ChunkDocumentResponse}
* @return a {@link ChunkDocumentResponse} containing the chunked document details and related metadata
*/
ChunkDocumentResponse chunkTaskResult(String taskId);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
@NullMarked
package ai.docling.serve.api.clear;

import org.jspecify.annotations.NullMarked;
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package ai.docling.serve.api.clear.response;

import org.jspecify.annotations.Nullable;

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;

/**
* Represents the response returned when performing a "clear" operation
* within the Docling API. This response typically communicates the status
* of the clear operation, indicating success, failure, or other relevant
* information.
*
* <p>The response includes a status field, which may provide additional details
* about the operation's outcome. The status field is optional and may be
* null or empty if no specific status message is provided. Because of
* {@code JsonInclude.Include.NON_EMPTY}, a null or empty status is left out
* when the object is serialized to JSON.
*
* <p>This class is immutable and is constructed using the {@link Builder}.
*/
@JsonInclude(JsonInclude.Include.NON_EMPTY)
@tools.jackson.databind.annotation.JsonDeserialize(builder = ClearResponse.Builder.class)
@lombok.extern.jackson.Jacksonized
@lombok.Builder(toBuilder = true)
@lombok.Getter
@lombok.ToString
public class ClearResponse {
/** Status text of the clear operation, mapped to the JSON property {@code "status"}; may be {@code null}. */
@JsonProperty("status")
@Nullable
private String status;

/**
* Builder for {@link ClearResponse}. The body is intentionally empty: the
* {@code @lombok.Builder} annotation on the enclosing class generates its members.
* {@code withPrefix = ""} tells Jackson that the builder's property methods carry
* no prefix (for example {@code status(...)} rather than {@code withStatus(...)}).
*/
@tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "")
public static class Builder { }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
@NullMarked
package ai.docling.serve.api.clear.response;

import org.jspecify.annotations.NullMarked;
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
* private Duration timeout;
* }
* </pre>
* </p>
* @see Jackson2DurationSerializer
* @see Jackson3DurationSerializer
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/**
* Serialization helpers
*/
@NullMarked
package ai.docling.serve.api.serialization;

import org.jspecify.annotations.NullMarked;
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/**
* Types for the Docling Serve Task API.
*/
@NullMarked
package ai.docling.serve.api.task;

import org.jspecify.annotations.NullMarked;
Loading