From 5c25c8344f24cd37ceab3ce91c83ffaf9ec75582 Mon Sep 17 00:00:00 2001 From: "fern-api[bot]" <115122769+fern-api[bot]@users.noreply.github.com> Date: Sun, 1 Mar 2026 01:18:25 +0000 Subject: [PATCH] SDK regeneration --- .fern/metadata.json | 4 +- README.md | 6 +- poetry.lock | 6 +- pyproject.toml | 2 +- reference.md | 928 ++++++++++---- src/runcaptain/__init__.py | 65 +- src/runcaptain/client.py | 34 +- src/runcaptain/collections/client.py | 104 +- src/runcaptain/collections/raw_client.py | 56 +- src/runcaptain/core/client_wrapper.py | 33 +- src/runcaptain/datasets/__init__.py | 33 - src/runcaptain/datasets/client.py | 110 +- src/runcaptain/datasets/raw_client.py | 90 +- src/runcaptain/datasets/types/__init__.py | 38 - .../get_dataset_article_request_dataset.py | 5 - .../types/search_dataset_request_dataset.py | 5 - src/runcaptain/indexing/__init__.py | 21 + src/runcaptain/indexing/client.py | 921 ++++++++++++-- src/runcaptain/indexing/raw_client.py | 1117 +++++++++++++---- src/runcaptain/indexing/types/__init__.py | 21 + ...ndex_r2directory_request_v2jurisdiction.py | 5 + ...x_r2directory_request_v2processing_type.py | 5 + .../index_r2file_request_v2jurisdiction.py | 5 + .../index_r2file_request_v2processing_type.py | 5 + .../types/index_r2request_v2jurisdiction.py | 5 + .../index_r2request_v2processing_type.py | 5 + .../index_url_request_v2processing_type.py | 5 + src/runcaptain/jobs/client.py | 20 +- src/runcaptain/jobs/raw_client.py | 4 - src/runcaptain/query/client.py | 110 +- src/runcaptain/query/raw_client.py | 102 +- src/runcaptain/types/__init__.py | 41 +- src/runcaptain/types/collection_item_v2.py | 21 +- .../types/dataset_article_response.py | 3 +- .../types/dataset_article_response_dataset.py | 5 - .../types/dataset_search_response.py | 3 +- .../types/dataset_search_response_dataset.py | 5 - src/runcaptain/types/document_item_v2.py | 2 +- .../types/job_status_response_v2job_type.py | 10 +- .../types/query_stream_complete_event.py | 31 + .../types/query_stream_error_event.py | 26 + src/runcaptain/types/query_stream_event.py | 103 ++ .../types/query_stream_text_event.py | 36 + .../types/query_stream_tool_end_event.py | 51 + .../types/query_stream_tool_start_event.py | 46 + 45 files changed, 3037 insertions(+), 1216 deletions(-) delete mode 100644 src/runcaptain/datasets/types/__init__.py delete mode 100644 src/runcaptain/datasets/types/get_dataset_article_request_dataset.py delete mode 100644 src/runcaptain/datasets/types/search_dataset_request_dataset.py create mode 100644 src/runcaptain/indexing/types/index_r2directory_request_v2jurisdiction.py create mode 100644 src/runcaptain/indexing/types/index_r2directory_request_v2processing_type.py create mode 100644 src/runcaptain/indexing/types/index_r2file_request_v2jurisdiction.py create mode 100644 src/runcaptain/indexing/types/index_r2file_request_v2processing_type.py create mode 100644 src/runcaptain/indexing/types/index_r2request_v2jurisdiction.py create mode 100644 src/runcaptain/indexing/types/index_r2request_v2processing_type.py create mode 100644 src/runcaptain/indexing/types/index_url_request_v2processing_type.py delete mode 100644 src/runcaptain/types/dataset_article_response_dataset.py delete mode 100644 src/runcaptain/types/dataset_search_response_dataset.py create mode 100644 src/runcaptain/types/query_stream_complete_event.py create mode 100644 src/runcaptain/types/query_stream_error_event.py create mode 100644 src/runcaptain/types/query_stream_event.py create mode 100644 src/runcaptain/types/query_stream_text_event.py create mode 100644 src/runcaptain/types/query_stream_tool_end_event.py create mode 100644 src/runcaptain/types/query_stream_tool_start_event.py diff --git a/.fern/metadata.json b/.fern/metadata.json index 3791815..ef6aa0d 100644 --- a/.fern/metadata.json +++ b/.fern/metadata.json @@ -1,9 +1,9 @@ { "cliVersion": "3.86.0", "generatorName": "fernapi/fern-python-sdk", - "generatorVersion": "4.59.0", + "generatorVersion": "4.59.4", "generatorConfig": { "client_class_name": "Captain" }, - "sdkVersion": "0.0.0" + "sdkVersion": "0.0.1" } \ No newline at end of file diff --git a/README.md b/README.md index 1a92543..51bbf66 100644 --- a/README.md +++ b/README.md @@ -37,8 +37,8 @@ Instantiate and use the client with the following: from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.query.collection_v2( collection_name="my_documents", @@ -46,6 +46,7 @@ client.query.collection_v2( inference=True, stream=True, rerank=True, + top_k=10, ) ``` @@ -59,8 +60,8 @@ import asyncio from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) @@ -71,6 +72,7 @@ async def main() -> None: inference=True, stream=True, rerank=True, + top_k=10, ) diff --git a/poetry.lock b/poetry.lock index 2f8666b..83fdc8b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -38,13 +38,13 @@ trio = ["trio (>=0.26.1)"] [[package]] name = "certifi" -version = "2026.1.4" +version = "2026.2.25" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.7" files = [ - {file = "certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c"}, - {file = "certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120"}, + {file = "certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa"}, + {file = "certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7"}, ] [[package]] diff --git a/pyproject.toml b/pyproject.toml index ac15859..88b6bc6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ dynamic = ["version"] [tool.poetry] name = "captain-sdk" -version = "0.0.0" +version = "0.0.1" description = "" readme = "README.md" authors = [] diff --git a/reference.md b/reference.md index a89bbf8..f5e5b0d 100644 --- a/reference.md +++ b/reference.md @@ -1,6 +1,6 @@ # Reference ## Collections -
client.collections.list_collections_v2(...) -> AsyncHttpResponse[CollectionListResponseV2] +
client.collections.list_collections_v2() -> AsyncHttpResponse[CollectionListResponseV2]
@@ -32,8 +32,8 @@ Returns an array of collection objects with collection_name, collection_id, and from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.list_collections_v2() @@ -51,22 +51,6 @@ client.collections.list_collections_v2()
-**limit:** `typing.Optional[int]` — Maximum number of collections to return - -
-
- -
-
- -**offset:** `typing.Optional[int]` — Pagination offset - -
-
- -
-
- **request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.
@@ -109,11 +93,12 @@ Create a new collection (idempotent). Returns 201 if created, 200 if already exi from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.create_collection_v2( collection_name="my_documents", + description="A collection of research documents", ) ``` @@ -130,7 +115,7 @@ client.collections.create_collection_v2(
-**collection_name:** `str` — Name of the collection to create +**collection_name:** `str`
@@ -188,8 +173,8 @@ Delete a collection and all its indexed documents. from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.delete_collection_v2( collection_name="my_documents", @@ -209,7 +194,7 @@ client.collections.delete_collection_v2(
-**collection_name:** `str` — Name of the collection to delete +**collection_name:** `str`
@@ -271,8 +256,8 @@ All files, indexed data, and vector embeddings are preserved. The collection's i from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.change_collection_environment_v2( collection_name="my_documents", @@ -293,7 +278,7 @@ client.collections.change_collection_environment_v2(
-**collection_name:** `str` — Name of the collection to move +**collection_name:** `str`
@@ -351,13 +336,11 @@ List all documents in a collection with pagination support. from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.list_documents_v2( collection_name="my_documents", - limit=100, - offset=0, ) ``` @@ -374,15 +357,7 @@ client.collections.list_documents_v2(
-**collection_name:** `str` — Name of the collection - -
-
- -
-
- -**limit:** `typing.Optional[int]` — Maximum number of documents to return +**collection_name:** `str`
@@ -440,11 +415,11 @@ Remove all documents from a collection while keeping the collection structure. from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.wipe_collection_documents_v2( - collection_name="collection_name", + collection_name="my_documents", ) ``` @@ -461,7 +436,7 @@ client.collections.wipe_collection_documents_v2(
-**collection_name:** `str` — Name of the collection to wipe +**collection_name:** `str`
@@ -511,12 +486,12 @@ Delete a specific document from a collection. from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.delete_document_v2( - collection_name="collection_name", - document_id="document_id", + collection_name="my_documents", + document_id="doc_abc123", ) ``` @@ -533,7 +508,7 @@ client.collections.delete_document_v2(
-**collection_name:** `str` — Name of the collection +**collection_name:** `str`
@@ -541,7 +516,7 @@ client.collections.delete_document_v2(
-**document_id:** `str` — ID of the document to delete +**document_id:** `str`
@@ -581,49 +556,36 @@ When `inference=false`, returns raw search results with content and metadata. ## Streaming (SSE) -When `stream: true` and `inference: true`, the JSON response includes a `request_id`. Refer to the sample implementations to best make use of streams. +When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator. ### SSE Event Types -| Event | Format | Description | -|-------|--------|-------------| -| Text chunk | `data: \n\n` | Incremental text of the AI response. Plain text (not JSON). Newlines within text are escaped as `\n`. | -| Tool start | `event: tool_start\ndata: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"..."}}\n\n` | The AI agent is performing a knowledge base search. The `args.query` field contains the search query. | -| Tool end | `event: tool_end\ndata: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}}\n\n` | A search completed. `tool_call_id` correlates with the preceding `tool_start`. `result_summary.resultCount` indicates how many results were found. | -| Complete | `event: complete\ndata: {"type":"stream_complete"}\n\n` | Stream finished successfully. Close the connection after receiving this. | -| Error | `event: error\ndata: {"type":"stream_error","error":"..."}\n\n` | An error occurred during generation. Close the connection. | +| `type` value | Schema | Description | +|---|---|---| +| `text.delta` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. | +| `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. | +| `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. | +| `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. | +| `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. | ### Example SSE Stream ``` -event: tool_start -data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"revenue projections Q4"}} +data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}} -event: tool_end -data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}} +data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}} -data: Based on the documents -data: provided, the revenue -data: projections for Q4 show -data: a 15% increase over Q3. +data: {"type":"text.delta","seq":3,"run_id":"run_abc","data":"Based on the documents"} +data: {"type":"text.delta","seq":4,"run_id":"run_abc","data":" provided, the revenue"} +data: {"type":"text.delta","seq":5,"run_id":"run_abc","data":" projections for Q4 show"} +data: {"type":"text.delta","seq":6,"run_id":"run_abc","data":" a 15% increase over Q3."} -event: tool_start -data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_2","args":{"query":"Q3 comparison metrics"}} - -event: tool_end -data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_2","ok":true,"result_summary":{"resultCount":8}} - -data: Compared to Q3, the key -data: drivers were operational -data: efficiency gains. - -event: complete -data: {"type":"stream_complete"} +data: {"type":"stream_complete","metadata":{"totalResults":12,"totalSearches":1},"stats":{"totalTokens":150}} ``` ### Notes -- The agent may perform multiple searches per query. Each search produces a `tool_start`/`tool_end` pair. +- The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair. - Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search. - Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses.
@@ -643,8 +605,8 @@ data: {"type":"stream_complete"} from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.query.collection_v2( collection_name="my_documents", @@ -652,6 +614,7 @@ client.query.collection_v2( inference=True, stream=True, rerank=True, + top_k=10, ) ``` @@ -668,7 +631,7 @@ client.query.collection_v2(
-**collection_name:** `str` — Name of the collection to query +**collection_name:** `str`
@@ -684,14 +647,6 @@ client.query.collection_v2(
-**idempotency_key:** `typing.Optional[str]` — UUID for request deduplication - -
-
- -
-
- **inference:** `typing.Optional[bool]` — Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results.
@@ -783,16 +738,17 @@ Index all files from an S3 bucket into a collection. Returns a job_id for tracki from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_s3bucket_v2( collection_name="my_documents", - bucket_name="my-s3-bucket", + bucket_name="my-documents-bucket", aws_access_key_id="AKIAIOSFODNN7EXAMPLE", - aws_secret_access_key="your_secret_key", + aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", bucket_region="us-east-1", processing_type="advanced", + skip_existing=True, ) ``` @@ -809,7 +765,7 @@ client.indexing.index_s3bucket_v2(
-**collection_name:** `str` — Name of the collection to index into +**collection_name:** `str`
@@ -849,14 +805,6 @@ client.indexing.index_s3bucket_v2(
-**idempotency_key:** `typing.Optional[str]` — UUID for request deduplication - -
-
- -
-
- **bucket_region:** `typing.Optional[str]` — AWS region where the bucket is located
@@ -931,15 +879,15 @@ Index a single file from an S3 bucket into a collection. Returns a job_id for tr from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_s3file_v2( collection_name="my_documents", - bucket_name="my-s3-bucket", - file_uri="s3://my-s3-bucket/contracts/acme_contract.pdf", + bucket_name="my-documents-bucket", + file_uri="s3://my-documents-bucket/reports/quarterly-report-q4.pdf", aws_access_key_id="AKIAIOSFODNN7EXAMPLE", - aws_secret_access_key="your_secret_key", + aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", bucket_region="us-east-1", processing_type="advanced", ) @@ -958,7 +906,7 @@ client.indexing.index_s3file_v2(
-**collection_name:** `str` — Name of the collection to index into +**collection_name:** `str`
@@ -1064,13 +1012,13 @@ Index all files from a Google Cloud Storage bucket into a collection. Returns a from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_gcs_bucket_v2( collection_name="my_documents", - bucket_name="my-gcs-bucket", - service_account_json='{"type":"service_account","project_id":"my-project",...}', + bucket_name="my-gcs-documents", + service_account_json='{"type": "service_account", "project_id": "my-project", ...}', processing_type="advanced", ) @@ -1088,7 +1036,7 @@ client.indexing.index_gcs_bucket_v2(
-**collection_name:** `str` — Name of the collection to index into +**collection_name:** `str`
@@ -1186,14 +1134,14 @@ Index a single file from a GCS bucket into a collection. Returns a job_id for tr from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_gcs_file_v2( - collection_name="collection_name", - bucket_name="my-company-docs", - file_uri="gs://my-company-docs/contracts/acme_contract.pdf", - service_account_json='{"type":"service_account","project_id":"my-project",...}', + collection_name="my_documents", + bucket_name="my-gcs-documents", + file_uri="gs://my-gcs-documents/reports/annual-review.pdf", + service_account_json='{"type": "service_account", "project_id": "my-project", ...}', processing_type="advanced", ) @@ -1211,7 +1159,7 @@ client.indexing.index_gcs_file_v2(
-**collection_name:** `str` — Name of the collection to index into +**collection_name:** `str`
@@ -1301,15 +1249,15 @@ Index all files from a specific directory in an S3 bucket into a collection. Use from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_s3directory_v2( collection_name="my_documents", - bucket_name="my-s3-bucket", - directory_path="reports/2024/january", + bucket_name="my-documents-bucket", + directory_path="reports/2025/", aws_access_key_id="AKIAIOSFODNN7EXAMPLE", - aws_secret_access_key="your_secret_key", + aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", bucket_region="us-east-1", processing_type="advanced", ) @@ -1328,7 +1276,7 @@ client.indexing.index_s3directory_v2(
-**collection_name:** `str` — Name of the collection to index into +**collection_name:** `str`
@@ -1376,14 +1324,6 @@ client.indexing.index_s3directory_v2(
-**idempotency_key:** `typing.Optional[str]` — UUID for request deduplication - -
-
- -
-
- **bucket_region:** `typing.Optional[str]` — AWS region where the bucket is located
@@ -1458,14 +1398,14 @@ Index all files from a specific directory in a GCS bucket into a collection. Use from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_gcs_directory_v2( collection_name="my_documents", - bucket_name="my-gcs-bucket", - directory_path="reports/2024/january", - service_account_json='{"type":"service_account","project_id":"my-project",...}', + bucket_name="my-gcs-documents", + directory_path="reports/2025/", + service_account_json='{"type": "service_account", "project_id": "my-project", ...}', processing_type="advanced", ) @@ -1483,7 +1423,7 @@ client.indexing.index_gcs_directory_v2(
-**collection_name:** `str` — Name of the collection to index into +**collection_name:** `str`
@@ -1523,14 +1463,6 @@ client.indexing.index_gcs_directory_v2(
-**idempotency_key:** `typing.Optional[str]` — UUID for request deduplication - -
-
- -
-
- **max_files:** `typing.Optional[int]` — Maximum number of files to index (optional)
@@ -1597,14 +1529,14 @@ Index all files from an Azure Blob Storage container into a collection. Returns from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_azure_container_v2( collection_name="my_documents", - container_name="my-container", + container_name="my-azure-documents", account_name="mystorageaccount", - account_key="your_account_key", + account_key="base64encodedaccountkey==", processing_type="advanced", ) @@ -1622,7 +1554,7 @@ client.indexing.index_azure_container_v2(
-**collection_name:** `str` — Name of the collection to index into +**collection_name:** `str`
@@ -1662,14 +1594,6 @@ client.indexing.index_azure_container_v2(
-**idempotency_key:** `typing.Optional[str]` — UUID for request deduplication - -
-
- -
-
- **max_files:** `typing.Optional[int]` — Maximum number of files to index (optional)
@@ -1736,15 +1660,15 @@ Index a single file from an Azure Blob Storage container into a collection. Retu from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_azure_file_v2( collection_name="my_documents", - container_name="my-container", - file_uri="https://mystorageaccount.blob.core.windows.net/my-container/contracts/acme_contract.pdf", + container_name="my-azure-documents", + file_uri="https://mystorageaccount.blob.core.windows.net/my-azure-documents/reports/annual-review.pdf", account_name="mystorageaccount", - account_key="your_account_key", + account_key="base64encodedaccountkey==", processing_type="advanced", ) @@ -1762,7 +1686,7 @@ client.indexing.index_azure_file_v2(
-**collection_name:** `str` — Name of the collection to index into +**collection_name:** `str`
@@ -1860,15 +1784,15 @@ Index all files from a specific directory (prefix) in an Azure Blob Storage cont from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_azure_directory_v2( collection_name="my_documents", - container_name="my-container", - directory_path="reports/2024/january", + container_name="my-azure-documents", + directory_path="reports/2025/", account_name="mystorageaccount", - account_key="your_account_key", + account_key="base64encodedaccountkey==", processing_type="advanced", ) @@ -1886,7 +1810,7 @@ client.indexing.index_azure_directory_v2(
-**collection_name:** `str` — Name of the collection to index into +**collection_name:** `str`
@@ -1934,14 +1858,6 @@ client.indexing.index_azure_directory_v2(
-**idempotency_key:** `typing.Optional[str]` — UUID for request deduplication - -
-
- -
-
- **max_files:** `typing.Optional[int]` — Maximum number of files to index (optional)
@@ -1978,8 +1894,7 @@ client.indexing.index_azure_directory_v2(
-## Jobs -
client.jobs.get_job_status_v2(...) -> AsyncHttpResponse[JobStatusResponseV2] +
client.indexing.index_r2bucket_v2(...) -> AsyncHttpResponse[IndexJobResponseV2]
@@ -1991,32 +1906,7 @@ client.indexing.index_azure_directory_v2(
-Get the status of an indexing job with detailed progress information. - -## Status Values -- **pending**: Job created but processing hasn't started yet -- **running**: Job is actively processing files -- **completed**: Job finished successfully -- **failed**: Job encountered an error -- **cancelled**: Job was cancelled by user - -## Processing Stages -When status is `running`, the `progress.current_stage` field indicates which stage: -1. **scanning**: Scanning bucket for files -2. **extracting**: Extracting text content from documents -3. **chunking**: Splitting documents into semantic chunks -4. **tagging**: AI tagging and summarization -5. **embedding**: Generating vector embeddings -6. **finalizing**: Aggregating results and recording billing - -## File Status Values -Each file in the `files` array has a status: -- **queued**: Waiting to be processed -- **processing**: Currently being processed -- **completed**: Successfully indexed -- **failed**: Failed to process (see error_code/error_message) -- **skipped**: Skipped (already indexed, unsupported type, etc.) -- **cancelled**: Processing was cancelled +Index all files from a Cloudflare R2 bucket into a collection. R2 is S3-compatible — provide your R2 API token's Access Key ID and Secret Access Key. Returns a job_id for tracking progress via GET /v2/jobs/{job_id}.
@@ -2034,11 +1924,16 @@ Each file in the `files` array has a status: from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) -client.jobs.get_job_status_v2( - job_id="abc123xyz-1234567890", +client.indexing.index_r2bucket_v2( + collection_name="my_documents", + bucket_name="my-r2-bucket", + account_id="your_cloudflare_account_id", + access_key_id="your_r2_access_key_id", + secret_access_key="your_r2_secret_access_key", + processing_type="advanced", ) ``` @@ -2055,7 +1950,79 @@ client.jobs.get_job_status_v2(
-**job_id:** `str` — The job ID returned from an indexing request +**collection_name:** `str` + +
+
+ +
+
+ +**bucket_name:** `str` — Name of the R2 bucket + +
+
+ +
+
+ +**account_id:** `str` — Cloudflare account ID (found in your R2 dashboard URL) + +
+
+ +
+
+ +**access_key_id:** `str` — R2 S3 API token Access Key ID + +
+
+ +
+
+ +**secret_access_key:** `str` — R2 S3 API token Secret Access Key + +
+
+ +
+
+ +**processing_type:** `IndexR2RequestV2ProcessingType` — Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + +
+
+ +
+
+ +**jurisdiction:** `typing.Optional[IndexR2RequestV2Jurisdiction]` — R2 jurisdiction. 'default' for global, 'eu' for EU-only storage, 'fedramp' for FedRAMP-compliant storage. + +
+
+ +
+
+ +**max_files:** `typing.Optional[int]` — Maximum number of files to index (optional) + +
+
+ +
+
+ +**skip_existing:** `typing.Optional[bool]` — Skip files that are already indexed in the collection. When true, only new files will be indexed. Set to false to re-index all files. + +
+
+ +
+
+ +**custom_metadata:** `typing.Optional[typing.Dict[str, typing.Any]]` — Custom metadata to attach to all indexed chunks. Keys must be strings. Values: str, int, float, bool, or array of strings.
@@ -2075,7 +2042,7 @@ client.jobs.get_job_status_v2(
-
client.jobs.cancel_job_v2(...) -> AsyncHttpResponse[JobCancelResponseV2] +
client.indexing.index_r2file_v2(...) -> AsyncHttpResponse[IndexJobResponseV2]
@@ -2087,11 +2054,7 @@ client.jobs.get_job_status_v2(
-Cancel an indexing job. - -Behavior: -- If job is pending or running -> transitions to cancelled -- If job is already completed/failed/cancelled -> returns 200 with current state (idempotent) +Index a single file from a Cloudflare R2 bucket into a collection. Returns a job_id for tracking progress.
@@ -2109,11 +2072,17 @@ Behavior: from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) -client.jobs.cancel_job_v2( - job_id="abc123xyz-1234567890", +client.indexing.index_r2file_v2( + collection_name="my_documents", + bucket_name="my-r2-bucket", + file_uri="r2://my-r2-bucket/reports/annual-review.pdf", + account_id="your_cloudflare_account_id", + access_key_id="your_r2_access_key_id", + secret_access_key="your_r2_secret_access_key", + processing_type="advanced", ) ``` @@ -2130,7 +2099,7 @@ client.jobs.cancel_job_v2(
-**job_id:** `str` — The job ID to cancel +**collection_name:** `str`
@@ -2138,42 +2107,95 @@ client.jobs.cancel_job_v2(
-**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. +**bucket_name:** `str` — Name of the R2 bucket
+ +
+
+ +**file_uri:** `str` — R2 URI format: r2://bucket-name/path/to/file.pdf +
+
+
+**account_id:** `str` — Cloudflare account ID (found in your R2 dashboard URL) +
-
-## Datasets -
client.datasets.search_dataset(...) -> AsyncHttpResponse[DatasetSearchResponse]
-#### 📝 Description +**access_key_id:** `str` — R2 S3 API token Access Key ID + +
+
+**secret_access_key:** `str` — R2 S3 API token Secret Access Key + +
+
+
-Search for articles within a news dataset. +**processing_type:** `IndexR2FileRequestV2ProcessingType` — Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + +
+
-Uses Google Search constrained to the dataset's domain to find relevant articles. +
+
-## Supported Datasets -- **nytimes**: New York Times (nytimes.com) -- **washpost**: Washington Post (washingtonpost.com) -- **sfstandard**: SF Standard (sfstandard.com) +**jurisdiction:** `typing.Optional[IndexR2FileRequestV2Jurisdiction]` — R2 jurisdiction. 'default' for global, 'eu' for EU-only storage, 'fedramp' for FedRAMP-compliant storage. + +
+
-## Response -Returns a list of search results with title, URL, snippet, and date. +
+
+ +**custom_metadata:** `typing.Optional[typing.Dict[str, typing.Any]]` — Custom metadata to attach to all chunks from this file. Keys must be strings. Values: str, int, float, bool, or array of strings. + +
+
+ +
+
+ +**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. + +
+
+ + + + + + +
+ +
client.indexing.index_r2directory_v2(...) -> AsyncHttpResponse[IndexJobResponseV2] +
+
+ +#### 📝 Description + +
+
+ +
+
+ +Index all files from a specific directory (prefix) in a Cloudflare R2 bucket into a collection. Uses prefix-based filtering to index only objects within the specified path. Returns a job_id for tracking progress via GET /v2/jobs/{job_id}.
@@ -2191,12 +2213,17 @@ Returns a list of search results with title, URL, snippet, and date. from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) -client.datasets.search_dataset( - dataset="nytimes", - q="", +client.indexing.index_r2directory_v2( + collection_name="my_documents", + bucket_name="my-r2-bucket", + directory_path="reports/2025/", + account_id="your_cloudflare_account_id", + access_key_id="your_r2_access_key_id", + secret_access_key="your_r2_secret_access_key", + processing_type="advanced", ) ``` @@ -2213,7 +2240,15 @@ client.datasets.search_dataset(
-**dataset:** `SearchDatasetRequestDataset` — The news dataset to search. Supported: nytimes, washpost, sfstandard +**collection_name:** `str` + +
+
+ +
+
+ +**bucket_name:** `str` — Name of the R2 bucket
@@ -2221,7 +2256,7 @@ client.datasets.search_dataset(
-**q:** `str` — Search query +**directory_path:** `str` — Path to the directory (prefix) within the bucket. Accepts either a relative path (e.g., 'reports/2024/january') or a full R2 URI (e.g., 'r2://my-bucket/reports/2024/january'). All objects within this prefix will be indexed.
@@ -2229,7 +2264,429 @@ client.datasets.search_dataset(
-**limit:** `typing.Optional[int]` — Maximum number of results to return (default: 10, max: 100) +**account_id:** `str` — Cloudflare account ID (found in your R2 dashboard URL) + +
+
+ +
+
+ +**access_key_id:** `str` — R2 S3 API token Access Key ID + +
+
+ +
+
+ +**secret_access_key:** `str` — R2 S3 API token Secret Access Key + +
+
+ +
+
+ +**processing_type:** `IndexR2DirectoryRequestV2ProcessingType` — Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + +
+
+ +
+
+ +**jurisdiction:** `typing.Optional[IndexR2DirectoryRequestV2Jurisdiction]` — R2 jurisdiction. 'default' for global, 'eu' for EU-only storage, 'fedramp' for FedRAMP-compliant storage. + +
+
+ +
+
+ +**max_files:** `typing.Optional[int]` — Maximum number of files to index (optional) + +
+
+ +
+
+ +**skip_existing:** `typing.Optional[bool]` — Skip files that are already indexed in the collection. When true, only new files will be indexed. Set to false to re-index all files. + +
+
+ +
+
+ +**custom_metadata:** `typing.Optional[typing.Dict[str, typing.Any]]` — Custom metadata to attach to all indexed chunks. Keys must be strings. Values: str, int, float, bool, or array of strings. + +
+
+ +
+
+ +**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. + +
+
+
+
+ + + + +
+ +
client.indexing.index_url_v2(...) -> AsyncHttpResponse[IndexJobResponseV2] +
+
+ +#### 📝 Description + +
+
+ +
+
+ +Index documents from public URLs into a collection. No cloud storage credentials required. + +You can provide either: +- `url` — a single URL string for one document +- `urls` — an array of URL strings for multiple documents + +Supported file types include PDF, TXT, DOCX, CSV, XLSX, and more. Documents are downloaded and processed through the same pipeline as cloud storage indexing. + +Returns a job_id for tracking progress via GET /v2/jobs/{job_id}. +
+
+
+
+ +#### 🔌 Usage + +
+
+ +
+
+ +```python +from runcaptain import Captain + +client = Captain( + organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", +) +client.indexing.index_url_v2( + collection_name="my_documents", + urls=[ + "https://example.com/documents/report.pdf", + "https://example.com/documents/memo.txt", + "https://example.com/documents/data.csv", + ], + processing_type="advanced", +) + +``` +
+
+
+
+ +#### ⚙️ Parameters + +
+
+ +
+
+ +**collection_name:** `str` + +
+
+ +
+
+ +**processing_type:** `IndexUrlRequestV2ProcessingType` — Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + +
+
+ +
+
+ +**url:** `typing.Optional[str]` — A single public URL to a hosted document (PDF, TXT, DOCX, etc.). Provide either 'url' or 'urls', not both. + +
+
+ +
+
+ +**urls:** `typing.Optional[typing.Sequence[str]]` — An array of public URLs to hosted documents. Provide either 'url' or 'urls', not both. + +
+
+ +
+
+ +**custom_metadata:** `typing.Optional[typing.Dict[str, typing.Any]]` — Custom metadata to attach to all indexed chunks. Keys must be strings. Values: str, int, float, bool, or array of strings. + +
+
+ +
+
+ +**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. + +
+
+
+
+ + +
+
+
+ +## Jobs +
client.jobs.get_job_status_v2(...) -> AsyncHttpResponse[JobStatusResponseV2] +
+
+ +#### 📝 Description + +
+
+ +
+
+ +Get the status of an indexing job with detailed progress information. + +## Status Values +- **pending**: Job created but processing hasn't started yet +- **running**: Job is actively processing files +- **completed**: Job finished successfully +- **failed**: Job encountered an error +- **cancelled**: Job was cancelled by user + +## Processing Stages +When status is `running`, the `progress.current_stage` field indicates which stage: +1. **scanning**: Scanning bucket for files +2. **extracting**: Extracting text content from documents +3. **chunking**: Splitting documents into semantic chunks +4. **tagging**: AI tagging and summarization +5. **embedding**: Generating vector embeddings +6. **finalizing**: Aggregating results and recording billing + +## File Status Values +Each file in the `files` array has a status: +- **queued**: Waiting to be processed +- **processing**: Currently being processed +- **completed**: Successfully indexed +- **failed**: Failed to process (see error_code/error_message) +- **skipped**: Skipped (already indexed, unsupported type, etc.) +- **cancelled**: Processing was cancelled +
+
+
+
+ +#### 🔌 Usage + +
+
+ +
+
+ +```python +from runcaptain import Captain + +client = Captain( + organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", +) +client.jobs.get_job_status_v2( + job_id="job_s3_abc123", +) + +``` +
+
+
+
+ +#### ⚙️ Parameters + +
+
+ +
+
+ +**job_id:** `str` + +
+
+ +
+
+ +**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. + +
+
+
+
+ + +
+
+
+ +
client.jobs.cancel_job_v2(...) -> AsyncHttpResponse[JobCancelResponseV2] +
+
+ +#### 📝 Description + +
+
+ +
+
+ +Cancel an indexing job. + +Behavior: +- If job is pending or running -> transitions to cancelled +- If job is already completed/failed/cancelled -> returns 200 with current state (idempotent) +
+
+
+
+ +#### 🔌 Usage + +
+
+ +
+
+ +```python +from runcaptain import Captain + +client = Captain( + organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", +) +client.jobs.cancel_job_v2( + job_id="job_s3_abc123", +) + +``` +
+
+
+
+ +#### ⚙️ Parameters + +
+
+ +
+
+ +**job_id:** `str` + +
+
+ +
+
+ +**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. + +
+
+
+
+ + +
+
+
+ +## Datasets +
client.datasets.search_dataset(...) -> AsyncHttpResponse[DatasetSearchResponse] +
+
+ +#### 📝 Description + +
+
+ +
+
+ +Search for articles within a news dataset. + +Contact your Account Executive for available datasets. + +## Response +Returns a list of search results with title, URL, snippet, and date. +
+
+
+
+ +#### 🔌 Usage + +
+
+ +
+
+ +```python +from runcaptain import Captain + +client = Captain( + organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", +) +client.datasets.search_dataset( + dataset="dataset_name", +) + +``` +
+
+
+
+ +#### ⚙️ Parameters + +
+
+ +
+
+ +**dataset:** `typing.Optional[str]` — The dataset to search. Contact your Account Executive for available datasets.
@@ -2263,10 +2720,7 @@ client.datasets.search_dataset( Get a full article from a supported news dataset. -## Supported Datasets -- **nytimes**: New York Times (nytimes.com) -- **washpost**: Washington Post (washingtonpost.com) -- **sfstandard**: SF Standard (sfstandard.com) +Contact your Account Executive for available datasets. ## URL Path The article URL is appended directly to the endpoint path. The URL must match the domain of the specified dataset. @@ -2290,12 +2744,12 @@ Returns the full article content in markdown format, along with metadata like ti from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.datasets.get_dataset_article( - dataset="nytimes", - url="https://www.washingtonpost.com/example/news_example.html", + dataset="dataset_name", + url="https://www.example.com/2025/01/15/politics/example-article", ) ``` @@ -2312,7 +2766,7 @@ client.datasets.get_dataset_article(
-**dataset:** `GetDatasetArticleRequestDataset` — The news dataset to get articles from. Supported: nytimes, washpost, sfstandard +**dataset:** `typing.Optional[str]` — The dataset to get articles from. Contact your Account Executive for available datasets.
@@ -2320,7 +2774,7 @@ client.datasets.get_dataset_article(
-**url:** `str` — Full URL of the article to get, appended to the path. Must match the dataset's domain. +**url:** `str`
diff --git a/src/runcaptain/__init__.py b/src/runcaptain/__init__.py index b0d8097..c0b7499 100644 --- a/src/runcaptain/__init__.py +++ b/src/runcaptain/__init__.py @@ -15,9 +15,7 @@ CollectionListResponseV2, CollectionResponseV2, DatasetArticleResponse, - DatasetArticleResponseDataset, DatasetSearchResponse, - DatasetSearchResponseDataset, DatasetSearchResult, DocumentDeleteResponseV2, DocumentItemV2, @@ -38,6 +36,17 @@ JobStatusResponseV2JobType, JobStatusResponseV2Status, QueryResponseV2, + QueryStreamCompleteEvent, + QueryStreamErrorEvent, + QueryStreamEvent, + QueryStreamEvent_StreamComplete, + QueryStreamEvent_StreamError, + QueryStreamEvent_TextDelta, + QueryStreamEvent_ToolEnd, + QueryStreamEvent_ToolStart, + QueryStreamTextEvent, + QueryStreamToolEndEvent, + QueryStreamToolStartEvent, RelevantDocumentV2, SearchResult, StandardResponseV2, @@ -56,7 +65,6 @@ from . import collections, datasets, indexing, jobs, query from .client import AsyncCaptain, Captain from .collections import ChangeEnvironmentRequestV2NewEnvironment - from .datasets import GetDatasetArticleRequestDataset, SearchDatasetRequestDataset from .environment import CaptainEnvironment from .indexing import ( IndexAzureDirectoryRequestV2ProcessingType, @@ -65,9 +73,16 @@ IndexGcsDirectoryRequestV2ProcessingType, IndexGcsFileRequestV2ProcessingType, IndexGcsRequestV2ProcessingType, + IndexR2DirectoryRequestV2Jurisdiction, + IndexR2DirectoryRequestV2ProcessingType, + IndexR2FileRequestV2Jurisdiction, + IndexR2FileRequestV2ProcessingType, + IndexR2RequestV2Jurisdiction, + IndexR2RequestV2ProcessingType, IndexS3DirectoryRequestV2ProcessingType, IndexS3FileRequestV2ProcessingType, IndexS3RequestV2ProcessingType, + IndexUrlRequestV2ProcessingType, ) from .version import __version__ _dynamic_imports: typing.Dict[str, str] = { @@ -85,9 +100,7 @@ "CollectionResponseV2": ".types", "ConflictError": ".errors", "DatasetArticleResponse": ".types", - "DatasetArticleResponseDataset": ".types", "DatasetSearchResponse": ".types", - "DatasetSearchResponseDataset": ".types", "DatasetSearchResult": ".types", "DocumentDeleteResponseV2": ".types", "DocumentItemV2": ".types", @@ -96,7 +109,6 @@ "FileStatusStatus": ".types", "FilesPage": ".types", "ForbiddenError": ".errors", - "GetDatasetArticleRequestDataset": ".datasets", "HttpValidationError": ".types", "IndexAzureDirectoryRequestV2ProcessingType": ".indexing", "IndexAzureFileRequestV2ProcessingType": ".indexing", @@ -106,9 +118,16 @@ "IndexGcsRequestV2ProcessingType": ".indexing", "IndexJobResponseV2": ".types", "IndexJobResponseV2Status": ".types", + "IndexR2DirectoryRequestV2Jurisdiction": ".indexing", + "IndexR2DirectoryRequestV2ProcessingType": ".indexing", + "IndexR2FileRequestV2Jurisdiction": ".indexing", + "IndexR2FileRequestV2ProcessingType": ".indexing", + "IndexR2RequestV2Jurisdiction": ".indexing", + "IndexR2RequestV2ProcessingType": ".indexing", "IndexS3DirectoryRequestV2ProcessingType": ".indexing", "IndexS3FileRequestV2ProcessingType": ".indexing", "IndexS3RequestV2ProcessingType": ".indexing", + "IndexUrlRequestV2ProcessingType": ".indexing", "JobBilling": ".types", "JobBillingProcessingType": ".types", "JobCancelResponseV2": ".types", @@ -120,8 +139,18 @@ "JobStatusResponseV2Status": ".types", "NotFoundError": ".errors", "QueryResponseV2": ".types", + "QueryStreamCompleteEvent": ".types", + "QueryStreamErrorEvent": ".types", + "QueryStreamEvent": ".types", + "QueryStreamEvent_StreamComplete": ".types", + "QueryStreamEvent_StreamError": ".types", + "QueryStreamEvent_TextDelta": ".types", + "QueryStreamEvent_ToolEnd": ".types", + "QueryStreamEvent_ToolStart": ".types", + "QueryStreamTextEvent": ".types", + "QueryStreamToolEndEvent": ".types", + "QueryStreamToolStartEvent": ".types", "RelevantDocumentV2": ".types", - "SearchDatasetRequestDataset": ".datasets", "SearchResult": ".types", "ServiceUnavailableError": ".errors", "StandardResponseV2": ".types", @@ -174,9 +203,7 @@ def __dir__(): "CollectionResponseV2", "ConflictError", "DatasetArticleResponse", - "DatasetArticleResponseDataset", "DatasetSearchResponse", - "DatasetSearchResponseDataset", "DatasetSearchResult", "DocumentDeleteResponseV2", "DocumentItemV2", @@ -185,7 +212,6 @@ def __dir__(): "FileStatusStatus", "FilesPage", "ForbiddenError", - "GetDatasetArticleRequestDataset", "HttpValidationError", "IndexAzureDirectoryRequestV2ProcessingType", "IndexAzureFileRequestV2ProcessingType", @@ -195,9 +221,16 @@ def __dir__(): "IndexGcsRequestV2ProcessingType", "IndexJobResponseV2", "IndexJobResponseV2Status", + "IndexR2DirectoryRequestV2Jurisdiction", + "IndexR2DirectoryRequestV2ProcessingType", + "IndexR2FileRequestV2Jurisdiction", + "IndexR2FileRequestV2ProcessingType", + "IndexR2RequestV2Jurisdiction", + "IndexR2RequestV2ProcessingType", "IndexS3DirectoryRequestV2ProcessingType", "IndexS3FileRequestV2ProcessingType", "IndexS3RequestV2ProcessingType", + "IndexUrlRequestV2ProcessingType", "JobBilling", "JobBillingProcessingType", "JobCancelResponseV2", @@ -209,8 +242,18 @@ def __dir__(): "JobStatusResponseV2Status", "NotFoundError", "QueryResponseV2", + "QueryStreamCompleteEvent", + "QueryStreamErrorEvent", + "QueryStreamEvent", + "QueryStreamEvent_StreamComplete", + "QueryStreamEvent_StreamError", + "QueryStreamEvent_TextDelta", + "QueryStreamEvent_ToolEnd", + "QueryStreamEvent_ToolStart", + "QueryStreamTextEvent", + "QueryStreamToolEndEvent", + "QueryStreamToolStartEvent", "RelevantDocumentV2", - "SearchDatasetRequestDataset", "SearchResult", "ServiceUnavailableError", "StandardResponseV2", diff --git a/src/runcaptain/client.py b/src/runcaptain/client.py index 4c84d40..6ef8f91 100644 --- a/src/runcaptain/client.py +++ b/src/runcaptain/client.py @@ -2,9 +2,11 @@ from __future__ import annotations +import os import typing import httpx +from .core.api_error import ApiError from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper from .core.logging import LogConfig, Logger from .environment import CaptainEnvironment @@ -35,8 +37,8 @@ class Captain: - authorization : str organization_id : typing.Optional[str] + key : typing.Optional[typing.Union[str, typing.Callable[[], str]]] headers : typing.Optional[typing.Dict[str, str]] Additional headers to send with every request. @@ -57,8 +59,8 @@ class Captain: from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) """ @@ -67,8 +69,8 @@ def __init__( *, base_url: typing.Optional[str] = None, environment: CaptainEnvironment = CaptainEnvironment.DEFAULT, - authorization: str, - organization_id: typing.Optional[str] = None, + organization_id: typing.Optional[str] = os.getenv("CAPTAIN_ORGANIZATION_ID"), + key: typing.Optional[typing.Union[str, typing.Callable[[], str]]] = os.getenv("CAPTAIN_API_KEY"), headers: typing.Optional[typing.Dict[str, str]] = None, timeout: typing.Optional[float] = None, follow_redirects: typing.Optional[bool] = True, @@ -78,10 +80,16 @@ def __init__( _defaulted_timeout = ( timeout if timeout is not None else 60 if httpx_client is None else httpx_client.timeout.read ) + if organization_id is None: + raise ApiError( + body="The client must be instantiated be either passing in organization_id or setting CAPTAIN_ORGANIZATION_ID" + ) + if key is None: + raise ApiError(body="The client must be instantiated be either passing in key or setting CAPTAIN_API_KEY") self._client_wrapper = SyncClientWrapper( base_url=_get_base_url(base_url=base_url, environment=environment), - authorization=authorization, organization_id=organization_id, + key=key, headers=headers, httpx_client=httpx_client if httpx_client is not None @@ -156,8 +164,8 @@ class AsyncCaptain: - authorization : str organization_id : typing.Optional[str] + key : typing.Optional[typing.Union[str, typing.Callable[[], str]]] headers : typing.Optional[typing.Dict[str, str]] Additional headers to send with every request. @@ -178,8 +186,8 @@ class AsyncCaptain: from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) """ @@ -188,8 +196,8 @@ def __init__( *, base_url: typing.Optional[str] = None, environment: CaptainEnvironment = CaptainEnvironment.DEFAULT, - authorization: str, - organization_id: typing.Optional[str] = None, + organization_id: typing.Optional[str] = os.getenv("CAPTAIN_ORGANIZATION_ID"), + key: typing.Optional[typing.Union[str, typing.Callable[[], str]]] = os.getenv("CAPTAIN_API_KEY"), headers: typing.Optional[typing.Dict[str, str]] = None, timeout: typing.Optional[float] = None, follow_redirects: typing.Optional[bool] = True, @@ -199,10 +207,16 @@ def __init__( _defaulted_timeout = ( timeout if timeout is not None else 60 if httpx_client is None else httpx_client.timeout.read ) + if organization_id is None: + raise ApiError( + body="The client must be instantiated be either passing in organization_id or setting CAPTAIN_ORGANIZATION_ID" + ) + if key is None: + raise ApiError(body="The client must be instantiated be either passing in key or setting CAPTAIN_API_KEY") self._client_wrapper = AsyncClientWrapper( base_url=_get_base_url(base_url=base_url, environment=environment), - authorization=authorization, organization_id=organization_id, + key=key, headers=headers, httpx_client=httpx_client if httpx_client is not None diff --git a/src/runcaptain/collections/client.py b/src/runcaptain/collections/client.py index d043171..4bda78d 100644 --- a/src/runcaptain/collections/client.py +++ b/src/runcaptain/collections/client.py @@ -33,11 +33,7 @@ def with_raw_response(self) -> RawCollectionsClient: return self._raw_client def list_collections_v2( - self, - *, - limit: typing.Optional[int] = None, - offset: typing.Optional[int] = None, - request_options: typing.Optional[RequestOptions] = None, + self, *, request_options: typing.Optional[RequestOptions] = None ) -> CollectionListResponseV2: """ List all collections for an organization. @@ -46,12 +42,6 @@ def list_collections_v2( Parameters ---------- - limit : typing.Optional[int] - Maximum number of collections to return - - offset : typing.Optional[int] - Pagination offset - request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -65,12 +55,12 @@ def list_collections_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.list_collections_v2() """ - _response = self._raw_client.list_collections_v2(limit=limit, offset=offset, request_options=request_options) + _response = self._raw_client.list_collections_v2(request_options=request_options) return _response.data def create_collection_v2( @@ -86,7 +76,6 @@ def create_collection_v2( Parameters ---------- collection_name : str - Name of the collection to create description : typing.Optional[str] @@ -103,11 +92,12 @@ def create_collection_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.create_collection_v2( collection_name="my_documents", + description="A collection of research documents", ) """ _response = self._raw_client.create_collection_v2( @@ -124,7 +114,6 @@ def delete_collection_v2( Parameters ---------- collection_name : str - Name of the collection to delete request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -139,8 +128,8 @@ def delete_collection_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.delete_collection_v2( collection_name="my_documents", @@ -174,7 +163,6 @@ def change_collection_environment_v2( Parameters ---------- collection_name : str - Name of the collection to move new_environment : ChangeEnvironmentRequestV2NewEnvironment The target environment to move the collection to @@ -192,8 +180,8 @@ def change_collection_environment_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.change_collection_environment_v2( collection_name="my_documents", @@ -209,7 +197,6 @@ def list_documents_v2( self, collection_name: str, *, - limit: typing.Optional[int] = None, offset: typing.Optional[int] = None, request_options: typing.Optional[RequestOptions] = None, ) -> DocumentListResponseV2: @@ -219,10 +206,6 @@ def list_documents_v2( Parameters ---------- collection_name : str - Name of the collection - - limit : typing.Optional[int] - Maximum number of documents to return offset : typing.Optional[int] Pagination offset @@ -240,18 +223,14 @@ def list_documents_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.list_documents_v2( collection_name="my_documents", - limit=100, - offset=0, ) """ - _response = self._raw_client.list_documents_v2( - collection_name, limit=limit, offset=offset, request_options=request_options - ) + _response = self._raw_client.list_documents_v2(collection_name, offset=offset, request_options=request_options) return _response.data def wipe_collection_documents_v2( @@ -263,7 +242,6 @@ def wipe_collection_documents_v2( Parameters ---------- collection_name : str - Name of the collection to wipe request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -278,11 +256,11 @@ def wipe_collection_documents_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.wipe_collection_documents_v2( - collection_name="collection_name", + collection_name="my_documents", ) """ _response = self._raw_client.wipe_collection_documents_v2(collection_name, request_options=request_options) @@ -297,10 +275,8 @@ def delete_document_v2( Parameters ---------- collection_name : str - Name of the collection document_id : str - ID of the document to delete request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -315,12 +291,12 @@ def delete_document_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.delete_document_v2( - collection_name="collection_name", - document_id="document_id", + collection_name="my_documents", + document_id="doc_abc123", ) """ _response = self._raw_client.delete_document_v2(collection_name, document_id, request_options=request_options) @@ -343,11 +319,7 @@ def with_raw_response(self) -> AsyncRawCollectionsClient: return self._raw_client async def list_collections_v2( - self, - *, - limit: typing.Optional[int] = None, - offset: typing.Optional[int] = None, - request_options: typing.Optional[RequestOptions] = None, + self, *, request_options: typing.Optional[RequestOptions] = None ) -> CollectionListResponseV2: """ List all collections for an organization. @@ -356,12 +328,6 @@ async def list_collections_v2( Parameters ---------- - limit : typing.Optional[int] - Maximum number of collections to return - - offset : typing.Optional[int] - Pagination offset - request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -377,8 +343,8 @@ async def list_collections_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) @@ -388,9 +354,7 @@ async def main() -> None: asyncio.run(main()) """ - _response = await self._raw_client.list_collections_v2( - limit=limit, offset=offset, request_options=request_options - ) + _response = await self._raw_client.list_collections_v2(request_options=request_options) return _response.data async def create_collection_v2( @@ -406,7 +370,6 @@ async def create_collection_v2( Parameters ---------- collection_name : str - Name of the collection to create description : typing.Optional[str] @@ -425,14 +388,15 @@ async def create_collection_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.collections.create_collection_v2( collection_name="my_documents", + description="A collection of research documents", ) @@ -452,7 +416,6 @@ async def delete_collection_v2( Parameters ---------- collection_name : str - Name of the collection to delete request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -469,8 +432,8 @@ async def delete_collection_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) @@ -510,7 +473,6 @@ async def change_collection_environment_v2( Parameters ---------- collection_name : str - Name of the collection to move new_environment : ChangeEnvironmentRequestV2NewEnvironment The target environment to move the collection to @@ -530,8 +492,8 @@ async def change_collection_environment_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) @@ -553,7 +515,6 @@ async def list_documents_v2( self, collection_name: str, *, - limit: typing.Optional[int] = None, offset: typing.Optional[int] = None, request_options: typing.Optional[RequestOptions] = None, ) -> DocumentListResponseV2: @@ -563,10 +524,6 @@ async def list_documents_v2( Parameters ---------- collection_name : str - Name of the collection - - limit : typing.Optional[int] - Maximum number of documents to return offset : typing.Optional[int] Pagination offset @@ -586,23 +543,21 @@ async def list_documents_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.collections.list_documents_v2( collection_name="my_documents", - limit=100, - offset=0, ) asyncio.run(main()) """ _response = await self._raw_client.list_documents_v2( - collection_name, limit=limit, offset=offset, request_options=request_options + collection_name, offset=offset, request_options=request_options ) return _response.data @@ -615,7 +570,6 @@ async def wipe_collection_documents_v2( Parameters ---------- collection_name : str - Name of the collection to wipe request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -632,14 +586,14 @@ async def wipe_collection_documents_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.collections.wipe_collection_documents_v2( - collection_name="collection_name", + collection_name="my_documents", ) @@ -659,10 +613,8 @@ async def delete_document_v2( Parameters ---------- collection_name : str - Name of the collection document_id : str - ID of the document to delete request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -679,15 +631,15 @@ async def delete_document_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.collections.delete_document_v2( - collection_name="collection_name", - document_id="document_id", + collection_name="my_documents", + document_id="doc_abc123", ) diff --git a/src/runcaptain/collections/raw_client.py b/src/runcaptain/collections/raw_client.py index 8db600a..15af9fc 100644 --- a/src/runcaptain/collections/raw_client.py +++ b/src/runcaptain/collections/raw_client.py @@ -29,11 +29,7 @@ def __init__(self, *, client_wrapper: SyncClientWrapper): self._client_wrapper = client_wrapper def list_collections_v2( - self, - *, - limit: typing.Optional[int] = None, - offset: typing.Optional[int] = None, - request_options: typing.Optional[RequestOptions] = None, + self, *, request_options: typing.Optional[RequestOptions] = None ) -> HttpResponse[CollectionListResponseV2]: """ List all collections for an organization. @@ -42,12 +38,6 @@ def list_collections_v2( Parameters ---------- - limit : typing.Optional[int] - Maximum number of collections to return - - offset : typing.Optional[int] - Pagination offset - request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -59,10 +49,6 @@ def list_collections_v2( _response = self._client_wrapper.httpx_client.request( "v2/collections", method="GET", - params={ - "limit": limit, - "offset": offset, - }, request_options=request_options, ) try: @@ -93,7 +79,6 @@ def create_collection_v2( Parameters ---------- collection_name : str - Name of the collection to create description : typing.Optional[str] @@ -141,7 +126,6 @@ def delete_collection_v2( Parameters ---------- collection_name : str - Name of the collection to delete request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -207,7 +191,6 @@ def change_collection_environment_v2( Parameters ---------- collection_name : str - Name of the collection to move new_environment : ChangeEnvironmentRequestV2NewEnvironment The target environment to move the collection to @@ -284,7 +267,6 @@ def list_documents_v2( self, collection_name: str, *, - limit: typing.Optional[int] = None, offset: typing.Optional[int] = None, request_options: typing.Optional[RequestOptions] = None, ) -> HttpResponse[DocumentListResponseV2]: @@ -294,10 +276,6 @@ def list_documents_v2( Parameters ---------- collection_name : str - Name of the collection - - limit : typing.Optional[int] - Maximum number of documents to return offset : typing.Optional[int] Pagination offset @@ -314,7 +292,6 @@ def list_documents_v2( f"v2/collections/{jsonable_encoder(collection_name)}/documents", method="GET", params={ - "limit": limit, "offset": offset, }, request_options=request_options, @@ -343,7 +320,6 @@ def wipe_collection_documents_v2( Parameters ---------- collection_name : str - Name of the collection to wipe request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -382,10 +358,8 @@ def delete_document_v2( Parameters ---------- collection_name : str - Name of the collection document_id : str - ID of the document to delete request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -432,11 +406,7 @@ def __init__(self, *, client_wrapper: AsyncClientWrapper): self._client_wrapper = client_wrapper async def list_collections_v2( - self, - *, - limit: typing.Optional[int] = None, - offset: typing.Optional[int] = None, - request_options: typing.Optional[RequestOptions] = None, + self, *, request_options: typing.Optional[RequestOptions] = None ) -> AsyncHttpResponse[CollectionListResponseV2]: """ List all collections for an organization. @@ -445,12 +415,6 @@ async def list_collections_v2( Parameters ---------- - limit : typing.Optional[int] - Maximum number of collections to return - - offset : typing.Optional[int] - Pagination offset - request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -462,10 +426,6 @@ async def list_collections_v2( _response = await self._client_wrapper.httpx_client.request( "v2/collections", method="GET", - params={ - "limit": limit, - "offset": offset, - }, request_options=request_options, ) try: @@ -496,7 +456,6 @@ async def create_collection_v2( Parameters ---------- collection_name : str - Name of the collection to create description : typing.Optional[str] @@ -544,7 +503,6 @@ async def delete_collection_v2( Parameters ---------- collection_name : str - Name of the collection to delete request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -610,7 +568,6 @@ async def change_collection_environment_v2( Parameters ---------- collection_name : str - Name of the collection to move new_environment : ChangeEnvironmentRequestV2NewEnvironment The target environment to move the collection to @@ -687,7 +644,6 @@ async def list_documents_v2( self, collection_name: str, *, - limit: typing.Optional[int] = None, offset: typing.Optional[int] = None, request_options: typing.Optional[RequestOptions] = None, ) -> AsyncHttpResponse[DocumentListResponseV2]: @@ -697,10 +653,6 @@ async def list_documents_v2( Parameters ---------- collection_name : str - Name of the collection - - limit : typing.Optional[int] - Maximum number of documents to return offset : typing.Optional[int] Pagination offset @@ -717,7 +669,6 @@ async def list_documents_v2( f"v2/collections/{jsonable_encoder(collection_name)}/documents", method="GET", params={ - "limit": limit, "offset": offset, }, request_options=request_options, @@ -746,7 +697,6 @@ async def wipe_collection_documents_v2( Parameters ---------- collection_name : str - Name of the collection to wipe request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -785,10 +735,8 @@ async def delete_document_v2( Parameters ---------- collection_name : str - Name of the collection document_id : str - ID of the document to delete request_options : typing.Optional[RequestOptions] Request-specific configuration. diff --git a/src/runcaptain/core/client_wrapper.py b/src/runcaptain/core/client_wrapper.py index 8b7c254..0094bb5 100644 --- a/src/runcaptain/core/client_wrapper.py +++ b/src/runcaptain/core/client_wrapper.py @@ -11,15 +11,15 @@ class BaseClientWrapper: def __init__( self, *, - authorization: str, - organization_id: typing.Optional[str] = None, + organization_id: str, + key: typing.Union[str, typing.Callable[[], str]], headers: typing.Optional[typing.Dict[str, str]] = None, base_url: str, timeout: typing.Optional[float] = None, logging: typing.Optional[typing.Union[LogConfig, Logger]] = None, ): - self._authorization = authorization self._organization_id = organization_id + self._key = key self._headers = headers self._base_url = base_url self._timeout = timeout @@ -29,19 +29,24 @@ def get_headers(self) -> typing.Dict[str, str]: import platform headers: typing.Dict[str, str] = { - "User-Agent": "captain-sdk/0.0.0", + "User-Agent": "captain-sdk/0.0.1", "X-Fern-Language": "Python", "X-Fern-Runtime": f"python/{platform.python_version()}", "X-Fern-Platform": f"{platform.system().lower()}/{platform.release()}", "X-Fern-SDK-Name": "captain-sdk", - "X-Fern-SDK-Version": "0.0.0", + "X-Fern-SDK-Version": "0.0.1", **(self.get_custom_headers() or {}), } - headers["Authorization"] = self._authorization - if self._organization_id is not None: - headers["X-Organization-ID"] = self._organization_id + headers["X-Organization-ID"] = self._organization_id + headers["Authorization"] = f"Bearer {self._get_key()}" return headers + def _get_key(self) -> str: + if isinstance(self._key, str): + return self._key + else: + return self._key() + def get_custom_headers(self) -> typing.Optional[typing.Dict[str, str]]: return self._headers @@ -56,8 +61,8 @@ class SyncClientWrapper(BaseClientWrapper): def __init__( self, *, - authorization: str, - organization_id: typing.Optional[str] = None, + organization_id: str, + key: typing.Union[str, typing.Callable[[], str]], headers: typing.Optional[typing.Dict[str, str]] = None, base_url: str, timeout: typing.Optional[float] = None, @@ -65,8 +70,8 @@ def __init__( httpx_client: httpx.Client, ): super().__init__( - authorization=authorization, organization_id=organization_id, + key=key, headers=headers, base_url=base_url, timeout=timeout, @@ -85,8 +90,8 @@ class AsyncClientWrapper(BaseClientWrapper): def __init__( self, *, - authorization: str, - organization_id: typing.Optional[str] = None, + organization_id: str, + key: typing.Union[str, typing.Callable[[], str]], headers: typing.Optional[typing.Dict[str, str]] = None, base_url: str, timeout: typing.Optional[float] = None, @@ -95,8 +100,8 @@ def __init__( httpx_client: httpx.AsyncClient, ): super().__init__( - authorization=authorization, organization_id=organization_id, + key=key, headers=headers, base_url=base_url, timeout=timeout, diff --git a/src/runcaptain/datasets/__init__.py b/src/runcaptain/datasets/__init__.py index 9a43856..5cde020 100644 --- a/src/runcaptain/datasets/__init__.py +++ b/src/runcaptain/datasets/__init__.py @@ -2,36 +2,3 @@ # isort: skip_file -import typing -from importlib import import_module - -if typing.TYPE_CHECKING: - from .types import GetDatasetArticleRequestDataset, SearchDatasetRequestDataset -_dynamic_imports: typing.Dict[str, str] = { - "GetDatasetArticleRequestDataset": ".types", - "SearchDatasetRequestDataset": ".types", -} - - -def __getattr__(attr_name: str) -> typing.Any: - module_name = _dynamic_imports.get(attr_name) - if module_name is None: - raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}") - try: - module = import_module(module_name, __package__) - if module_name == f".{attr_name}": - return module - else: - return getattr(module, attr_name) - except ImportError as e: - raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e - except AttributeError as e: - raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e - - -def __dir__(): - lazy_attrs = list(_dynamic_imports.keys()) - return sorted(lazy_attrs) - - -__all__ = ["GetDatasetArticleRequestDataset", "SearchDatasetRequestDataset"] diff --git a/src/runcaptain/datasets/client.py b/src/runcaptain/datasets/client.py index e20d30b..c9826e9 100644 --- a/src/runcaptain/datasets/client.py +++ b/src/runcaptain/datasets/client.py @@ -7,8 +7,6 @@ from ..types.dataset_article_response import DatasetArticleResponse from ..types.dataset_search_response import DatasetSearchResponse from .raw_client import AsyncRawDatasetsClient, RawDatasetsClient -from .types.get_dataset_article_request_dataset import GetDatasetArticleRequestDataset -from .types.search_dataset_request_dataset import SearchDatasetRequestDataset class DatasetsClient: @@ -27,36 +25,20 @@ def with_raw_response(self) -> RawDatasetsClient: return self._raw_client def search_dataset( - self, - dataset: SearchDatasetRequestDataset, - *, - q: str, - limit: typing.Optional[int] = None, - request_options: typing.Optional[RequestOptions] = None, + self, dataset: typing.Optional[str], *, request_options: typing.Optional[RequestOptions] = None ) -> DatasetSearchResponse: """ Search for articles within a news dataset. - Uses Google Search constrained to the dataset's domain to find relevant articles. - - ## Supported Datasets - - **nytimes**: New York Times (nytimes.com) - - **washpost**: Washington Post (washingtonpost.com) - - **sfstandard**: SF Standard (sfstandard.com) + Contact your Account Executive for available datasets. ## Response Returns a list of search results with title, URL, snippet, and date. Parameters ---------- - dataset : SearchDatasetRequestDataset - The news dataset to search. Supported: nytimes, washpost, sfstandard - - q : str - Search query - - limit : typing.Optional[int] - Maximum number of results to return (default: 10, max: 100) + dataset : typing.Optional[str] + The dataset to search. Contact your Account Executive for available datasets. request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -71,32 +53,23 @@ def search_dataset( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.datasets.search_dataset( - dataset="nytimes", - q="artificial intelligence", - limit=5, + dataset="dataset_name", ) """ - _response = self._raw_client.search_dataset(dataset, q=q, limit=limit, request_options=request_options) + _response = self._raw_client.search_dataset(dataset, request_options=request_options) return _response.data def get_dataset_article( - self, - dataset: GetDatasetArticleRequestDataset, - url: str, - *, - request_options: typing.Optional[RequestOptions] = None, + self, dataset: typing.Optional[str], url: str, *, request_options: typing.Optional[RequestOptions] = None ) -> DatasetArticleResponse: """ Get a full article from a supported news dataset. - ## Supported Datasets - - **nytimes**: New York Times (nytimes.com) - - **washpost**: Washington Post (washingtonpost.com) - - **sfstandard**: SF Standard (sfstandard.com) + Contact your Account Executive for available datasets. ## URL Path The article URL is appended directly to the endpoint path. The URL must match the domain of the specified dataset. @@ -106,11 +79,10 @@ def get_dataset_article( Parameters ---------- - dataset : GetDatasetArticleRequestDataset - The news dataset to get articles from. Supported: nytimes, washpost, sfstandard + dataset : typing.Optional[str] + The dataset to get articles from. Contact your Account Executive for available datasets. url : str - Full URL of the article to get, appended to the path. Must match the dataset's domain. request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -125,12 +97,12 @@ def get_dataset_article( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.datasets.get_dataset_article( - dataset="nytimes", - url="https://www.nytimes.com/example/news_example.html", + dataset="dataset_name", + url="https://www.example.com/2025/01/15/politics/example-article", ) """ _response = self._raw_client.get_dataset_article(dataset, url, request_options=request_options) @@ -153,36 +125,20 @@ def with_raw_response(self) -> AsyncRawDatasetsClient: return self._raw_client async def search_dataset( - self, - dataset: SearchDatasetRequestDataset, - *, - q: str, - limit: typing.Optional[int] = None, - request_options: typing.Optional[RequestOptions] = None, + self, dataset: typing.Optional[str], *, request_options: typing.Optional[RequestOptions] = None ) -> DatasetSearchResponse: """ Search for articles within a news dataset. - Uses Google Search constrained to the dataset's domain to find relevant articles. - - ## Supported Datasets - - **nytimes**: New York Times (nytimes.com) - - **washpost**: Washington Post (washingtonpost.com) - - **sfstandard**: SF Standard (sfstandard.com) + Contact your Account Executive for available datasets. ## Response Returns a list of search results with title, URL, snippet, and date. Parameters ---------- - dataset : SearchDatasetRequestDataset - The news dataset to search. Supported: nytimes, washpost, sfstandard - - q : str - Search query - - limit : typing.Optional[int] - Maximum number of results to return (default: 10, max: 100) + dataset : typing.Optional[str] + The dataset to search. Contact your Account Executive for available datasets. request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -199,38 +155,29 @@ async def search_dataset( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.datasets.search_dataset( - dataset="nytimes", - q="artificial intelligence", - limit=5, + dataset="dataset_name", ) asyncio.run(main()) """ - _response = await self._raw_client.search_dataset(dataset, q=q, limit=limit, request_options=request_options) + _response = await self._raw_client.search_dataset(dataset, request_options=request_options) return _response.data async def get_dataset_article( - self, - dataset: GetDatasetArticleRequestDataset, - url: str, - *, - request_options: typing.Optional[RequestOptions] = None, + self, dataset: typing.Optional[str], url: str, *, request_options: typing.Optional[RequestOptions] = None ) -> DatasetArticleResponse: """ Get a full article from a supported news dataset. - ## Supported Datasets - - **nytimes**: New York Times (nytimes.com) - - **washpost**: Washington Post (washingtonpost.com) - - **sfstandard**: SF Standard (sfstandard.com) + Contact your Account Executive for available datasets. ## URL Path The article URL is appended directly to the endpoint path. The URL must match the domain of the specified dataset. @@ -240,11 +187,10 @@ async def get_dataset_article( Parameters ---------- - dataset : GetDatasetArticleRequestDataset - The news dataset to get articles from. Supported: nytimes, washpost, sfstandard + dataset : typing.Optional[str] + The dataset to get articles from. Contact your Account Executive for available datasets. url : str - Full URL of the article to get, appended to the path. Must match the dataset's domain. request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -261,15 +207,15 @@ async def get_dataset_article( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.datasets.get_dataset_article( - dataset="nytimes", - url="https://www.nytimes.com/example/news_example.html", + dataset="dataset_name", + url="https://www.example.com/2025/01/15/politics/example-article", ) diff --git a/src/runcaptain/datasets/raw_client.py b/src/runcaptain/datasets/raw_client.py index c32f304..9cd2363 100644 --- a/src/runcaptain/datasets/raw_client.py +++ b/src/runcaptain/datasets/raw_client.py @@ -15,8 +15,6 @@ from ..errors.unauthorized_error import UnauthorizedError from ..types.dataset_article_response import DatasetArticleResponse from ..types.dataset_search_response import DatasetSearchResponse -from .types.get_dataset_article_request_dataset import GetDatasetArticleRequestDataset -from .types.search_dataset_request_dataset import SearchDatasetRequestDataset class RawDatasetsClient: @@ -24,36 +22,20 @@ def __init__(self, *, client_wrapper: SyncClientWrapper): self._client_wrapper = client_wrapper def search_dataset( - self, - dataset: SearchDatasetRequestDataset, - *, - q: str, - limit: typing.Optional[int] = None, - request_options: typing.Optional[RequestOptions] = None, + self, dataset: typing.Optional[str], *, request_options: typing.Optional[RequestOptions] = None ) -> HttpResponse[DatasetSearchResponse]: """ Search for articles within a news dataset. - Uses Google Search constrained to the dataset's domain to find relevant articles. - - ## Supported Datasets - - **nytimes**: New York Times (nytimes.com) - - **washpost**: Washington Post (washingtonpost.com) - - **sfstandard**: SF Standard (sfstandard.com) + Contact your Account Executive for available datasets. ## Response Returns a list of search results with title, URL, snippet, and date. Parameters ---------- - dataset : SearchDatasetRequestDataset - The news dataset to search. Supported: nytimes, washpost, sfstandard - - q : str - Search query - - limit : typing.Optional[int] - Maximum number of results to return (default: 10, max: 100) + dataset : typing.Optional[str] + The dataset to search. Contact your Account Executive for available datasets. request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -66,10 +48,6 @@ def search_dataset( _response = self._client_wrapper.httpx_client.request( f"v2/datasets/{jsonable_encoder(dataset)}/search", method="GET", - params={ - "q": q, - "limit": limit, - }, request_options=request_options, ) try: @@ -132,19 +110,12 @@ def search_dataset( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) def get_dataset_article( - self, - dataset: GetDatasetArticleRequestDataset, - url: str, - *, - request_options: typing.Optional[RequestOptions] = None, + self, dataset: typing.Optional[str], url: str, *, request_options: typing.Optional[RequestOptions] = None ) -> HttpResponse[DatasetArticleResponse]: """ Get a full article from a supported news dataset. - ## Supported Datasets - - **nytimes**: New York Times (nytimes.com) - - **washpost**: Washington Post (washingtonpost.com) - - **sfstandard**: SF Standard (sfstandard.com) + Contact your Account Executive for available datasets. ## URL Path The article URL is appended directly to the endpoint path. The URL must match the domain of the specified dataset. @@ -154,11 +125,10 @@ def get_dataset_article( Parameters ---------- - dataset : GetDatasetArticleRequestDataset - The news dataset to get articles from. Supported: nytimes, washpost, sfstandard + dataset : typing.Optional[str] + The dataset to get articles from. Contact your Account Executive for available datasets. url : str - Full URL of the article to get, appended to the path. Must match the dataset's domain. request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -238,36 +208,20 @@ def __init__(self, *, client_wrapper: AsyncClientWrapper): self._client_wrapper = client_wrapper async def search_dataset( - self, - dataset: SearchDatasetRequestDataset, - *, - q: str, - limit: typing.Optional[int] = None, - request_options: typing.Optional[RequestOptions] = None, + self, dataset: typing.Optional[str], *, request_options: typing.Optional[RequestOptions] = None ) -> AsyncHttpResponse[DatasetSearchResponse]: """ Search for articles within a news dataset. - Uses Google Search constrained to the dataset's domain to find relevant articles. - - ## Supported Datasets - - **nytimes**: New York Times (nytimes.com) - - **washpost**: Washington Post (washingtonpost.com) - - **sfstandard**: SF Standard (sfstandard.com) + Contact your Account Executive for available datasets. ## Response Returns a list of search results with title, URL, snippet, and date. Parameters ---------- - dataset : SearchDatasetRequestDataset - The news dataset to search. Supported: nytimes, washpost, sfstandard - - q : str - Search query - - limit : typing.Optional[int] - Maximum number of results to return (default: 10, max: 100) + dataset : typing.Optional[str] + The dataset to search. Contact your Account Executive for available datasets. request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -280,10 +234,6 @@ async def search_dataset( _response = await self._client_wrapper.httpx_client.request( f"v2/datasets/{jsonable_encoder(dataset)}/search", method="GET", - params={ - "q": q, - "limit": limit, - }, request_options=request_options, ) try: @@ -346,19 +296,12 @@ async def search_dataset( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) async def get_dataset_article( - self, - dataset: GetDatasetArticleRequestDataset, - url: str, - *, - request_options: typing.Optional[RequestOptions] = None, + self, dataset: typing.Optional[str], url: str, *, request_options: typing.Optional[RequestOptions] = None ) -> AsyncHttpResponse[DatasetArticleResponse]: """ Get a full article from a supported news dataset. - ## Supported Datasets - - **nytimes**: New York Times (nytimes.com) - - **washpost**: Washington Post (washingtonpost.com) - - **sfstandard**: SF Standard (sfstandard.com) + Contact your Account Executive for available datasets. ## URL Path The article URL is appended directly to the endpoint path. The URL must match the domain of the specified dataset. @@ -368,11 +311,10 @@ async def get_dataset_article( Parameters ---------- - dataset : GetDatasetArticleRequestDataset - The news dataset to get articles from. Supported: nytimes, washpost, sfstandard + dataset : typing.Optional[str] + The dataset to get articles from. Contact your Account Executive for available datasets. url : str - Full URL of the article to get, appended to the path. Must match the dataset's domain. request_options : typing.Optional[RequestOptions] Request-specific configuration. diff --git a/src/runcaptain/datasets/types/__init__.py b/src/runcaptain/datasets/types/__init__.py deleted file mode 100644 index c7026d5..0000000 --- a/src/runcaptain/datasets/types/__init__.py +++ /dev/null @@ -1,38 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -# isort: skip_file - -import typing -from importlib import import_module - -if typing.TYPE_CHECKING: - from .get_dataset_article_request_dataset import GetDatasetArticleRequestDataset - from .search_dataset_request_dataset import SearchDatasetRequestDataset -_dynamic_imports: typing.Dict[str, str] = { - "GetDatasetArticleRequestDataset": ".get_dataset_article_request_dataset", - "SearchDatasetRequestDataset": ".search_dataset_request_dataset", -} - - -def __getattr__(attr_name: str) -> typing.Any: - module_name = _dynamic_imports.get(attr_name) - if module_name is None: - raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}") - try: - module = import_module(module_name, __package__) - if module_name == f".{attr_name}": - return module - else: - return getattr(module, attr_name) - except ImportError as e: - raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e - except AttributeError as e: - raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e - - -def __dir__(): - lazy_attrs = list(_dynamic_imports.keys()) - return sorted(lazy_attrs) - - -__all__ = ["GetDatasetArticleRequestDataset", "SearchDatasetRequestDataset"] diff --git a/src/runcaptain/datasets/types/get_dataset_article_request_dataset.py b/src/runcaptain/datasets/types/get_dataset_article_request_dataset.py deleted file mode 100644 index a1cbd8a..0000000 --- a/src/runcaptain/datasets/types/get_dataset_article_request_dataset.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -GetDatasetArticleRequestDataset = typing.Union[typing.Literal["nytimes", "washpost", "sfstandard"], typing.Any] diff --git a/src/runcaptain/datasets/types/search_dataset_request_dataset.py b/src/runcaptain/datasets/types/search_dataset_request_dataset.py deleted file mode 100644 index a1aff73..0000000 --- a/src/runcaptain/datasets/types/search_dataset_request_dataset.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -SearchDatasetRequestDataset = typing.Union[typing.Literal["nytimes", "washpost", "sfstandard"], typing.Any] diff --git a/src/runcaptain/indexing/__init__.py b/src/runcaptain/indexing/__init__.py index 5d67a26..2c72af1 100644 --- a/src/runcaptain/indexing/__init__.py +++ b/src/runcaptain/indexing/__init__.py @@ -13,9 +13,16 @@ IndexGcsDirectoryRequestV2ProcessingType, IndexGcsFileRequestV2ProcessingType, IndexGcsRequestV2ProcessingType, + IndexR2DirectoryRequestV2Jurisdiction, + IndexR2DirectoryRequestV2ProcessingType, + IndexR2FileRequestV2Jurisdiction, + IndexR2FileRequestV2ProcessingType, + IndexR2RequestV2Jurisdiction, + IndexR2RequestV2ProcessingType, IndexS3DirectoryRequestV2ProcessingType, IndexS3FileRequestV2ProcessingType, IndexS3RequestV2ProcessingType, + IndexUrlRequestV2ProcessingType, ) _dynamic_imports: typing.Dict[str, str] = { "IndexAzureDirectoryRequestV2ProcessingType": ".types", @@ -24,9 +31,16 @@ "IndexGcsDirectoryRequestV2ProcessingType": ".types", "IndexGcsFileRequestV2ProcessingType": ".types", "IndexGcsRequestV2ProcessingType": ".types", + "IndexR2DirectoryRequestV2Jurisdiction": ".types", + "IndexR2DirectoryRequestV2ProcessingType": ".types", + "IndexR2FileRequestV2Jurisdiction": ".types", + "IndexR2FileRequestV2ProcessingType": ".types", + "IndexR2RequestV2Jurisdiction": ".types", + "IndexR2RequestV2ProcessingType": ".types", "IndexS3DirectoryRequestV2ProcessingType": ".types", "IndexS3FileRequestV2ProcessingType": ".types", "IndexS3RequestV2ProcessingType": ".types", + "IndexUrlRequestV2ProcessingType": ".types", } @@ -58,7 +72,14 @@ def __dir__(): "IndexGcsDirectoryRequestV2ProcessingType", "IndexGcsFileRequestV2ProcessingType", "IndexGcsRequestV2ProcessingType", + "IndexR2DirectoryRequestV2Jurisdiction", + "IndexR2DirectoryRequestV2ProcessingType", + "IndexR2FileRequestV2Jurisdiction", + "IndexR2FileRequestV2ProcessingType", + "IndexR2RequestV2Jurisdiction", + "IndexR2RequestV2ProcessingType", "IndexS3DirectoryRequestV2ProcessingType", "IndexS3FileRequestV2ProcessingType", "IndexS3RequestV2ProcessingType", + "IndexUrlRequestV2ProcessingType", ] diff --git a/src/runcaptain/indexing/client.py b/src/runcaptain/indexing/client.py index 41b804c..6a63666 100644 --- a/src/runcaptain/indexing/client.py +++ b/src/runcaptain/indexing/client.py @@ -12,9 +12,16 @@ from .types.index_gcs_directory_request_v2processing_type import IndexGcsDirectoryRequestV2ProcessingType from .types.index_gcs_file_request_v2processing_type import IndexGcsFileRequestV2ProcessingType from .types.index_gcs_request_v2processing_type import IndexGcsRequestV2ProcessingType +from .types.index_r2directory_request_v2jurisdiction import IndexR2DirectoryRequestV2Jurisdiction +from .types.index_r2directory_request_v2processing_type import IndexR2DirectoryRequestV2ProcessingType +from .types.index_r2file_request_v2jurisdiction import IndexR2FileRequestV2Jurisdiction +from .types.index_r2file_request_v2processing_type import IndexR2FileRequestV2ProcessingType +from .types.index_r2request_v2jurisdiction import IndexR2RequestV2Jurisdiction +from .types.index_r2request_v2processing_type import IndexR2RequestV2ProcessingType from .types.index_s3directory_request_v2processing_type import IndexS3DirectoryRequestV2ProcessingType from .types.index_s3file_request_v2processing_type import IndexS3FileRequestV2ProcessingType from .types.index_s3request_v2processing_type import IndexS3RequestV2ProcessingType +from .types.index_url_request_v2processing_type import IndexUrlRequestV2ProcessingType # this is used as the default value for optional parameters OMIT = typing.cast(typing.Any, ...) @@ -43,7 +50,6 @@ def index_s3bucket_v2( aws_access_key_id: str, aws_secret_access_key: str, processing_type: IndexS3RequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, bucket_region: typing.Optional[str] = OMIT, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, @@ -56,7 +62,6 @@ def index_s3bucket_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the S3 bucket @@ -70,9 +75,6 @@ def index_s3bucket_v2( processing_type : IndexS3RequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - bucket_region : typing.Optional[str] AWS region where the bucket is located @@ -98,16 +100,17 @@ def index_s3bucket_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_s3bucket_v2( collection_name="my_documents", - bucket_name="my-s3-bucket", + bucket_name="my-documents-bucket", aws_access_key_id="AKIAIOSFODNN7EXAMPLE", - aws_secret_access_key="your_secret_key", + aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", bucket_region="us-east-1", processing_type="advanced", + skip_existing=True, ) """ _response = self._raw_client.index_s3bucket_v2( @@ -116,7 +119,6 @@ def index_s3bucket_v2( aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, processing_type=processing_type, - idempotency_key=idempotency_key, bucket_region=bucket_region, max_files=max_files, skip_existing=skip_existing, @@ -144,7 +146,6 @@ def index_s3file_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the S3 bucket @@ -180,15 +181,15 @@ def index_s3file_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_s3file_v2( collection_name="my_documents", - bucket_name="my-s3-bucket", - file_uri="s3://my-s3-bucket/contracts/acme_contract.pdf", + bucket_name="my-documents-bucket", + file_uri="s3://my-documents-bucket/reports/quarterly-report-q4.pdf", aws_access_key_id="AKIAIOSFODNN7EXAMPLE", - aws_secret_access_key="your_secret_key", + aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", bucket_region="us-east-1", processing_type="advanced", ) @@ -224,7 +225,6 @@ def index_gcs_bucket_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the GCS bucket @@ -257,13 +257,13 @@ def index_gcs_bucket_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_gcs_bucket_v2( collection_name="my_documents", - bucket_name="my-gcs-bucket", - service_account_json='{"type":"service_account","project_id":"my-project",...}', + bucket_name="my-gcs-documents", + service_account_json='{"type": "service_account", "project_id": "my-project", ...}', processing_type="advanced", ) """ @@ -296,7 +296,6 @@ def index_gcs_file_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the GCS bucket @@ -326,14 +325,14 @@ def index_gcs_file_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_gcs_file_v2( - collection_name="collection_name", - bucket_name="my-company-docs", - file_uri="gs://my-company-docs/contracts/acme_contract.pdf", - service_account_json='{"type":"service_account","project_id":"my-project",...}', + collection_name="my_documents", + bucket_name="my-gcs-documents", + file_uri="gs://my-gcs-documents/reports/annual-review.pdf", + service_account_json='{"type": "service_account", "project_id": "my-project", ...}', processing_type="advanced", ) """ @@ -357,7 +356,6 @@ def index_s3directory_v2( aws_access_key_id: str, aws_secret_access_key: str, processing_type: IndexS3DirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, bucket_region: typing.Optional[str] = OMIT, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, @@ -370,7 +368,6 @@ def index_s3directory_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the S3 bucket @@ -387,9 +384,6 @@ def index_s3directory_v2( processing_type : IndexS3DirectoryRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - bucket_region : typing.Optional[str] AWS region where the bucket is located @@ -415,15 +409,15 @@ def index_s3directory_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_s3directory_v2( collection_name="my_documents", - bucket_name="my-s3-bucket", - directory_path="reports/2024/january", + bucket_name="my-documents-bucket", + directory_path="reports/2025/", aws_access_key_id="AKIAIOSFODNN7EXAMPLE", - aws_secret_access_key="your_secret_key", + aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", bucket_region="us-east-1", processing_type="advanced", ) @@ -435,7 +429,6 @@ def index_s3directory_v2( aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, processing_type=processing_type, - idempotency_key=idempotency_key, bucket_region=bucket_region, max_files=max_files, skip_existing=skip_existing, @@ -452,7 +445,6 @@ def index_gcs_directory_v2( directory_path: str, service_account_json: str, processing_type: IndexGcsDirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, @@ -464,7 +456,6 @@ def index_gcs_directory_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the GCS bucket @@ -478,9 +469,6 @@ def index_gcs_directory_v2( processing_type : IndexGcsDirectoryRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -503,14 +491,14 @@ def index_gcs_directory_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_gcs_directory_v2( collection_name="my_documents", - bucket_name="my-gcs-bucket", - directory_path="reports/2024/january", - service_account_json='{"type":"service_account","project_id":"my-project",...}', + bucket_name="my-gcs-documents", + directory_path="reports/2025/", + service_account_json='{"type": "service_account", "project_id": "my-project", ...}', processing_type="advanced", ) """ @@ -520,7 +508,6 @@ def index_gcs_directory_v2( directory_path=directory_path, service_account_json=service_account_json, processing_type=processing_type, - idempotency_key=idempotency_key, max_files=max_files, skip_existing=skip_existing, custom_metadata=custom_metadata, @@ -536,7 +523,6 @@ def index_azure_container_v2( account_name: str, account_key: str, processing_type: IndexAzureRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, @@ -548,7 +534,6 @@ def index_azure_container_v2( Parameters ---------- collection_name : str - Name of the collection to index into container_name : str Name of the Azure Blob Storage container @@ -562,9 +547,6 @@ def index_azure_container_v2( processing_type : IndexAzureRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -587,14 +569,14 @@ def index_azure_container_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_azure_container_v2( collection_name="my_documents", - container_name="my-container", + container_name="my-azure-documents", account_name="mystorageaccount", - account_key="your_account_key", + account_key="base64encodedaccountkey==", processing_type="advanced", ) """ @@ -604,7 +586,6 @@ def index_azure_container_v2( account_name=account_name, account_key=account_key, processing_type=processing_type, - idempotency_key=idempotency_key, max_files=max_files, skip_existing=skip_existing, custom_metadata=custom_metadata, @@ -630,7 +611,6 @@ def index_azure_file_v2( Parameters ---------- collection_name : str - Name of the collection to index into container_name : str Name of the Azure Blob Storage container @@ -663,15 +643,15 @@ def index_azure_file_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_azure_file_v2( collection_name="my_documents", - container_name="my-container", - file_uri="https://mystorageaccount.blob.core.windows.net/my-container/contracts/acme_contract.pdf", + container_name="my-azure-documents", + file_uri="https://mystorageaccount.blob.core.windows.net/my-azure-documents/reports/annual-review.pdf", account_name="mystorageaccount", - account_key="your_account_key", + account_key="base64encodedaccountkey==", processing_type="advanced", ) """ @@ -696,7 +676,6 @@ def index_azure_directory_v2( account_name: str, account_key: str, processing_type: IndexAzureDirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, @@ -708,7 +687,6 @@ def index_azure_directory_v2( Parameters ---------- collection_name : str - Name of the collection to index into container_name : str Name of the Azure Blob Storage container @@ -725,9 +703,6 @@ def index_azure_directory_v2( processing_type : IndexAzureDirectoryRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -750,15 +725,15 @@ def index_azure_directory_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_azure_directory_v2( collection_name="my_documents", - container_name="my-container", - directory_path="reports/2024/january", + container_name="my-azure-documents", + directory_path="reports/2025/", account_name="mystorageaccount", - account_key="your_account_key", + account_key="base64encodedaccountkey==", processing_type="advanced", ) """ @@ -769,7 +744,6 @@ def index_azure_directory_v2( account_name=account_name, account_key=account_key, processing_type=processing_type, - idempotency_key=idempotency_key, max_files=max_files, skip_existing=skip_existing, custom_metadata=custom_metadata, @@ -777,6 +751,344 @@ def index_azure_directory_v2( ) return _response.data + def index_r2bucket_v2( + self, + collection_name: str, + *, + bucket_name: str, + account_id: str, + access_key_id: str, + secret_access_key: str, + processing_type: IndexR2RequestV2ProcessingType, + jurisdiction: typing.Optional[IndexR2RequestV2Jurisdiction] = OMIT, + max_files: typing.Optional[int] = OMIT, + skip_existing: typing.Optional[bool] = OMIT, + custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> IndexJobResponseV2: + """ + Index all files from a Cloudflare R2 bucket into a collection. R2 is S3-compatible — provide your R2 API token's Access Key ID and Secret Access Key. Returns a job_id for tracking progress via GET /v2/jobs/{job_id}. + + Parameters + ---------- + collection_name : str + + bucket_name : str + Name of the R2 bucket + + account_id : str + Cloudflare account ID (found in your R2 dashboard URL) + + access_key_id : str + R2 S3 API token Access Key ID + + secret_access_key : str + R2 S3 API token Secret Access Key + + processing_type : IndexR2RequestV2ProcessingType + Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + + jurisdiction : typing.Optional[IndexR2RequestV2Jurisdiction] + R2 jurisdiction. 'default' for global, 'eu' for EU-only storage, 'fedramp' for FedRAMP-compliant storage. + + max_files : typing.Optional[int] + Maximum number of files to index (optional) + + skip_existing : typing.Optional[bool] + Skip files that are already indexed in the collection. When true, only new files will be indexed. Set to false to re-index all files. + + custom_metadata : typing.Optional[typing.Dict[str, typing.Any]] + Custom metadata to attach to all indexed chunks. Keys must be strings. Values: str, int, float, bool, or array of strings. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + IndexJobResponseV2 + Indexing Job Started + + Examples + -------- + from runcaptain import Captain + + client = Captain( + organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", + ) + client.indexing.index_r2bucket_v2( + collection_name="my_documents", + bucket_name="my-r2-bucket", + account_id="your_cloudflare_account_id", + access_key_id="your_r2_access_key_id", + secret_access_key="your_r2_secret_access_key", + processing_type="advanced", + ) + """ + _response = self._raw_client.index_r2bucket_v2( + collection_name, + bucket_name=bucket_name, + account_id=account_id, + access_key_id=access_key_id, + secret_access_key=secret_access_key, + processing_type=processing_type, + jurisdiction=jurisdiction, + max_files=max_files, + skip_existing=skip_existing, + custom_metadata=custom_metadata, + request_options=request_options, + ) + return _response.data + + def index_r2file_v2( + self, + collection_name: str, + *, + bucket_name: str, + file_uri: str, + account_id: str, + access_key_id: str, + secret_access_key: str, + processing_type: IndexR2FileRequestV2ProcessingType, + jurisdiction: typing.Optional[IndexR2FileRequestV2Jurisdiction] = OMIT, + custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> IndexJobResponseV2: + """ + Index a single file from a Cloudflare R2 bucket into a collection. Returns a job_id for tracking progress. + + Parameters + ---------- + collection_name : str + + bucket_name : str + Name of the R2 bucket + + file_uri : str + R2 URI format: r2://bucket-name/path/to/file.pdf + + account_id : str + Cloudflare account ID (found in your R2 dashboard URL) + + access_key_id : str + R2 S3 API token Access Key ID + + secret_access_key : str + R2 S3 API token Secret Access Key + + processing_type : IndexR2FileRequestV2ProcessingType + Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + + jurisdiction : typing.Optional[IndexR2FileRequestV2Jurisdiction] + R2 jurisdiction. 'default' for global, 'eu' for EU-only storage, 'fedramp' for FedRAMP-compliant storage. + + custom_metadata : typing.Optional[typing.Dict[str, typing.Any]] + Custom metadata to attach to all chunks from this file. Keys must be strings. Values: str, int, float, bool, or array of strings. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + IndexJobResponseV2 + Indexing Job Started + + Examples + -------- + from runcaptain import Captain + + client = Captain( + organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", + ) + client.indexing.index_r2file_v2( + collection_name="my_documents", + bucket_name="my-r2-bucket", + file_uri="r2://my-r2-bucket/reports/annual-review.pdf", + account_id="your_cloudflare_account_id", + access_key_id="your_r2_access_key_id", + secret_access_key="your_r2_secret_access_key", + processing_type="advanced", + ) + """ + _response = self._raw_client.index_r2file_v2( + collection_name, + bucket_name=bucket_name, + file_uri=file_uri, + account_id=account_id, + access_key_id=access_key_id, + secret_access_key=secret_access_key, + processing_type=processing_type, + jurisdiction=jurisdiction, + custom_metadata=custom_metadata, + request_options=request_options, + ) + return _response.data + + def index_r2directory_v2( + self, + collection_name: str, + *, + bucket_name: str, + directory_path: str, + account_id: str, + access_key_id: str, + secret_access_key: str, + processing_type: IndexR2DirectoryRequestV2ProcessingType, + jurisdiction: typing.Optional[IndexR2DirectoryRequestV2Jurisdiction] = OMIT, + max_files: typing.Optional[int] = OMIT, + skip_existing: typing.Optional[bool] = OMIT, + custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> IndexJobResponseV2: + """ + Index all files from a specific directory (prefix) in a Cloudflare R2 bucket into a collection. Uses prefix-based filtering to index only objects within the specified path. Returns a job_id for tracking progress via GET /v2/jobs/{job_id}. + + Parameters + ---------- + collection_name : str + + bucket_name : str + Name of the R2 bucket + + directory_path : str + Path to the directory (prefix) within the bucket. Accepts either a relative path (e.g., 'reports/2024/january') or a full R2 URI (e.g., 'r2://my-bucket/reports/2024/january'). All objects within this prefix will be indexed. + + account_id : str + Cloudflare account ID (found in your R2 dashboard URL) + + access_key_id : str + R2 S3 API token Access Key ID + + secret_access_key : str + R2 S3 API token Secret Access Key + + processing_type : IndexR2DirectoryRequestV2ProcessingType + Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + + jurisdiction : typing.Optional[IndexR2DirectoryRequestV2Jurisdiction] + R2 jurisdiction. 'default' for global, 'eu' for EU-only storage, 'fedramp' for FedRAMP-compliant storage. + + max_files : typing.Optional[int] + Maximum number of files to index (optional) + + skip_existing : typing.Optional[bool] + Skip files that are already indexed in the collection. When true, only new files will be indexed. Set to false to re-index all files. + + custom_metadata : typing.Optional[typing.Dict[str, typing.Any]] + Custom metadata to attach to all indexed chunks. Keys must be strings. Values: str, int, float, bool, or array of strings. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + IndexJobResponseV2 + Indexing Job Started + + Examples + -------- + from runcaptain import Captain + + client = Captain( + organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", + ) + client.indexing.index_r2directory_v2( + collection_name="my_documents", + bucket_name="my-r2-bucket", + directory_path="reports/2025/", + account_id="your_cloudflare_account_id", + access_key_id="your_r2_access_key_id", + secret_access_key="your_r2_secret_access_key", + processing_type="advanced", + ) + """ + _response = self._raw_client.index_r2directory_v2( + collection_name, + bucket_name=bucket_name, + directory_path=directory_path, + account_id=account_id, + access_key_id=access_key_id, + secret_access_key=secret_access_key, + processing_type=processing_type, + jurisdiction=jurisdiction, + max_files=max_files, + skip_existing=skip_existing, + custom_metadata=custom_metadata, + request_options=request_options, + ) + return _response.data + + def index_url_v2( + self, + collection_name: str, + *, + processing_type: IndexUrlRequestV2ProcessingType, + url: typing.Optional[str] = OMIT, + urls: typing.Optional[typing.Sequence[str]] = OMIT, + custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> IndexJobResponseV2: + """ + Index documents from public URLs into a collection. No cloud storage credentials required. + + You can provide either: + - `url` — a single URL string for one document + - `urls` — an array of URL strings for multiple documents + + Supported file types include PDF, TXT, DOCX, CSV, XLSX, and more. Documents are downloaded and processed through the same pipeline as cloud storage indexing. + + Returns a job_id for tracking progress via GET /v2/jobs/{job_id}. + + Parameters + ---------- + collection_name : str + + processing_type : IndexUrlRequestV2ProcessingType + Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + + url : typing.Optional[str] + A single public URL to a hosted document (PDF, TXT, DOCX, etc.). Provide either 'url' or 'urls', not both. + + urls : typing.Optional[typing.Sequence[str]] + An array of public URLs to hosted documents. Provide either 'url' or 'urls', not both. + + custom_metadata : typing.Optional[typing.Dict[str, typing.Any]] + Custom metadata to attach to all indexed chunks. Keys must be strings. Values: str, int, float, bool, or array of strings. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + IndexJobResponseV2 + Indexing job started + + Examples + -------- + from runcaptain import Captain + + client = Captain( + organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", + ) + client.indexing.index_url_v2( + collection_name="my_documents", + url="https://example.com/documents/report.pdf", + processing_type="advanced", + ) + """ + _response = self._raw_client.index_url_v2( + collection_name, + processing_type=processing_type, + url=url, + urls=urls, + custom_metadata=custom_metadata, + request_options=request_options, + ) + return _response.data + class AsyncIndexingClient: def __init__(self, *, client_wrapper: AsyncClientWrapper): @@ -801,7 +1113,6 @@ async def index_s3bucket_v2( aws_access_key_id: str, aws_secret_access_key: str, processing_type: IndexS3RequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, bucket_region: typing.Optional[str] = OMIT, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, @@ -814,7 +1125,6 @@ async def index_s3bucket_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the S3 bucket @@ -828,9 +1138,6 @@ async def index_s3bucket_v2( processing_type : IndexS3RequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - bucket_region : typing.Optional[str] AWS region where the bucket is located @@ -858,19 +1165,20 @@ async def index_s3bucket_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.indexing.index_s3bucket_v2( collection_name="my_documents", - bucket_name="my-s3-bucket", + bucket_name="my-documents-bucket", aws_access_key_id="AKIAIOSFODNN7EXAMPLE", - aws_secret_access_key="your_secret_key", + aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", bucket_region="us-east-1", processing_type="advanced", + skip_existing=True, ) @@ -882,7 +1190,6 @@ async def main() -> None: aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, processing_type=processing_type, - idempotency_key=idempotency_key, bucket_region=bucket_region, max_files=max_files, skip_existing=skip_existing, @@ -910,7 +1217,6 @@ async def index_s3file_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the S3 bucket @@ -948,18 +1254,18 @@ async def index_s3file_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.indexing.index_s3file_v2( collection_name="my_documents", - bucket_name="my-s3-bucket", - file_uri="s3://my-s3-bucket/contracts/acme_contract.pdf", + bucket_name="my-documents-bucket", + file_uri="s3://my-documents-bucket/reports/quarterly-report-q4.pdf", aws_access_key_id="AKIAIOSFODNN7EXAMPLE", - aws_secret_access_key="your_secret_key", + aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", bucket_region="us-east-1", processing_type="advanced", ) @@ -998,7 +1304,6 @@ async def index_gcs_bucket_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the GCS bucket @@ -1033,16 +1338,16 @@ async def index_gcs_bucket_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.indexing.index_gcs_bucket_v2( collection_name="my_documents", - bucket_name="my-gcs-bucket", - service_account_json='{"type":"service_account","project_id":"my-project",...}', + bucket_name="my-gcs-documents", + service_account_json='{"type": "service_account", "project_id": "my-project", ...}', processing_type="advanced", ) @@ -1078,7 +1383,6 @@ async def index_gcs_file_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the GCS bucket @@ -1110,17 +1414,17 @@ async def index_gcs_file_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.indexing.index_gcs_file_v2( - collection_name="collection_name", - bucket_name="my-company-docs", - file_uri="gs://my-company-docs/contracts/acme_contract.pdf", - service_account_json='{"type":"service_account","project_id":"my-project",...}', + collection_name="my_documents", + bucket_name="my-gcs-documents", + file_uri="gs://my-gcs-documents/reports/annual-review.pdf", + service_account_json='{"type": "service_account", "project_id": "my-project", ...}', processing_type="advanced", ) @@ -1147,7 +1451,6 @@ async def index_s3directory_v2( aws_access_key_id: str, aws_secret_access_key: str, processing_type: IndexS3DirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, bucket_region: typing.Optional[str] = OMIT, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, @@ -1160,7 +1463,6 @@ async def index_s3directory_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the S3 bucket @@ -1177,9 +1479,6 @@ async def index_s3directory_v2( processing_type : IndexS3DirectoryRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - bucket_region : typing.Optional[str] AWS region where the bucket is located @@ -1207,18 +1506,18 @@ async def index_s3directory_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.indexing.index_s3directory_v2( collection_name="my_documents", - bucket_name="my-s3-bucket", - directory_path="reports/2024/january", + bucket_name="my-documents-bucket", + directory_path="reports/2025/", aws_access_key_id="AKIAIOSFODNN7EXAMPLE", - aws_secret_access_key="your_secret_key", + aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", bucket_region="us-east-1", processing_type="advanced", ) @@ -1233,7 +1532,6 @@ async def main() -> None: aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, processing_type=processing_type, - idempotency_key=idempotency_key, bucket_region=bucket_region, max_files=max_files, skip_existing=skip_existing, @@ -1250,7 +1548,6 @@ async def index_gcs_directory_v2( directory_path: str, service_account_json: str, processing_type: IndexGcsDirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, @@ -1262,7 +1559,6 @@ async def index_gcs_directory_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the GCS bucket @@ -1276,9 +1572,6 @@ async def index_gcs_directory_v2( processing_type : IndexGcsDirectoryRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -1303,17 +1596,17 @@ async def index_gcs_directory_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.indexing.index_gcs_directory_v2( collection_name="my_documents", - bucket_name="my-gcs-bucket", - directory_path="reports/2024/january", - service_account_json='{"type":"service_account","project_id":"my-project",...}', + bucket_name="my-gcs-documents", + directory_path="reports/2025/", + service_account_json='{"type": "service_account", "project_id": "my-project", ...}', processing_type="advanced", ) @@ -1326,7 +1619,6 @@ async def main() -> None: directory_path=directory_path, service_account_json=service_account_json, processing_type=processing_type, - idempotency_key=idempotency_key, max_files=max_files, skip_existing=skip_existing, custom_metadata=custom_metadata, @@ -1342,7 +1634,6 @@ async def index_azure_container_v2( account_name: str, account_key: str, processing_type: IndexAzureRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, @@ -1354,7 +1645,6 @@ async def index_azure_container_v2( Parameters ---------- collection_name : str - Name of the collection to index into container_name : str Name of the Azure Blob Storage container @@ -1368,9 +1658,6 @@ async def index_azure_container_v2( processing_type : IndexAzureRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -1395,17 +1682,17 @@ async def index_azure_container_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.indexing.index_azure_container_v2( collection_name="my_documents", - container_name="my-container", + container_name="my-azure-documents", account_name="mystorageaccount", - account_key="your_account_key", + account_key="base64encodedaccountkey==", processing_type="advanced", ) @@ -1418,7 +1705,6 @@ async def main() -> None: account_name=account_name, account_key=account_key, processing_type=processing_type, - idempotency_key=idempotency_key, max_files=max_files, skip_existing=skip_existing, custom_metadata=custom_metadata, @@ -1444,7 +1730,6 @@ async def index_azure_file_v2( Parameters ---------- collection_name : str - Name of the collection to index into container_name : str Name of the Azure Blob Storage container @@ -1479,18 +1764,18 @@ async def index_azure_file_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.indexing.index_azure_file_v2( collection_name="my_documents", - container_name="my-container", - file_uri="https://mystorageaccount.blob.core.windows.net/my-container/contracts/acme_contract.pdf", + container_name="my-azure-documents", + file_uri="https://mystorageaccount.blob.core.windows.net/my-azure-documents/reports/annual-review.pdf", account_name="mystorageaccount", - account_key="your_account_key", + account_key="base64encodedaccountkey==", processing_type="advanced", ) @@ -1518,7 +1803,6 @@ async def index_azure_directory_v2( account_name: str, account_key: str, processing_type: IndexAzureDirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, @@ -1530,7 +1814,6 @@ async def index_azure_directory_v2( Parameters ---------- collection_name : str - Name of the collection to index into container_name : str Name of the Azure Blob Storage container @@ -1547,9 +1830,6 @@ async def index_azure_directory_v2( processing_type : IndexAzureDirectoryRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -1574,18 +1854,18 @@ async def index_azure_directory_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.indexing.index_azure_directory_v2( collection_name="my_documents", - container_name="my-container", - directory_path="reports/2024/january", + container_name="my-azure-documents", + directory_path="reports/2025/", account_name="mystorageaccount", - account_key="your_account_key", + account_key="base64encodedaccountkey==", processing_type="advanced", ) @@ -1599,10 +1879,379 @@ async def main() -> None: account_name=account_name, account_key=account_key, processing_type=processing_type, - idempotency_key=idempotency_key, max_files=max_files, skip_existing=skip_existing, custom_metadata=custom_metadata, request_options=request_options, ) return _response.data + + async def index_r2bucket_v2( + self, + collection_name: str, + *, + bucket_name: str, + account_id: str, + access_key_id: str, + secret_access_key: str, + processing_type: IndexR2RequestV2ProcessingType, + jurisdiction: typing.Optional[IndexR2RequestV2Jurisdiction] = OMIT, + max_files: typing.Optional[int] = OMIT, + skip_existing: typing.Optional[bool] = OMIT, + custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> IndexJobResponseV2: + """ + Index all files from a Cloudflare R2 bucket into a collection. R2 is S3-compatible — provide your R2 API token's Access Key ID and Secret Access Key. Returns a job_id for tracking progress via GET /v2/jobs/{job_id}. + + Parameters + ---------- + collection_name : str + + bucket_name : str + Name of the R2 bucket + + account_id : str + Cloudflare account ID (found in your R2 dashboard URL) + + access_key_id : str + R2 S3 API token Access Key ID + + secret_access_key : str + R2 S3 API token Secret Access Key + + processing_type : IndexR2RequestV2ProcessingType + Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + + jurisdiction : typing.Optional[IndexR2RequestV2Jurisdiction] + R2 jurisdiction. 'default' for global, 'eu' for EU-only storage, 'fedramp' for FedRAMP-compliant storage. + + max_files : typing.Optional[int] + Maximum number of files to index (optional) + + skip_existing : typing.Optional[bool] + Skip files that are already indexed in the collection. When true, only new files will be indexed. Set to false to re-index all files. + + custom_metadata : typing.Optional[typing.Dict[str, typing.Any]] + Custom metadata to attach to all indexed chunks. Keys must be strings. Values: str, int, float, bool, or array of strings. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + IndexJobResponseV2 + Indexing Job Started + + Examples + -------- + import asyncio + + from runcaptain import AsyncCaptain + + client = AsyncCaptain( + organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", + ) + + + async def main() -> None: + await client.indexing.index_r2bucket_v2( + collection_name="my_documents", + bucket_name="my-r2-bucket", + account_id="your_cloudflare_account_id", + access_key_id="your_r2_access_key_id", + secret_access_key="your_r2_secret_access_key", + processing_type="advanced", + ) + + + asyncio.run(main()) + """ + _response = await self._raw_client.index_r2bucket_v2( + collection_name, + bucket_name=bucket_name, + account_id=account_id, + access_key_id=access_key_id, + secret_access_key=secret_access_key, + processing_type=processing_type, + jurisdiction=jurisdiction, + max_files=max_files, + skip_existing=skip_existing, + custom_metadata=custom_metadata, + request_options=request_options, + ) + return _response.data + + async def index_r2file_v2( + self, + collection_name: str, + *, + bucket_name: str, + file_uri: str, + account_id: str, + access_key_id: str, + secret_access_key: str, + processing_type: IndexR2FileRequestV2ProcessingType, + jurisdiction: typing.Optional[IndexR2FileRequestV2Jurisdiction] = OMIT, + custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> IndexJobResponseV2: + """ + Index a single file from a Cloudflare R2 bucket into a collection. Returns a job_id for tracking progress. + + Parameters + ---------- + collection_name : str + + bucket_name : str + Name of the R2 bucket + + file_uri : str + R2 URI format: r2://bucket-name/path/to/file.pdf + + account_id : str + Cloudflare account ID (found in your R2 dashboard URL) + + access_key_id : str + R2 S3 API token Access Key ID + + secret_access_key : str + R2 S3 API token Secret Access Key + + processing_type : IndexR2FileRequestV2ProcessingType + Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + + jurisdiction : typing.Optional[IndexR2FileRequestV2Jurisdiction] + R2 jurisdiction. 'default' for global, 'eu' for EU-only storage, 'fedramp' for FedRAMP-compliant storage. + + custom_metadata : typing.Optional[typing.Dict[str, typing.Any]] + Custom metadata to attach to all chunks from this file. Keys must be strings. Values: str, int, float, bool, or array of strings. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + IndexJobResponseV2 + Indexing Job Started + + Examples + -------- + import asyncio + + from runcaptain import AsyncCaptain + + client = AsyncCaptain( + organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", + ) + + + async def main() -> None: + await client.indexing.index_r2file_v2( + collection_name="my_documents", + bucket_name="my-r2-bucket", + file_uri="r2://my-r2-bucket/reports/annual-review.pdf", + account_id="your_cloudflare_account_id", + access_key_id="your_r2_access_key_id", + secret_access_key="your_r2_secret_access_key", + processing_type="advanced", + ) + + + asyncio.run(main()) + """ + _response = await self._raw_client.index_r2file_v2( + collection_name, + bucket_name=bucket_name, + file_uri=file_uri, + account_id=account_id, + access_key_id=access_key_id, + secret_access_key=secret_access_key, + processing_type=processing_type, + jurisdiction=jurisdiction, + custom_metadata=custom_metadata, + request_options=request_options, + ) + return _response.data + + async def index_r2directory_v2( + self, + collection_name: str, + *, + bucket_name: str, + directory_path: str, + account_id: str, + access_key_id: str, + secret_access_key: str, + processing_type: IndexR2DirectoryRequestV2ProcessingType, + jurisdiction: typing.Optional[IndexR2DirectoryRequestV2Jurisdiction] = OMIT, + max_files: typing.Optional[int] = OMIT, + skip_existing: typing.Optional[bool] = OMIT, + custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> IndexJobResponseV2: + """ + Index all files from a specific directory (prefix) in a Cloudflare R2 bucket into a collection. Uses prefix-based filtering to index only objects within the specified path. Returns a job_id for tracking progress via GET /v2/jobs/{job_id}. + + Parameters + ---------- + collection_name : str + + bucket_name : str + Name of the R2 bucket + + directory_path : str + Path to the directory (prefix) within the bucket. Accepts either a relative path (e.g., 'reports/2024/january') or a full R2 URI (e.g., 'r2://my-bucket/reports/2024/january'). All objects within this prefix will be indexed. + + account_id : str + Cloudflare account ID (found in your R2 dashboard URL) + + access_key_id : str + R2 S3 API token Access Key ID + + secret_access_key : str + R2 S3 API token Secret Access Key + + processing_type : IndexR2DirectoryRequestV2ProcessingType + Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + + jurisdiction : typing.Optional[IndexR2DirectoryRequestV2Jurisdiction] + R2 jurisdiction. 'default' for global, 'eu' for EU-only storage, 'fedramp' for FedRAMP-compliant storage. + + max_files : typing.Optional[int] + Maximum number of files to index (optional) + + skip_existing : typing.Optional[bool] + Skip files that are already indexed in the collection. When true, only new files will be indexed. Set to false to re-index all files. + + custom_metadata : typing.Optional[typing.Dict[str, typing.Any]] + Custom metadata to attach to all indexed chunks. Keys must be strings. Values: str, int, float, bool, or array of strings. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + IndexJobResponseV2 + Indexing Job Started + + Examples + -------- + import asyncio + + from runcaptain import AsyncCaptain + + client = AsyncCaptain( + organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", + ) + + + async def main() -> None: + await client.indexing.index_r2directory_v2( + collection_name="my_documents", + bucket_name="my-r2-bucket", + directory_path="reports/2025/", + account_id="your_cloudflare_account_id", + access_key_id="your_r2_access_key_id", + secret_access_key="your_r2_secret_access_key", + processing_type="advanced", + ) + + + asyncio.run(main()) + """ + _response = await self._raw_client.index_r2directory_v2( + collection_name, + bucket_name=bucket_name, + directory_path=directory_path, + account_id=account_id, + access_key_id=access_key_id, + secret_access_key=secret_access_key, + processing_type=processing_type, + jurisdiction=jurisdiction, + max_files=max_files, + skip_existing=skip_existing, + custom_metadata=custom_metadata, + request_options=request_options, + ) + return _response.data + + async def index_url_v2( + self, + collection_name: str, + *, + processing_type: IndexUrlRequestV2ProcessingType, + url: typing.Optional[str] = OMIT, + urls: typing.Optional[typing.Sequence[str]] = OMIT, + custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> IndexJobResponseV2: + """ + Index documents from public URLs into a collection. No cloud storage credentials required. + + You can provide either: + - `url` — a single URL string for one document + - `urls` — an array of URL strings for multiple documents + + Supported file types include PDF, TXT, DOCX, CSV, XLSX, and more. Documents are downloaded and processed through the same pipeline as cloud storage indexing. + + Returns a job_id for tracking progress via GET /v2/jobs/{job_id}. + + Parameters + ---------- + collection_name : str + + processing_type : IndexUrlRequestV2ProcessingType + Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + + url : typing.Optional[str] + A single public URL to a hosted document (PDF, TXT, DOCX, etc.). Provide either 'url' or 'urls', not both. + + urls : typing.Optional[typing.Sequence[str]] + An array of public URLs to hosted documents. Provide either 'url' or 'urls', not both. + + custom_metadata : typing.Optional[typing.Dict[str, typing.Any]] + Custom metadata to attach to all indexed chunks. Keys must be strings. Values: str, int, float, bool, or array of strings. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + IndexJobResponseV2 + Indexing job started + + Examples + -------- + import asyncio + + from runcaptain import AsyncCaptain + + client = AsyncCaptain( + organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", + ) + + + async def main() -> None: + await client.indexing.index_url_v2( + collection_name="my_documents", + url="https://example.com/documents/report.pdf", + processing_type="advanced", + ) + + + asyncio.run(main()) + """ + _response = await self._raw_client.index_url_v2( + collection_name, + processing_type=processing_type, + url=url, + urls=urls, + custom_metadata=custom_metadata, + request_options=request_options, + ) + return _response.data diff --git a/src/runcaptain/indexing/raw_client.py b/src/runcaptain/indexing/raw_client.py index bb8f09b..9af4bd5 100644 --- a/src/runcaptain/indexing/raw_client.py +++ b/src/runcaptain/indexing/raw_client.py @@ -16,9 +16,16 @@ from .types.index_gcs_directory_request_v2processing_type import IndexGcsDirectoryRequestV2ProcessingType from .types.index_gcs_file_request_v2processing_type import IndexGcsFileRequestV2ProcessingType from .types.index_gcs_request_v2processing_type import IndexGcsRequestV2ProcessingType +from .types.index_r2directory_request_v2jurisdiction import IndexR2DirectoryRequestV2Jurisdiction +from .types.index_r2directory_request_v2processing_type import IndexR2DirectoryRequestV2ProcessingType +from .types.index_r2file_request_v2jurisdiction import IndexR2FileRequestV2Jurisdiction +from .types.index_r2file_request_v2processing_type import IndexR2FileRequestV2ProcessingType +from .types.index_r2request_v2jurisdiction import IndexR2RequestV2Jurisdiction +from .types.index_r2request_v2processing_type import IndexR2RequestV2ProcessingType from .types.index_s3directory_request_v2processing_type import IndexS3DirectoryRequestV2ProcessingType from .types.index_s3file_request_v2processing_type import IndexS3FileRequestV2ProcessingType from .types.index_s3request_v2processing_type import IndexS3RequestV2ProcessingType +from .types.index_url_request_v2processing_type import IndexUrlRequestV2ProcessingType # this is used as the default value for optional parameters OMIT = typing.cast(typing.Any, ...) @@ -36,7 +43,6 @@ def index_s3bucket_v2( aws_access_key_id: str, aws_secret_access_key: str, processing_type: IndexS3RequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, bucket_region: typing.Optional[str] = OMIT, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, @@ -49,7 +55,6 @@ def index_s3bucket_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the S3 bucket @@ -63,9 +68,6 @@ def index_s3bucket_v2( processing_type : IndexS3RequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - bucket_region : typing.Optional[str] AWS region where the bucket is located @@ -101,7 +103,6 @@ def index_s3bucket_v2( }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, }, request_options=request_options, omit=OMIT, @@ -140,7 +141,6 @@ def index_s3file_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the S3 bucket @@ -222,7 +222,6 @@ def index_gcs_bucket_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the GCS bucket @@ -299,7 +298,6 @@ def index_gcs_file_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the GCS bucket @@ -364,7 +362,6 @@ def index_s3directory_v2( aws_access_key_id: str, aws_secret_access_key: str, processing_type: IndexS3DirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, bucket_region: typing.Optional[str] = OMIT, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, @@ -377,7 +374,6 @@ def index_s3directory_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the S3 bucket @@ -394,9 +390,6 @@ def index_s3directory_v2( processing_type : IndexS3DirectoryRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - bucket_region : typing.Optional[str] AWS region where the bucket is located @@ -433,7 +426,6 @@ def index_s3directory_v2( }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, }, request_options=request_options, omit=OMIT, @@ -461,7 +453,6 @@ def index_gcs_directory_v2( directory_path: str, service_account_json: str, processing_type: IndexGcsDirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, @@ -473,7 +464,6 @@ def index_gcs_directory_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the GCS bucket @@ -487,9 +477,6 @@ def index_gcs_directory_v2( processing_type : IndexGcsDirectoryRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -521,7 +508,6 @@ def index_gcs_directory_v2( }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, }, request_options=request_options, omit=OMIT, @@ -549,7 +535,6 @@ def index_azure_container_v2( account_name: str, account_key: str, processing_type: IndexAzureRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, @@ -561,7 +546,6 @@ def index_azure_container_v2( Parameters ---------- collection_name : str - Name of the collection to index into container_name : str Name of the Azure Blob Storage container @@ -575,9 +559,6 @@ def index_azure_container_v2( processing_type : IndexAzureRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -609,7 +590,6 @@ def index_azure_container_v2( }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, }, request_options=request_options, omit=OMIT, @@ -647,7 +627,6 @@ def index_azure_file_v2( Parameters ---------- collection_name : str - Name of the collection to index into container_name : str Name of the Azure Blob Storage container @@ -716,7 +695,6 @@ def index_azure_directory_v2( account_name: str, account_key: str, processing_type: IndexAzureDirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, @@ -728,7 +706,6 @@ def index_azure_directory_v2( Parameters ---------- collection_name : str - Name of the collection to index into container_name : str Name of the Azure Blob Storage container @@ -745,9 +722,6 @@ def index_azure_directory_v2( processing_type : IndexAzureDirectoryRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -780,7 +754,6 @@ def index_azure_directory_v2( }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, }, request_options=request_options, omit=OMIT, @@ -800,51 +773,45 @@ def index_azure_directory_v2( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - -class AsyncRawIndexingClient: - def __init__(self, *, client_wrapper: AsyncClientWrapper): - self._client_wrapper = client_wrapper - - async def index_s3bucket_v2( + def index_r2bucket_v2( self, collection_name: str, *, bucket_name: str, - aws_access_key_id: str, - aws_secret_access_key: str, - processing_type: IndexS3RequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, - bucket_region: typing.Optional[str] = OMIT, + account_id: str, + access_key_id: str, + secret_access_key: str, + processing_type: IndexR2RequestV2ProcessingType, + jurisdiction: typing.Optional[IndexR2RequestV2Jurisdiction] = OMIT, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, request_options: typing.Optional[RequestOptions] = None, - ) -> AsyncHttpResponse[IndexJobResponseV2]: + ) -> HttpResponse[IndexJobResponseV2]: """ - Index all files from an S3 bucket into a collection. Returns a job_id for tracking progress via GET /v2/jobs/{job_id}. + Index all files from a Cloudflare R2 bucket into a collection. R2 is S3-compatible — provide your R2 API token's Access Key ID and Secret Access Key. Returns a job_id for tracking progress via GET /v2/jobs/{job_id}. Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str - Name of the S3 bucket + Name of the R2 bucket - aws_access_key_id : str - AWS access key ID with read access to the bucket + account_id : str + Cloudflare account ID (found in your R2 dashboard URL) - aws_secret_access_key : str - AWS secret access key + access_key_id : str + R2 S3 API token Access Key ID - processing_type : IndexS3RequestV2ProcessingType - Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + secret_access_key : str + R2 S3 API token Secret Access Key - idempotency_key : typing.Optional[str] - UUID for request deduplication + processing_type : IndexR2RequestV2ProcessingType + Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - bucket_region : typing.Optional[str] - AWS region where the bucket is located + jurisdiction : typing.Optional[IndexR2RequestV2Jurisdiction] + R2 jurisdiction. 'default' for global, 'eu' for EU-only storage, 'fedramp' for FedRAMP-compliant storage. max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -860,17 +827,18 @@ async def index_s3bucket_v2( Returns ------- - AsyncHttpResponse[IndexJobResponseV2] + HttpResponse[IndexJobResponseV2] Indexing Job Started """ - _response = await self._client_wrapper.httpx_client.request( - f"v2/collections/{jsonable_encoder(collection_name)}/index/s3", + _response = self._client_wrapper.httpx_client.request( + f"v2/collections/{jsonable_encoder(collection_name)}/index/r2", method="POST", json={ "bucket_name": bucket_name, - "aws_access_key_id": aws_access_key_id, - "aws_secret_access_key": aws_secret_access_key, - "bucket_region": bucket_region, + "account_id": account_id, + "access_key_id": access_key_id, + "secret_access_key": secret_access_key, + "jurisdiction": jurisdiction, "processing_type": processing_type, "max_files": max_files, "skip_existing": skip_existing, @@ -878,7 +846,6 @@ async def index_s3bucket_v2( }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, }, request_options=request_options, omit=OMIT, @@ -892,50 +859,53 @@ async def index_s3bucket_v2( object_=_response.json(), ), ) - return AsyncHttpResponse(response=_response, data=_data) + return HttpResponse(response=_response, data=_data) _response_json = _response.json() except JSONDecodeError: raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - async def index_s3file_v2( + def index_r2file_v2( self, collection_name: str, *, bucket_name: str, file_uri: str, - aws_access_key_id: str, - aws_secret_access_key: str, - processing_type: IndexS3FileRequestV2ProcessingType, - bucket_region: typing.Optional[str] = OMIT, + account_id: str, + access_key_id: str, + secret_access_key: str, + processing_type: IndexR2FileRequestV2ProcessingType, + jurisdiction: typing.Optional[IndexR2FileRequestV2Jurisdiction] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, request_options: typing.Optional[RequestOptions] = None, - ) -> AsyncHttpResponse[IndexJobResponseV2]: + ) -> HttpResponse[IndexJobResponseV2]: """ - Index a single file from an S3 bucket into a collection. Returns a job_id for tracking progress. + Index a single file from a Cloudflare R2 bucket into a collection. Returns a job_id for tracking progress. Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str - Name of the S3 bucket + Name of the R2 bucket file_uri : str - S3 URI format: s3://bucket-name/path/to/file.pdf + R2 URI format: r2://bucket-name/path/to/file.pdf - aws_access_key_id : str - AWS access key ID with read access to the bucket + account_id : str + Cloudflare account ID (found in your R2 dashboard URL) - aws_secret_access_key : str - AWS secret access key + access_key_id : str + R2 S3 API token Access Key ID - processing_type : IndexS3FileRequestV2ProcessingType + secret_access_key : str + R2 S3 API token Secret Access Key + + processing_type : IndexR2FileRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - bucket_region : typing.Optional[str] - AWS region where the bucket is located + jurisdiction : typing.Optional[IndexR2FileRequestV2Jurisdiction] + R2 jurisdiction. 'default' for global, 'eu' for EU-only storage, 'fedramp' for FedRAMP-compliant storage. custom_metadata : typing.Optional[typing.Dict[str, typing.Any]] Custom metadata to attach to all chunks from this file. Keys must be strings. Values: str, int, float, bool, or array of strings. @@ -945,18 +915,19 @@ async def index_s3file_v2( Returns ------- - AsyncHttpResponse[IndexJobResponseV2] + HttpResponse[IndexJobResponseV2] Indexing Job Started """ - _response = await self._client_wrapper.httpx_client.request( - f"v2/collections/{jsonable_encoder(collection_name)}/index/s3/file", + _response = self._client_wrapper.httpx_client.request( + f"v2/collections/{jsonable_encoder(collection_name)}/index/r2/file", method="POST", json={ "bucket_name": bucket_name, "file_uri": file_uri, - "aws_access_key_id": aws_access_key_id, - "aws_secret_access_key": aws_secret_access_key, - "bucket_region": bucket_region, + "account_id": account_id, + "access_key_id": access_key_id, + "secret_access_key": secret_access_key, + "jurisdiction": jurisdiction, "processing_type": processing_type, "custom_metadata": custom_metadata, }, @@ -975,41 +946,56 @@ async def index_s3file_v2( object_=_response.json(), ), ) - return AsyncHttpResponse(response=_response, data=_data) + return HttpResponse(response=_response, data=_data) _response_json = _response.json() except JSONDecodeError: raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - async def index_gcs_bucket_v2( + def index_r2directory_v2( self, collection_name: str, *, bucket_name: str, - service_account_json: str, - processing_type: IndexGcsRequestV2ProcessingType, + directory_path: str, + account_id: str, + access_key_id: str, + secret_access_key: str, + processing_type: IndexR2DirectoryRequestV2ProcessingType, + jurisdiction: typing.Optional[IndexR2DirectoryRequestV2Jurisdiction] = OMIT, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, request_options: typing.Optional[RequestOptions] = None, - ) -> AsyncHttpResponse[IndexJobResponseV2]: + ) -> HttpResponse[IndexJobResponseV2]: """ - Index all files from a Google Cloud Storage bucket into a collection. Returns a job_id for tracking progress. + Index all files from a specific directory (prefix) in a Cloudflare R2 bucket into a collection. Uses prefix-based filtering to index only objects within the specified path. Returns a job_id for tracking progress via GET /v2/jobs/{job_id}. Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str - Name of the GCS bucket + Name of the R2 bucket - service_account_json : str - GCP service account JSON key with read access to the bucket + directory_path : str + Path to the directory (prefix) within the bucket. Accepts either a relative path (e.g., 'reports/2024/january') or a full R2 URI (e.g., 'r2://my-bucket/reports/2024/january'). All objects within this prefix will be indexed. - processing_type : IndexGcsRequestV2ProcessingType + account_id : str + Cloudflare account ID (found in your R2 dashboard URL) + + access_key_id : str + R2 S3 API token Access Key ID + + secret_access_key : str + R2 S3 API token Secret Access Key + + processing_type : IndexR2DirectoryRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + jurisdiction : typing.Optional[IndexR2DirectoryRequestV2Jurisdiction] + R2 jurisdiction. 'default' for global, 'eu' for EU-only storage, 'fedramp' for FedRAMP-compliant storage. + max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -1024,15 +1010,19 @@ async def index_gcs_bucket_v2( Returns ------- - AsyncHttpResponse[IndexJobResponseV2] + HttpResponse[IndexJobResponseV2] Indexing Job Started """ - _response = await self._client_wrapper.httpx_client.request( - f"v2/collections/{jsonable_encoder(collection_name)}/index/gcs", + _response = self._client_wrapper.httpx_client.request( + f"v2/collections/{jsonable_encoder(collection_name)}/index/r2/directory", method="POST", json={ "bucket_name": bucket_name, - "service_account_json": service_account_json, + "directory_path": directory_path, + "account_id": account_id, + "access_key_id": access_key_id, + "secret_access_key": secret_access_key, + "jurisdiction": jurisdiction, "processing_type": processing_type, "max_files": max_files, "skip_existing": skip_existing, @@ -1053,61 +1043,63 @@ async def index_gcs_bucket_v2( object_=_response.json(), ), ) - return AsyncHttpResponse(response=_response, data=_data) + return HttpResponse(response=_response, data=_data) _response_json = _response.json() except JSONDecodeError: raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - async def index_gcs_file_v2( + def index_url_v2( self, collection_name: str, *, - bucket_name: str, - file_uri: str, - service_account_json: str, - processing_type: IndexGcsFileRequestV2ProcessingType, + processing_type: IndexUrlRequestV2ProcessingType, + url: typing.Optional[str] = OMIT, + urls: typing.Optional[typing.Sequence[str]] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, request_options: typing.Optional[RequestOptions] = None, - ) -> AsyncHttpResponse[IndexJobResponseV2]: + ) -> HttpResponse[IndexJobResponseV2]: """ - Index a single file from a GCS bucket into a collection. Returns a job_id for tracking progress. + Index documents from public URLs into a collection. No cloud storage credentials required. + + You can provide either: + - `url` — a single URL string for one document + - `urls` — an array of URL strings for multiple documents + + Supported file types include PDF, TXT, DOCX, CSV, XLSX, and more. Documents are downloaded and processed through the same pipeline as cloud storage indexing. + + Returns a job_id for tracking progress via GET /v2/jobs/{job_id}. Parameters ---------- collection_name : str - Name of the collection to index into - - bucket_name : str - Name of the GCS bucket - file_uri : str - GCS URI format: gs://bucket-name/path/to/file.pdf + processing_type : IndexUrlRequestV2ProcessingType + Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - service_account_json : str - GCP service account JSON key with read access to the bucket + url : typing.Optional[str] + A single public URL to a hosted document (PDF, TXT, DOCX, etc.). Provide either 'url' or 'urls', not both. - processing_type : IndexGcsFileRequestV2ProcessingType - Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + urls : typing.Optional[typing.Sequence[str]] + An array of public URLs to hosted documents. Provide either 'url' or 'urls', not both. custom_metadata : typing.Optional[typing.Dict[str, typing.Any]] - Custom metadata to attach to all chunks from this file. Keys must be strings. Values: str, int, float, bool, or array of strings. + Custom metadata to attach to all indexed chunks. Keys must be strings. Values: str, int, float, bool, or array of strings. request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- - AsyncHttpResponse[IndexJobResponseV2] - Indexing Job Started + HttpResponse[IndexJobResponseV2] + Indexing job started """ - _response = await self._client_wrapper.httpx_client.request( - f"v2/collections/{jsonable_encoder(collection_name)}/index/gcs/file", + _response = self._client_wrapper.httpx_client.request( + f"v2/collections/{jsonable_encoder(collection_name)}/index/url", method="POST", json={ - "bucket_name": bucket_name, - "file_uri": file_uri, - "service_account_json": service_account_json, + "url": url, + "urls": urls, "processing_type": processing_type, "custom_metadata": custom_metadata, }, @@ -1126,22 +1118,25 @@ async def index_gcs_file_v2( object_=_response.json(), ), ) - return AsyncHttpResponse(response=_response, data=_data) + return HttpResponse(response=_response, data=_data) _response_json = _response.json() except JSONDecodeError: raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - async def index_s3directory_v2( + +class AsyncRawIndexingClient: + def __init__(self, *, client_wrapper: AsyncClientWrapper): + self._client_wrapper = client_wrapper + + async def index_s3bucket_v2( self, collection_name: str, *, bucket_name: str, - directory_path: str, aws_access_key_id: str, aws_secret_access_key: str, - processing_type: IndexS3DirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, + processing_type: IndexS3RequestV2ProcessingType, bucket_region: typing.Optional[str] = OMIT, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, @@ -1149,31 +1144,24 @@ async def index_s3directory_v2( request_options: typing.Optional[RequestOptions] = None, ) -> AsyncHttpResponse[IndexJobResponseV2]: """ - Index all files from a specific directory in an S3 bucket into a collection. Uses prefix-based filtering to index only files within the specified directory path. Returns a job_id for tracking progress via GET /v2/jobs/{job_id}. + Index all files from an S3 bucket into a collection. Returns a job_id for tracking progress via GET /v2/jobs/{job_id}. Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the S3 bucket - directory_path : str - Path to the directory within the bucket. Accepts either a relative path (e.g., 'reports/2024/january') or a full S3 URI (e.g., 's3://my-bucket/reports/2024/january'). All files within this directory and its subdirectories will be indexed. - aws_access_key_id : str AWS access key ID with read access to the bucket aws_secret_access_key : str AWS secret access key - processing_type : IndexS3DirectoryRequestV2ProcessingType + processing_type : IndexS3RequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - bucket_region : typing.Optional[str] AWS region where the bucket is located @@ -1195,11 +1183,10 @@ async def index_s3directory_v2( Indexing Job Started """ _response = await self._client_wrapper.httpx_client.request( - f"v2/collections/{jsonable_encoder(collection_name)}/index/s3/directory", + f"v2/collections/{jsonable_encoder(collection_name)}/index/s3", method="POST", json={ "bucket_name": bucket_name, - "directory_path": directory_path, "aws_access_key_id": aws_access_key_id, "aws_secret_access_key": aws_secret_access_key, "bucket_region": bucket_region, @@ -1210,7 +1197,6 @@ async def index_s3directory_v2( }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, }, request_options=request_options, omit=OMIT, @@ -1230,51 +1216,46 @@ async def index_s3directory_v2( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - async def index_gcs_directory_v2( + async def index_s3file_v2( self, collection_name: str, *, bucket_name: str, - directory_path: str, - service_account_json: str, - processing_type: IndexGcsDirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, - max_files: typing.Optional[int] = OMIT, - skip_existing: typing.Optional[bool] = OMIT, + file_uri: str, + aws_access_key_id: str, + aws_secret_access_key: str, + processing_type: IndexS3FileRequestV2ProcessingType, + bucket_region: typing.Optional[str] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> AsyncHttpResponse[IndexJobResponseV2]: """ - Index all files from a specific directory in a GCS bucket into a collection. Uses prefix-based filtering to index only files within the specified directory path. Returns a job_id for tracking progress via GET /v2/jobs/{job_id}. + Index a single file from an S3 bucket into a collection. Returns a job_id for tracking progress. Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str - Name of the GCS bucket - - directory_path : str - Path to the directory within the bucket. Accepts either a relative path (e.g., 'reports/2024/january') or a full GCS URI (e.g., 'gs://my-bucket/reports/2024/january'). All files within this directory and its subdirectories will be indexed. + Name of the S3 bucket - service_account_json : str - GCP service account JSON key with read access to the bucket + file_uri : str + S3 URI format: s3://bucket-name/path/to/file.pdf - processing_type : IndexGcsDirectoryRequestV2ProcessingType - Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + aws_access_key_id : str + AWS access key ID with read access to the bucket - idempotency_key : typing.Optional[str] - UUID for request deduplication + aws_secret_access_key : str + AWS secret access key - max_files : typing.Optional[int] - Maximum number of files to index (optional) + processing_type : IndexS3FileRequestV2ProcessingType + Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - skip_existing : typing.Optional[bool] - Skip files that are already indexed in the collection. When true, only new files will be indexed. Set to false to re-index all files. + bucket_region : typing.Optional[str] + AWS region where the bucket is located custom_metadata : typing.Optional[typing.Dict[str, typing.Any]] - Custom metadata to attach to all indexed chunks. Keys must be strings. Values: str, int, float, bool, or array of strings. + Custom metadata to attach to all chunks from this file. Keys must be strings. Values: str, int, float, bool, or array of strings. request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -1285,20 +1266,19 @@ async def index_gcs_directory_v2( Indexing Job Started """ _response = await self._client_wrapper.httpx_client.request( - f"v2/collections/{jsonable_encoder(collection_name)}/index/gcs/directory", + f"v2/collections/{jsonable_encoder(collection_name)}/index/s3/file", method="POST", json={ "bucket_name": bucket_name, - "directory_path": directory_path, - "service_account_json": service_account_json, + "file_uri": file_uri, + "aws_access_key_id": aws_access_key_id, + "aws_secret_access_key": aws_secret_access_key, + "bucket_region": bucket_region, "processing_type": processing_type, - "max_files": max_files, - "skip_existing": skip_existing, "custom_metadata": custom_metadata, }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, }, request_options=request_options, omit=OMIT, @@ -1318,43 +1298,34 @@ async def index_gcs_directory_v2( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - async def index_azure_container_v2( + async def index_gcs_bucket_v2( self, collection_name: str, *, - container_name: str, - account_name: str, - account_key: str, - processing_type: IndexAzureRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, + bucket_name: str, + service_account_json: str, + processing_type: IndexGcsRequestV2ProcessingType, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> AsyncHttpResponse[IndexJobResponseV2]: """ - Index all files from an Azure Blob Storage container into a collection. Returns a job_id for tracking progress via GET /v2/jobs/{job_id}. + Index all files from a Google Cloud Storage bucket into a collection. Returns a job_id for tracking progress. Parameters ---------- collection_name : str - Name of the collection to index into - - container_name : str - Name of the Azure Blob Storage container - account_name : str - Azure Storage account name + bucket_name : str + Name of the GCS bucket - account_key : str - Azure Storage account key (base64-encoded) + service_account_json : str + GCP service account JSON key with read access to the bucket - processing_type : IndexAzureRequestV2ProcessingType + processing_type : IndexGcsRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -1373,12 +1344,11 @@ async def index_azure_container_v2( Indexing Job Started """ _response = await self._client_wrapper.httpx_client.request( - f"v2/collections/{jsonable_encoder(collection_name)}/index/azure", + f"v2/collections/{jsonable_encoder(collection_name)}/index/gcs", method="POST", json={ - "container_name": container_name, - "account_name": account_name, - "account_key": account_key, + "bucket_name": bucket_name, + "service_account_json": service_account_json, "processing_type": processing_type, "max_files": max_files, "skip_existing": skip_existing, @@ -1386,7 +1356,6 @@ async def index_azure_container_v2( }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, }, request_options=request_options, omit=OMIT, @@ -1406,39 +1375,34 @@ async def index_azure_container_v2( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - async def index_azure_file_v2( + async def index_gcs_file_v2( self, collection_name: str, *, - container_name: str, + bucket_name: str, file_uri: str, - account_name: str, - account_key: str, - processing_type: IndexAzureFileRequestV2ProcessingType, + service_account_json: str, + processing_type: IndexGcsFileRequestV2ProcessingType, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> AsyncHttpResponse[IndexJobResponseV2]: """ - Index a single file from an Azure Blob Storage container into a collection. Returns a job_id for tracking progress. + Index a single file from a GCS bucket into a collection. Returns a job_id for tracking progress. Parameters ---------- collection_name : str - Name of the collection to index into - container_name : str - Name of the Azure Blob Storage container + bucket_name : str + Name of the GCS bucket file_uri : str - Azure Blob Storage URI format: https://{account}.blob.core.windows.net/{container}/path/to/file.pdf - - account_name : str - Azure Storage account name + GCS URI format: gs://bucket-name/path/to/file.pdf - account_key : str - Azure Storage account key (base64-encoded) + service_account_json : str + GCP service account JSON key with read access to the bucket - processing_type : IndexAzureFileRequestV2ProcessingType + processing_type : IndexGcsFileRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. custom_metadata : typing.Optional[typing.Dict[str, typing.Any]] @@ -1453,13 +1417,12 @@ async def index_azure_file_v2( Indexing Job Started """ _response = await self._client_wrapper.httpx_client.request( - f"v2/collections/{jsonable_encoder(collection_name)}/index/azure/file", + f"v2/collections/{jsonable_encoder(collection_name)}/index/gcs/file", method="POST", json={ - "container_name": container_name, + "bucket_name": bucket_name, "file_uri": file_uri, - "account_name": account_name, - "account_key": account_key, + "service_account_json": service_account_json, "processing_type": processing_type, "custom_metadata": custom_metadata, }, @@ -1484,47 +1447,212 @@ async def index_azure_file_v2( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - async def index_azure_directory_v2( + async def index_s3directory_v2( self, collection_name: str, *, - container_name: str, + bucket_name: str, + directory_path: str, + aws_access_key_id: str, + aws_secret_access_key: str, + processing_type: IndexS3DirectoryRequestV2ProcessingType, + bucket_region: typing.Optional[str] = OMIT, + max_files: typing.Optional[int] = OMIT, + skip_existing: typing.Optional[bool] = OMIT, + custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> AsyncHttpResponse[IndexJobResponseV2]: + """ + Index all files from a specific directory in an S3 bucket into a collection. Uses prefix-based filtering to index only files within the specified directory path. Returns a job_id for tracking progress via GET /v2/jobs/{job_id}. + + Parameters + ---------- + collection_name : str + + bucket_name : str + Name of the S3 bucket + + directory_path : str + Path to the directory within the bucket. Accepts either a relative path (e.g., 'reports/2024/january') or a full S3 URI (e.g., 's3://my-bucket/reports/2024/january'). All files within this directory and its subdirectories will be indexed. + + aws_access_key_id : str + AWS access key ID with read access to the bucket + + aws_secret_access_key : str + AWS secret access key + + processing_type : IndexS3DirectoryRequestV2ProcessingType + Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + + bucket_region : typing.Optional[str] + AWS region where the bucket is located + + max_files : typing.Optional[int] + Maximum number of files to index (optional) + + skip_existing : typing.Optional[bool] + Skip files that are already indexed in the collection. When true, only new files will be indexed. Set to false to re-index all files. + + custom_metadata : typing.Optional[typing.Dict[str, typing.Any]] + Custom metadata to attach to all indexed chunks. Keys must be strings. Values: str, int, float, bool, or array of strings. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + AsyncHttpResponse[IndexJobResponseV2] + Indexing Job Started + """ + _response = await self._client_wrapper.httpx_client.request( + f"v2/collections/{jsonable_encoder(collection_name)}/index/s3/directory", + method="POST", + json={ + "bucket_name": bucket_name, + "directory_path": directory_path, + "aws_access_key_id": aws_access_key_id, + "aws_secret_access_key": aws_secret_access_key, + "bucket_region": bucket_region, + "processing_type": processing_type, + "max_files": max_files, + "skip_existing": skip_existing, + "custom_metadata": custom_metadata, + }, + headers={ + "content-type": "application/json", + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + IndexJobResponseV2, + parse_obj_as( + type_=IndexJobResponseV2, # type: ignore + object_=_response.json(), + ), + ) + return AsyncHttpResponse(response=_response, data=_data) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) + + async def index_gcs_directory_v2( + self, + collection_name: str, + *, + bucket_name: str, directory_path: str, + service_account_json: str, + processing_type: IndexGcsDirectoryRequestV2ProcessingType, + max_files: typing.Optional[int] = OMIT, + skip_existing: typing.Optional[bool] = OMIT, + custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> AsyncHttpResponse[IndexJobResponseV2]: + """ + Index all files from a specific directory in a GCS bucket into a collection. Uses prefix-based filtering to index only files within the specified directory path. Returns a job_id for tracking progress via GET /v2/jobs/{job_id}. + + Parameters + ---------- + collection_name : str + + bucket_name : str + Name of the GCS bucket + + directory_path : str + Path to the directory within the bucket. Accepts either a relative path (e.g., 'reports/2024/january') or a full GCS URI (e.g., 'gs://my-bucket/reports/2024/january'). All files within this directory and its subdirectories will be indexed. + + service_account_json : str + GCP service account JSON key with read access to the bucket + + processing_type : IndexGcsDirectoryRequestV2ProcessingType + Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + + max_files : typing.Optional[int] + Maximum number of files to index (optional) + + skip_existing : typing.Optional[bool] + Skip files that are already indexed in the collection. When true, only new files will be indexed. Set to false to re-index all files. + + custom_metadata : typing.Optional[typing.Dict[str, typing.Any]] + Custom metadata to attach to all indexed chunks. Keys must be strings. Values: str, int, float, bool, or array of strings. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + AsyncHttpResponse[IndexJobResponseV2] + Indexing Job Started + """ + _response = await self._client_wrapper.httpx_client.request( + f"v2/collections/{jsonable_encoder(collection_name)}/index/gcs/directory", + method="POST", + json={ + "bucket_name": bucket_name, + "directory_path": directory_path, + "service_account_json": service_account_json, + "processing_type": processing_type, + "max_files": max_files, + "skip_existing": skip_existing, + "custom_metadata": custom_metadata, + }, + headers={ + "content-type": "application/json", + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + IndexJobResponseV2, + parse_obj_as( + type_=IndexJobResponseV2, # type: ignore + object_=_response.json(), + ), + ) + return AsyncHttpResponse(response=_response, data=_data) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) + + async def index_azure_container_v2( + self, + collection_name: str, + *, + container_name: str, account_name: str, account_key: str, - processing_type: IndexAzureDirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, + processing_type: IndexAzureRequestV2ProcessingType, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> AsyncHttpResponse[IndexJobResponseV2]: """ - Index all files from a specific directory (prefix) in an Azure Blob Storage container into a collection. Uses prefix-based filtering to index only blobs within the specified path. Returns a job_id for tracking progress via GET /v2/jobs/{job_id}. + Index all files from an Azure Blob Storage container into a collection. Returns a job_id for tracking progress via GET /v2/jobs/{job_id}. Parameters ---------- collection_name : str - Name of the collection to index into container_name : str Name of the Azure Blob Storage container - directory_path : str - Path to the directory (prefix) within the container. Accepts either a relative path (e.g., 'reports/2024/january') or a full Azure Blob URI (e.g., 'https://account.blob.core.windows.net/container/reports/2024/january'). All blobs within this prefix will be indexed. - account_name : str Azure Storage account name account_key : str Azure Storage account key (base64-encoded) - processing_type : IndexAzureDirectoryRequestV2ProcessingType + processing_type : IndexAzureRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -1543,11 +1671,10 @@ async def index_azure_directory_v2( Indexing Job Started """ _response = await self._client_wrapper.httpx_client.request( - f"v2/collections/{jsonable_encoder(collection_name)}/index/azure/directory", + f"v2/collections/{jsonable_encoder(collection_name)}/index/azure", method="POST", json={ "container_name": container_name, - "directory_path": directory_path, "account_name": account_name, "account_key": account_key, "processing_type": processing_type, @@ -1557,7 +1684,521 @@ async def index_azure_directory_v2( }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + IndexJobResponseV2, + parse_obj_as( + type_=IndexJobResponseV2, # type: ignore + object_=_response.json(), + ), + ) + return AsyncHttpResponse(response=_response, data=_data) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) + + async def index_azure_file_v2( + self, + collection_name: str, + *, + container_name: str, + file_uri: str, + account_name: str, + account_key: str, + processing_type: IndexAzureFileRequestV2ProcessingType, + custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> AsyncHttpResponse[IndexJobResponseV2]: + """ + Index a single file from an Azure Blob Storage container into a collection. Returns a job_id for tracking progress. + + Parameters + ---------- + collection_name : str + + container_name : str + Name of the Azure Blob Storage container + + file_uri : str + Azure Blob Storage URI format: https://{account}.blob.core.windows.net/{container}/path/to/file.pdf + + account_name : str + Azure Storage account name + + account_key : str + Azure Storage account key (base64-encoded) + + processing_type : IndexAzureFileRequestV2ProcessingType + Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + + custom_metadata : typing.Optional[typing.Dict[str, typing.Any]] + Custom metadata to attach to all chunks from this file. Keys must be strings. Values: str, int, float, bool, or array of strings. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + AsyncHttpResponse[IndexJobResponseV2] + Indexing Job Started + """ + _response = await self._client_wrapper.httpx_client.request( + f"v2/collections/{jsonable_encoder(collection_name)}/index/azure/file", + method="POST", + json={ + "container_name": container_name, + "file_uri": file_uri, + "account_name": account_name, + "account_key": account_key, + "processing_type": processing_type, + "custom_metadata": custom_metadata, + }, + headers={ + "content-type": "application/json", + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + IndexJobResponseV2, + parse_obj_as( + type_=IndexJobResponseV2, # type: ignore + object_=_response.json(), + ), + ) + return AsyncHttpResponse(response=_response, data=_data) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) + + async def index_azure_directory_v2( + self, + collection_name: str, + *, + container_name: str, + directory_path: str, + account_name: str, + account_key: str, + processing_type: IndexAzureDirectoryRequestV2ProcessingType, + max_files: typing.Optional[int] = OMIT, + skip_existing: typing.Optional[bool] = OMIT, + custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> AsyncHttpResponse[IndexJobResponseV2]: + """ + Index all files from a specific directory (prefix) in an Azure Blob Storage container into a collection. Uses prefix-based filtering to index only blobs within the specified path. Returns a job_id for tracking progress via GET /v2/jobs/{job_id}. + + Parameters + ---------- + collection_name : str + + container_name : str + Name of the Azure Blob Storage container + + directory_path : str + Path to the directory (prefix) within the container. Accepts either a relative path (e.g., 'reports/2024/january') or a full Azure Blob URI (e.g., 'https://account.blob.core.windows.net/container/reports/2024/january'). All blobs within this prefix will be indexed. + + account_name : str + Azure Storage account name + + account_key : str + Azure Storage account key (base64-encoded) + + processing_type : IndexAzureDirectoryRequestV2ProcessingType + Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + + max_files : typing.Optional[int] + Maximum number of files to index (optional) + + skip_existing : typing.Optional[bool] + Skip files that are already indexed in the collection. When true, only new files will be indexed. Set to false to re-index all files. + + custom_metadata : typing.Optional[typing.Dict[str, typing.Any]] + Custom metadata to attach to all indexed chunks. Keys must be strings. Values: str, int, float, bool, or array of strings. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + AsyncHttpResponse[IndexJobResponseV2] + Indexing Job Started + """ + _response = await self._client_wrapper.httpx_client.request( + f"v2/collections/{jsonable_encoder(collection_name)}/index/azure/directory", + method="POST", + json={ + "container_name": container_name, + "directory_path": directory_path, + "account_name": account_name, + "account_key": account_key, + "processing_type": processing_type, + "max_files": max_files, + "skip_existing": skip_existing, + "custom_metadata": custom_metadata, + }, + headers={ + "content-type": "application/json", + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + IndexJobResponseV2, + parse_obj_as( + type_=IndexJobResponseV2, # type: ignore + object_=_response.json(), + ), + ) + return AsyncHttpResponse(response=_response, data=_data) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) + + async def index_r2bucket_v2( + self, + collection_name: str, + *, + bucket_name: str, + account_id: str, + access_key_id: str, + secret_access_key: str, + processing_type: IndexR2RequestV2ProcessingType, + jurisdiction: typing.Optional[IndexR2RequestV2Jurisdiction] = OMIT, + max_files: typing.Optional[int] = OMIT, + skip_existing: typing.Optional[bool] = OMIT, + custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> AsyncHttpResponse[IndexJobResponseV2]: + """ + Index all files from a Cloudflare R2 bucket into a collection. R2 is S3-compatible — provide your R2 API token's Access Key ID and Secret Access Key. Returns a job_id for tracking progress via GET /v2/jobs/{job_id}. + + Parameters + ---------- + collection_name : str + + bucket_name : str + Name of the R2 bucket + + account_id : str + Cloudflare account ID (found in your R2 dashboard URL) + + access_key_id : str + R2 S3 API token Access Key ID + + secret_access_key : str + R2 S3 API token Secret Access Key + + processing_type : IndexR2RequestV2ProcessingType + Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + + jurisdiction : typing.Optional[IndexR2RequestV2Jurisdiction] + R2 jurisdiction. 'default' for global, 'eu' for EU-only storage, 'fedramp' for FedRAMP-compliant storage. + + max_files : typing.Optional[int] + Maximum number of files to index (optional) + + skip_existing : typing.Optional[bool] + Skip files that are already indexed in the collection. When true, only new files will be indexed. Set to false to re-index all files. + + custom_metadata : typing.Optional[typing.Dict[str, typing.Any]] + Custom metadata to attach to all indexed chunks. Keys must be strings. Values: str, int, float, bool, or array of strings. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + AsyncHttpResponse[IndexJobResponseV2] + Indexing Job Started + """ + _response = await self._client_wrapper.httpx_client.request( + f"v2/collections/{jsonable_encoder(collection_name)}/index/r2", + method="POST", + json={ + "bucket_name": bucket_name, + "account_id": account_id, + "access_key_id": access_key_id, + "secret_access_key": secret_access_key, + "jurisdiction": jurisdiction, + "processing_type": processing_type, + "max_files": max_files, + "skip_existing": skip_existing, + "custom_metadata": custom_metadata, + }, + headers={ + "content-type": "application/json", + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + IndexJobResponseV2, + parse_obj_as( + type_=IndexJobResponseV2, # type: ignore + object_=_response.json(), + ), + ) + return AsyncHttpResponse(response=_response, data=_data) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) + + async def index_r2file_v2( + self, + collection_name: str, + *, + bucket_name: str, + file_uri: str, + account_id: str, + access_key_id: str, + secret_access_key: str, + processing_type: IndexR2FileRequestV2ProcessingType, + jurisdiction: typing.Optional[IndexR2FileRequestV2Jurisdiction] = OMIT, + custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> AsyncHttpResponse[IndexJobResponseV2]: + """ + Index a single file from a Cloudflare R2 bucket into a collection. Returns a job_id for tracking progress. + + Parameters + ---------- + collection_name : str + + bucket_name : str + Name of the R2 bucket + + file_uri : str + R2 URI format: r2://bucket-name/path/to/file.pdf + + account_id : str + Cloudflare account ID (found in your R2 dashboard URL) + + access_key_id : str + R2 S3 API token Access Key ID + + secret_access_key : str + R2 S3 API token Secret Access Key + + processing_type : IndexR2FileRequestV2ProcessingType + Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + + jurisdiction : typing.Optional[IndexR2FileRequestV2Jurisdiction] + R2 jurisdiction. 'default' for global, 'eu' for EU-only storage, 'fedramp' for FedRAMP-compliant storage. + + custom_metadata : typing.Optional[typing.Dict[str, typing.Any]] + Custom metadata to attach to all chunks from this file. Keys must be strings. Values: str, int, float, bool, or array of strings. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + AsyncHttpResponse[IndexJobResponseV2] + Indexing Job Started + """ + _response = await self._client_wrapper.httpx_client.request( + f"v2/collections/{jsonable_encoder(collection_name)}/index/r2/file", + method="POST", + json={ + "bucket_name": bucket_name, + "file_uri": file_uri, + "account_id": account_id, + "access_key_id": access_key_id, + "secret_access_key": secret_access_key, + "jurisdiction": jurisdiction, + "processing_type": processing_type, + "custom_metadata": custom_metadata, + }, + headers={ + "content-type": "application/json", + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + IndexJobResponseV2, + parse_obj_as( + type_=IndexJobResponseV2, # type: ignore + object_=_response.json(), + ), + ) + return AsyncHttpResponse(response=_response, data=_data) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) + + async def index_r2directory_v2( + self, + collection_name: str, + *, + bucket_name: str, + directory_path: str, + account_id: str, + access_key_id: str, + secret_access_key: str, + processing_type: IndexR2DirectoryRequestV2ProcessingType, + jurisdiction: typing.Optional[IndexR2DirectoryRequestV2Jurisdiction] = OMIT, + max_files: typing.Optional[int] = OMIT, + skip_existing: typing.Optional[bool] = OMIT, + custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> AsyncHttpResponse[IndexJobResponseV2]: + """ + Index all files from a specific directory (prefix) in a Cloudflare R2 bucket into a collection. Uses prefix-based filtering to index only objects within the specified path. Returns a job_id for tracking progress via GET /v2/jobs/{job_id}. + + Parameters + ---------- + collection_name : str + + bucket_name : str + Name of the R2 bucket + + directory_path : str + Path to the directory (prefix) within the bucket. Accepts either a relative path (e.g., 'reports/2024/january') or a full R2 URI (e.g., 'r2://my-bucket/reports/2024/january'). All objects within this prefix will be indexed. + + account_id : str + Cloudflare account ID (found in your R2 dashboard URL) + + access_key_id : str + R2 S3 API token Access Key ID + + secret_access_key : str + R2 S3 API token Secret Access Key + + processing_type : IndexR2DirectoryRequestV2ProcessingType + Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + + jurisdiction : typing.Optional[IndexR2DirectoryRequestV2Jurisdiction] + R2 jurisdiction. 'default' for global, 'eu' for EU-only storage, 'fedramp' for FedRAMP-compliant storage. + + max_files : typing.Optional[int] + Maximum number of files to index (optional) + + skip_existing : typing.Optional[bool] + Skip files that are already indexed in the collection. When true, only new files will be indexed. Set to false to re-index all files. + + custom_metadata : typing.Optional[typing.Dict[str, typing.Any]] + Custom metadata to attach to all indexed chunks. Keys must be strings. Values: str, int, float, bool, or array of strings. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + AsyncHttpResponse[IndexJobResponseV2] + Indexing Job Started + """ + _response = await self._client_wrapper.httpx_client.request( + f"v2/collections/{jsonable_encoder(collection_name)}/index/r2/directory", + method="POST", + json={ + "bucket_name": bucket_name, + "directory_path": directory_path, + "account_id": account_id, + "access_key_id": access_key_id, + "secret_access_key": secret_access_key, + "jurisdiction": jurisdiction, + "processing_type": processing_type, + "max_files": max_files, + "skip_existing": skip_existing, + "custom_metadata": custom_metadata, + }, + headers={ + "content-type": "application/json", + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + IndexJobResponseV2, + parse_obj_as( + type_=IndexJobResponseV2, # type: ignore + object_=_response.json(), + ), + ) + return AsyncHttpResponse(response=_response, data=_data) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) + + async def index_url_v2( + self, + collection_name: str, + *, + processing_type: IndexUrlRequestV2ProcessingType, + url: typing.Optional[str] = OMIT, + urls: typing.Optional[typing.Sequence[str]] = OMIT, + custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> AsyncHttpResponse[IndexJobResponseV2]: + """ + Index documents from public URLs into a collection. No cloud storage credentials required. + + You can provide either: + - `url` — a single URL string for one document + - `urls` — an array of URL strings for multiple documents + + Supported file types include PDF, TXT, DOCX, CSV, XLSX, and more. Documents are downloaded and processed through the same pipeline as cloud storage indexing. + + Returns a job_id for tracking progress via GET /v2/jobs/{job_id}. + + Parameters + ---------- + collection_name : str + + processing_type : IndexUrlRequestV2ProcessingType + Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. + + url : typing.Optional[str] + A single public URL to a hosted document (PDF, TXT, DOCX, etc.). Provide either 'url' or 'urls', not both. + + urls : typing.Optional[typing.Sequence[str]] + An array of public URLs to hosted documents. Provide either 'url' or 'urls', not both. + + custom_metadata : typing.Optional[typing.Dict[str, typing.Any]] + Custom metadata to attach to all indexed chunks. Keys must be strings. Values: str, int, float, bool, or array of strings. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + AsyncHttpResponse[IndexJobResponseV2] + Indexing job started + """ + _response = await self._client_wrapper.httpx_client.request( + f"v2/collections/{jsonable_encoder(collection_name)}/index/url", + method="POST", + json={ + "url": url, + "urls": urls, + "processing_type": processing_type, + "custom_metadata": custom_metadata, + }, + headers={ + "content-type": "application/json", }, request_options=request_options, omit=OMIT, diff --git a/src/runcaptain/indexing/types/__init__.py b/src/runcaptain/indexing/types/__init__.py index fa3fb07..817f73a 100644 --- a/src/runcaptain/indexing/types/__init__.py +++ b/src/runcaptain/indexing/types/__init__.py @@ -12,9 +12,16 @@ from .index_gcs_directory_request_v2processing_type import IndexGcsDirectoryRequestV2ProcessingType from .index_gcs_file_request_v2processing_type import IndexGcsFileRequestV2ProcessingType from .index_gcs_request_v2processing_type import IndexGcsRequestV2ProcessingType + from .index_r2directory_request_v2jurisdiction import IndexR2DirectoryRequestV2Jurisdiction + from .index_r2directory_request_v2processing_type import IndexR2DirectoryRequestV2ProcessingType + from .index_r2file_request_v2jurisdiction import IndexR2FileRequestV2Jurisdiction + from .index_r2file_request_v2processing_type import IndexR2FileRequestV2ProcessingType + from .index_r2request_v2jurisdiction import IndexR2RequestV2Jurisdiction + from .index_r2request_v2processing_type import IndexR2RequestV2ProcessingType from .index_s3directory_request_v2processing_type import IndexS3DirectoryRequestV2ProcessingType from .index_s3file_request_v2processing_type import IndexS3FileRequestV2ProcessingType from .index_s3request_v2processing_type import IndexS3RequestV2ProcessingType + from .index_url_request_v2processing_type import IndexUrlRequestV2ProcessingType _dynamic_imports: typing.Dict[str, str] = { "IndexAzureDirectoryRequestV2ProcessingType": ".index_azure_directory_request_v2processing_type", "IndexAzureFileRequestV2ProcessingType": ".index_azure_file_request_v2processing_type", @@ -22,9 +29,16 @@ "IndexGcsDirectoryRequestV2ProcessingType": ".index_gcs_directory_request_v2processing_type", "IndexGcsFileRequestV2ProcessingType": ".index_gcs_file_request_v2processing_type", "IndexGcsRequestV2ProcessingType": ".index_gcs_request_v2processing_type", + "IndexR2DirectoryRequestV2Jurisdiction": ".index_r2directory_request_v2jurisdiction", + "IndexR2DirectoryRequestV2ProcessingType": ".index_r2directory_request_v2processing_type", + "IndexR2FileRequestV2Jurisdiction": ".index_r2file_request_v2jurisdiction", + "IndexR2FileRequestV2ProcessingType": ".index_r2file_request_v2processing_type", + "IndexR2RequestV2Jurisdiction": ".index_r2request_v2jurisdiction", + "IndexR2RequestV2ProcessingType": ".index_r2request_v2processing_type", "IndexS3DirectoryRequestV2ProcessingType": ".index_s3directory_request_v2processing_type", "IndexS3FileRequestV2ProcessingType": ".index_s3file_request_v2processing_type", "IndexS3RequestV2ProcessingType": ".index_s3request_v2processing_type", + "IndexUrlRequestV2ProcessingType": ".index_url_request_v2processing_type", } @@ -56,7 +70,14 @@ def __dir__(): "IndexGcsDirectoryRequestV2ProcessingType", "IndexGcsFileRequestV2ProcessingType", "IndexGcsRequestV2ProcessingType", + "IndexR2DirectoryRequestV2Jurisdiction", + "IndexR2DirectoryRequestV2ProcessingType", + "IndexR2FileRequestV2Jurisdiction", + "IndexR2FileRequestV2ProcessingType", + "IndexR2RequestV2Jurisdiction", + "IndexR2RequestV2ProcessingType", "IndexS3DirectoryRequestV2ProcessingType", "IndexS3FileRequestV2ProcessingType", "IndexS3RequestV2ProcessingType", + "IndexUrlRequestV2ProcessingType", ] diff --git a/src/runcaptain/indexing/types/index_r2directory_request_v2jurisdiction.py b/src/runcaptain/indexing/types/index_r2directory_request_v2jurisdiction.py new file mode 100644 index 0000000..013d0a0 --- /dev/null +++ b/src/runcaptain/indexing/types/index_r2directory_request_v2jurisdiction.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +IndexR2DirectoryRequestV2Jurisdiction = typing.Union[typing.Literal["default", "eu", "fedramp"], typing.Any] diff --git a/src/runcaptain/indexing/types/index_r2directory_request_v2processing_type.py b/src/runcaptain/indexing/types/index_r2directory_request_v2processing_type.py new file mode 100644 index 0000000..300f4be --- /dev/null +++ b/src/runcaptain/indexing/types/index_r2directory_request_v2processing_type.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +IndexR2DirectoryRequestV2ProcessingType = typing.Union[typing.Literal["advanced", "basic"], typing.Any] diff --git a/src/runcaptain/indexing/types/index_r2file_request_v2jurisdiction.py b/src/runcaptain/indexing/types/index_r2file_request_v2jurisdiction.py new file mode 100644 index 0000000..c53252a --- /dev/null +++ b/src/runcaptain/indexing/types/index_r2file_request_v2jurisdiction.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +IndexR2FileRequestV2Jurisdiction = typing.Union[typing.Literal["default", "eu", "fedramp"], typing.Any] diff --git a/src/runcaptain/indexing/types/index_r2file_request_v2processing_type.py b/src/runcaptain/indexing/types/index_r2file_request_v2processing_type.py new file mode 100644 index 0000000..23bf063 --- /dev/null +++ b/src/runcaptain/indexing/types/index_r2file_request_v2processing_type.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +IndexR2FileRequestV2ProcessingType = typing.Union[typing.Literal["advanced", "basic"], typing.Any] diff --git a/src/runcaptain/indexing/types/index_r2request_v2jurisdiction.py b/src/runcaptain/indexing/types/index_r2request_v2jurisdiction.py new file mode 100644 index 0000000..37b97b5 --- /dev/null +++ b/src/runcaptain/indexing/types/index_r2request_v2jurisdiction.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +IndexR2RequestV2Jurisdiction = typing.Union[typing.Literal["default", "eu", "fedramp"], typing.Any] diff --git a/src/runcaptain/indexing/types/index_r2request_v2processing_type.py b/src/runcaptain/indexing/types/index_r2request_v2processing_type.py new file mode 100644 index 0000000..4440034 --- /dev/null +++ b/src/runcaptain/indexing/types/index_r2request_v2processing_type.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +IndexR2RequestV2ProcessingType = typing.Union[typing.Literal["advanced", "basic"], typing.Any] diff --git a/src/runcaptain/indexing/types/index_url_request_v2processing_type.py b/src/runcaptain/indexing/types/index_url_request_v2processing_type.py new file mode 100644 index 0000000..b4d0387 --- /dev/null +++ b/src/runcaptain/indexing/types/index_url_request_v2processing_type.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +IndexUrlRequestV2ProcessingType = typing.Union[typing.Literal["advanced", "basic"], typing.Any] diff --git a/src/runcaptain/jobs/client.py b/src/runcaptain/jobs/client.py index cd021dd..9666a4f 100644 --- a/src/runcaptain/jobs/client.py +++ b/src/runcaptain/jobs/client.py @@ -58,7 +58,6 @@ def get_job_status_v2( Parameters ---------- job_id : str - The job ID returned from an indexing request request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -73,11 +72,11 @@ def get_job_status_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.jobs.get_job_status_v2( - job_id="abc123xyz-1234567890", + job_id="job_s3_abc123", ) """ _response = self._raw_client.get_job_status_v2(job_id, request_options=request_options) @@ -96,7 +95,6 @@ def cancel_job_v2( Parameters ---------- job_id : str - The job ID to cancel request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -111,11 +109,11 @@ def cancel_job_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.jobs.cancel_job_v2( - job_id="abc123xyz-1234567890", + job_id="job_s3_abc123", ) """ _response = self._raw_client.cancel_job_v2(job_id, request_options=request_options) @@ -171,7 +169,6 @@ async def get_job_status_v2( Parameters ---------- job_id : str - The job ID returned from an indexing request request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -188,14 +185,14 @@ async def get_job_status_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.jobs.get_job_status_v2( - job_id="abc123xyz-1234567890", + job_id="job_s3_abc123", ) @@ -217,7 +214,6 @@ async def cancel_job_v2( Parameters ---------- job_id : str - The job ID to cancel request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -234,14 +230,14 @@ async def cancel_job_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.jobs.cancel_job_v2( - job_id="abc123xyz-1234567890", + job_id="job_s3_abc123", ) diff --git a/src/runcaptain/jobs/raw_client.py b/src/runcaptain/jobs/raw_client.py index 8f15a15..74cd595 100644 --- a/src/runcaptain/jobs/raw_client.py +++ b/src/runcaptain/jobs/raw_client.py @@ -52,7 +52,6 @@ def get_job_status_v2( Parameters ---------- job_id : str - The job ID returned from an indexing request request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -106,7 +105,6 @@ def cancel_job_v2( Parameters ---------- job_id : str - The job ID to cancel request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -175,7 +173,6 @@ async def get_job_status_v2( Parameters ---------- job_id : str - The job ID returned from an indexing request request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -229,7 +226,6 @@ async def cancel_job_v2( Parameters ---------- job_id : str - The job ID to cancel request_options : typing.Optional[RequestOptions] Request-specific configuration. diff --git a/src/runcaptain/query/client.py b/src/runcaptain/query/client.py index a923999..d0d0ed9 100644 --- a/src/runcaptain/query/client.py +++ b/src/runcaptain/query/client.py @@ -31,7 +31,6 @@ def collection_v2( collection_name: str, *, query: str, - idempotency_key: typing.Optional[str] = None, inference: typing.Optional[bool] = OMIT, stream: typing.Optional[bool] = OMIT, top_k: typing.Optional[int] = OMIT, @@ -48,63 +47,46 @@ def collection_v2( ## Streaming (SSE) - When `stream: true` and `inference: true`, the JSON response includes a `request_id`. Refer to the sample implementations to best make use of streams. + When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator. ### SSE Event Types - | Event | Format | Description | - |-------|--------|-------------| - | Text chunk | `data: \\n\\n` | Incremental text of the AI response. Plain text (not JSON). Newlines within text are escaped as `\\n`. | - | Tool start | `event: tool_start\\ndata: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"..."}}\\n\\n` | The AI agent is performing a knowledge base search. The `args.query` field contains the search query. | - | Tool end | `event: tool_end\\ndata: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}}\\n\\n` | A search completed. `tool_call_id` correlates with the preceding `tool_start`. `result_summary.resultCount` indicates how many results were found. | - | Complete | `event: complete\\ndata: {"type":"stream_complete"}\\n\\n` | Stream finished successfully. Close the connection after receiving this. | - | Error | `event: error\\ndata: {"type":"stream_error","error":"..."}\\n\\n` | An error occurred during generation. Close the connection. | + | `type` value | Schema | Description | + |---|---|---| + | `text.delta` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. | + | `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. | + | `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. | + | `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. | + | `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. | ### Example SSE Stream ``` - event: tool_start - data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"revenue projections Q4"}} + data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}} - event: tool_end - data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}} + data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}} - data: Based on the documents - data: provided, the revenue - data: projections for Q4 show - data: a 15% increase over Q3. + data: {"type":"text.delta","seq":3,"run_id":"run_abc","data":"Based on the documents"} + data: {"type":"text.delta","seq":4,"run_id":"run_abc","data":" provided, the revenue"} + data: {"type":"text.delta","seq":5,"run_id":"run_abc","data":" projections for Q4 show"} + data: {"type":"text.delta","seq":6,"run_id":"run_abc","data":" a 15% increase over Q3."} - event: tool_start - data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_2","args":{"query":"Q3 comparison metrics"}} - - event: tool_end - data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_2","ok":true,"result_summary":{"resultCount":8}} - - data: Compared to Q3, the key - data: drivers were operational - data: efficiency gains. - - event: complete - data: {"type":"stream_complete"} + data: {"type":"stream_complete","metadata":{"totalResults":12,"totalSearches":1},"stats":{"totalTokens":150}} ``` ### Notes - - The agent may perform multiple searches per query. Each search produces a `tool_start`/`tool_end` pair. + - The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair. - Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search. - Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses. Parameters ---------- collection_name : str - Name of the collection to query query : str The natural language query to search for - idempotency_key : typing.Optional[str] - UUID for request deduplication - inference : typing.Optional[bool] Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results. @@ -136,22 +118,21 @@ def collection_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.query.collection_v2( collection_name="my_documents", query="What are the key terms in the contract?", inference=False, stream=False, - top_k=10, rerank=True, + top_k=10, ) """ _response = self._raw_client.collection_v2( collection_name, query=query, - idempotency_key=idempotency_key, inference=inference, stream=stream, top_k=top_k, @@ -183,7 +164,6 @@ async def collection_v2( collection_name: str, *, query: str, - idempotency_key: typing.Optional[str] = None, inference: typing.Optional[bool] = OMIT, stream: typing.Optional[bool] = OMIT, top_k: typing.Optional[int] = OMIT, @@ -200,63 +180,46 @@ async def collection_v2( ## Streaming (SSE) - When `stream: true` and `inference: true`, the JSON response includes a `request_id`. Refer to the sample implementations to best make use of streams. + When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator. ### SSE Event Types - | Event | Format | Description | - |-------|--------|-------------| - | Text chunk | `data: \\n\\n` | Incremental text of the AI response. Plain text (not JSON). Newlines within text are escaped as `\\n`. | - | Tool start | `event: tool_start\\ndata: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"..."}}\\n\\n` | The AI agent is performing a knowledge base search. The `args.query` field contains the search query. | - | Tool end | `event: tool_end\\ndata: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}}\\n\\n` | A search completed. `tool_call_id` correlates with the preceding `tool_start`. `result_summary.resultCount` indicates how many results were found. | - | Complete | `event: complete\\ndata: {"type":"stream_complete"}\\n\\n` | Stream finished successfully. Close the connection after receiving this. | - | Error | `event: error\\ndata: {"type":"stream_error","error":"..."}\\n\\n` | An error occurred during generation. Close the connection. | + | `type` value | Schema | Description | + |---|---|---| + | `text.delta` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. | + | `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. | + | `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. | + | `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. | + | `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. | ### Example SSE Stream ``` - event: tool_start - data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"revenue projections Q4"}} + data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}} - event: tool_end - data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}} + data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}} - data: Based on the documents - data: provided, the revenue - data: projections for Q4 show - data: a 15% increase over Q3. + data: {"type":"text.delta","seq":3,"run_id":"run_abc","data":"Based on the documents"} + data: {"type":"text.delta","seq":4,"run_id":"run_abc","data":" provided, the revenue"} + data: {"type":"text.delta","seq":5,"run_id":"run_abc","data":" projections for Q4 show"} + data: {"type":"text.delta","seq":6,"run_id":"run_abc","data":" a 15% increase over Q3."} - event: tool_start - data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_2","args":{"query":"Q3 comparison metrics"}} - - event: tool_end - data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_2","ok":true,"result_summary":{"resultCount":8}} - - data: Compared to Q3, the key - data: drivers were operational - data: efficiency gains. - - event: complete - data: {"type":"stream_complete"} + data: {"type":"stream_complete","metadata":{"totalResults":12,"totalSearches":1},"stats":{"totalTokens":150}} ``` ### Notes - - The agent may perform multiple searches per query. Each search produces a `tool_start`/`tool_end` pair. + - The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair. - Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search. - Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses. Parameters ---------- collection_name : str - Name of the collection to query query : str The natural language query to search for - idempotency_key : typing.Optional[str] - UUID for request deduplication - inference : typing.Optional[bool] Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results. @@ -290,8 +253,8 @@ async def collection_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) @@ -301,8 +264,8 @@ async def main() -> None: query="What are the key terms in the contract?", inference=False, stream=False, - top_k=10, rerank=True, + top_k=10, ) @@ -311,7 +274,6 @@ async def main() -> None: _response = await self._raw_client.collection_v2( collection_name, query=query, - idempotency_key=idempotency_key, inference=inference, stream=stream, top_k=top_k, diff --git a/src/runcaptain/query/raw_client.py b/src/runcaptain/query/raw_client.py index 1c53391..d948a4d 100644 --- a/src/runcaptain/query/raw_client.py +++ b/src/runcaptain/query/raw_client.py @@ -24,7 +24,6 @@ def collection_v2( collection_name: str, *, query: str, - idempotency_key: typing.Optional[str] = None, inference: typing.Optional[bool] = OMIT, stream: typing.Optional[bool] = OMIT, top_k: typing.Optional[int] = OMIT, @@ -41,63 +40,46 @@ def collection_v2( ## Streaming (SSE) - When `stream: true` and `inference: true`, the JSON response includes a `request_id`. Refer to the sample implementations to best make use of streams. + When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator. ### SSE Event Types - | Event | Format | Description | - |-------|--------|-------------| - | Text chunk | `data: \\n\\n` | Incremental text of the AI response. Plain text (not JSON). Newlines within text are escaped as `\\n`. | - | Tool start | `event: tool_start\\ndata: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"..."}}\\n\\n` | The AI agent is performing a knowledge base search. The `args.query` field contains the search query. | - | Tool end | `event: tool_end\\ndata: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}}\\n\\n` | A search completed. `tool_call_id` correlates with the preceding `tool_start`. `result_summary.resultCount` indicates how many results were found. | - | Complete | `event: complete\\ndata: {"type":"stream_complete"}\\n\\n` | Stream finished successfully. Close the connection after receiving this. | - | Error | `event: error\\ndata: {"type":"stream_error","error":"..."}\\n\\n` | An error occurred during generation. Close the connection. | + | `type` value | Schema | Description | + |---|---|---| + | `text.delta` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. | + | `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. | + | `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. | + | `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. | + | `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. | ### Example SSE Stream ``` - event: tool_start - data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"revenue projections Q4"}} + data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}} - event: tool_end - data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}} + data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}} - data: Based on the documents - data: provided, the revenue - data: projections for Q4 show - data: a 15% increase over Q3. + data: {"type":"text.delta","seq":3,"run_id":"run_abc","data":"Based on the documents"} + data: {"type":"text.delta","seq":4,"run_id":"run_abc","data":" provided, the revenue"} + data: {"type":"text.delta","seq":5,"run_id":"run_abc","data":" projections for Q4 show"} + data: {"type":"text.delta","seq":6,"run_id":"run_abc","data":" a 15% increase over Q3."} - event: tool_start - data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_2","args":{"query":"Q3 comparison metrics"}} - - event: tool_end - data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_2","ok":true,"result_summary":{"resultCount":8}} - - data: Compared to Q3, the key - data: drivers were operational - data: efficiency gains. - - event: complete - data: {"type":"stream_complete"} + data: {"type":"stream_complete","metadata":{"totalResults":12,"totalSearches":1},"stats":{"totalTokens":150}} ``` ### Notes - - The agent may perform multiple searches per query. Each search produces a `tool_start`/`tool_end` pair. + - The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair. - Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search. - Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses. Parameters ---------- collection_name : str - Name of the collection to query query : str The natural language query to search for - idempotency_key : typing.Optional[str] - UUID for request deduplication - inference : typing.Optional[bool] Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results. @@ -138,7 +120,6 @@ def collection_v2( }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, }, request_options=request_options, omit=OMIT, @@ -168,7 +149,6 @@ async def collection_v2( collection_name: str, *, query: str, - idempotency_key: typing.Optional[str] = None, inference: typing.Optional[bool] = OMIT, stream: typing.Optional[bool] = OMIT, top_k: typing.Optional[int] = OMIT, @@ -185,63 +165,46 @@ async def collection_v2( ## Streaming (SSE) - When `stream: true` and `inference: true`, the JSON response includes a `request_id`. Refer to the sample implementations to best make use of streams. + When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator. ### SSE Event Types - | Event | Format | Description | - |-------|--------|-------------| - | Text chunk | `data: \\n\\n` | Incremental text of the AI response. Plain text (not JSON). Newlines within text are escaped as `\\n`. | - | Tool start | `event: tool_start\\ndata: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"..."}}\\n\\n` | The AI agent is performing a knowledge base search. The `args.query` field contains the search query. | - | Tool end | `event: tool_end\\ndata: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}}\\n\\n` | A search completed. `tool_call_id` correlates with the preceding `tool_start`. `result_summary.resultCount` indicates how many results were found. | - | Complete | `event: complete\\ndata: {"type":"stream_complete"}\\n\\n` | Stream finished successfully. Close the connection after receiving this. | - | Error | `event: error\\ndata: {"type":"stream_error","error":"..."}\\n\\n` | An error occurred during generation. Close the connection. | + | `type` value | Schema | Description | + |---|---|---| + | `text.delta` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. | + | `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. | + | `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. | + | `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. | + | `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. | ### Example SSE Stream ``` - event: tool_start - data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"revenue projections Q4"}} + data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}} - event: tool_end - data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}} + data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}} - data: Based on the documents - data: provided, the revenue - data: projections for Q4 show - data: a 15% increase over Q3. + data: {"type":"text.delta","seq":3,"run_id":"run_abc","data":"Based on the documents"} + data: {"type":"text.delta","seq":4,"run_id":"run_abc","data":" provided, the revenue"} + data: {"type":"text.delta","seq":5,"run_id":"run_abc","data":" projections for Q4 show"} + data: {"type":"text.delta","seq":6,"run_id":"run_abc","data":" a 15% increase over Q3."} - event: tool_start - data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_2","args":{"query":"Q3 comparison metrics"}} - - event: tool_end - data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_2","ok":true,"result_summary":{"resultCount":8}} - - data: Compared to Q3, the key - data: drivers were operational - data: efficiency gains. - - event: complete - data: {"type":"stream_complete"} + data: {"type":"stream_complete","metadata":{"totalResults":12,"totalSearches":1},"stats":{"totalTokens":150}} ``` ### Notes - - The agent may perform multiple searches per query. Each search produces a `tool_start`/`tool_end` pair. + - The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair. - Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search. - Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses. Parameters ---------- collection_name : str - Name of the collection to query query : str The natural language query to search for - idempotency_key : typing.Optional[str] - UUID for request deduplication - inference : typing.Optional[bool] Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results. @@ -282,7 +245,6 @@ async def collection_v2( }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, }, request_options=request_options, omit=OMIT, diff --git a/src/runcaptain/types/__init__.py b/src/runcaptain/types/__init__.py index c362b46..8058902 100644 --- a/src/runcaptain/types/__init__.py +++ b/src/runcaptain/types/__init__.py @@ -14,9 +14,7 @@ from .collection_list_response_v2 import CollectionListResponseV2 from .collection_response_v2 import CollectionResponseV2 from .dataset_article_response import DatasetArticleResponse - from .dataset_article_response_dataset import DatasetArticleResponseDataset from .dataset_search_response import DatasetSearchResponse - from .dataset_search_response_dataset import DatasetSearchResponseDataset from .dataset_search_result import DatasetSearchResult from .document_delete_response_v2 import DocumentDeleteResponseV2 from .document_item_v2 import DocumentItemV2 @@ -37,6 +35,19 @@ from .job_status_response_v2job_type import JobStatusResponseV2JobType from .job_status_response_v2status import JobStatusResponseV2Status from .query_response_v2 import QueryResponseV2 + from .query_stream_complete_event import QueryStreamCompleteEvent + from .query_stream_error_event import QueryStreamErrorEvent + from .query_stream_event import ( + QueryStreamEvent, + QueryStreamEvent_StreamComplete, + QueryStreamEvent_StreamError, + QueryStreamEvent_TextDelta, + QueryStreamEvent_ToolEnd, + QueryStreamEvent_ToolStart, + ) + from .query_stream_text_event import QueryStreamTextEvent + from .query_stream_tool_end_event import QueryStreamToolEndEvent + from .query_stream_tool_start_event import QueryStreamToolStartEvent from .relevant_document_v2 import RelevantDocumentV2 from .search_result import SearchResult from .standard_response_v2 import StandardResponseV2 @@ -52,9 +63,7 @@ "CollectionListResponseV2": ".collection_list_response_v2", "CollectionResponseV2": ".collection_response_v2", "DatasetArticleResponse": ".dataset_article_response", - "DatasetArticleResponseDataset": ".dataset_article_response_dataset", "DatasetSearchResponse": ".dataset_search_response", - "DatasetSearchResponseDataset": ".dataset_search_response_dataset", "DatasetSearchResult": ".dataset_search_result", "DocumentDeleteResponseV2": ".document_delete_response_v2", "DocumentItemV2": ".document_item_v2", @@ -75,6 +84,17 @@ "JobStatusResponseV2JobType": ".job_status_response_v2job_type", "JobStatusResponseV2Status": ".job_status_response_v2status", "QueryResponseV2": ".query_response_v2", + "QueryStreamCompleteEvent": ".query_stream_complete_event", + "QueryStreamErrorEvent": ".query_stream_error_event", + "QueryStreamEvent": ".query_stream_event", + "QueryStreamEvent_StreamComplete": ".query_stream_event", + "QueryStreamEvent_StreamError": ".query_stream_event", + "QueryStreamEvent_TextDelta": ".query_stream_event", + "QueryStreamEvent_ToolEnd": ".query_stream_event", + "QueryStreamEvent_ToolStart": ".query_stream_event", + "QueryStreamTextEvent": ".query_stream_text_event", + "QueryStreamToolEndEvent": ".query_stream_tool_end_event", + "QueryStreamToolStartEvent": ".query_stream_tool_start_event", "RelevantDocumentV2": ".relevant_document_v2", "SearchResult": ".search_result", "StandardResponseV2": ".standard_response_v2", @@ -114,9 +134,7 @@ def __dir__(): "CollectionListResponseV2", "CollectionResponseV2", "DatasetArticleResponse", - "DatasetArticleResponseDataset", "DatasetSearchResponse", - "DatasetSearchResponseDataset", "DatasetSearchResult", "DocumentDeleteResponseV2", "DocumentItemV2", @@ -137,6 +155,17 @@ def __dir__(): "JobStatusResponseV2JobType", "JobStatusResponseV2Status", "QueryResponseV2", + "QueryStreamCompleteEvent", + "QueryStreamErrorEvent", + "QueryStreamEvent", + "QueryStreamEvent_StreamComplete", + "QueryStreamEvent_StreamError", + "QueryStreamEvent_TextDelta", + "QueryStreamEvent_ToolEnd", + "QueryStreamEvent_ToolStart", + "QueryStreamTextEvent", + "QueryStreamToolEndEvent", + "QueryStreamToolStartEvent", "RelevantDocumentV2", "SearchResult", "StandardResponseV2", diff --git a/src/runcaptain/types/collection_item_v2.py b/src/runcaptain/types/collection_item_v2.py index d7c9be3..15b105d 100644 --- a/src/runcaptain/types/collection_item_v2.py +++ b/src/runcaptain/types/collection_item_v2.py @@ -7,19 +7,9 @@ class CollectionItemV2(UniversalBaseModel): - collection_id: str = pydantic.Field() - """ - Unique identifier for the collection - """ - - collection_name: str = pydantic.Field() - """ - Name of the collection - """ - environment: typing.Optional[str] = pydantic.Field(default=None) """ - Environment the collection belongs to (e.g. production, staging, development) + Environment the collection belongs to """ is_active: typing.Optional[bool] = pydantic.Field(default=None) @@ -37,9 +27,14 @@ class CollectionItemV2(UniversalBaseModel): Total number of API requests made against this collection """ - document_count: typing.Optional[int] = pydantic.Field(default=None) + database_name: str = pydantic.Field() + """ + Name of the collection database + """ + + file_count: int = pydantic.Field() """ - Total number of documents indexed in this collection + Total number of files indexed in this collection """ if IS_PYDANTIC_V2: diff --git a/src/runcaptain/types/dataset_article_response.py b/src/runcaptain/types/dataset_article_response.py index 580d51c..5b8da14 100644 --- a/src/runcaptain/types/dataset_article_response.py +++ b/src/runcaptain/types/dataset_article_response.py @@ -4,7 +4,6 @@ import pydantic from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from .dataset_article_response_dataset import DatasetArticleResponseDataset class DatasetArticleResponse(UniversalBaseModel): @@ -12,7 +11,7 @@ class DatasetArticleResponse(UniversalBaseModel): Response containing an article from a news dataset """ - dataset: DatasetArticleResponseDataset = pydantic.Field() + dataset: str = pydantic.Field() """ The dataset the article was retrieved from """ diff --git a/src/runcaptain/types/dataset_article_response_dataset.py b/src/runcaptain/types/dataset_article_response_dataset.py deleted file mode 100644 index de232c2..0000000 --- a/src/runcaptain/types/dataset_article_response_dataset.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -DatasetArticleResponseDataset = typing.Union[typing.Literal["nytimes", "washpost", "sfstandard"], typing.Any] diff --git a/src/runcaptain/types/dataset_search_response.py b/src/runcaptain/types/dataset_search_response.py index ce38390..547d6e9 100644 --- a/src/runcaptain/types/dataset_search_response.py +++ b/src/runcaptain/types/dataset_search_response.py @@ -4,7 +4,6 @@ import pydantic from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from .dataset_search_response_dataset import DatasetSearchResponseDataset from .dataset_search_result import DatasetSearchResult @@ -13,7 +12,7 @@ class DatasetSearchResponse(UniversalBaseModel): Response containing search results from a news dataset """ - dataset: DatasetSearchResponseDataset = pydantic.Field() + dataset: str = pydantic.Field() """ The dataset that was searched """ diff --git a/src/runcaptain/types/dataset_search_response_dataset.py b/src/runcaptain/types/dataset_search_response_dataset.py deleted file mode 100644 index d9bf19d..0000000 --- a/src/runcaptain/types/dataset_search_response_dataset.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -DatasetSearchResponseDataset = typing.Union[typing.Literal["nytimes", "washpost", "sfstandard"], typing.Any] diff --git a/src/runcaptain/types/document_item_v2.py b/src/runcaptain/types/document_item_v2.py index 716b59a..3951731 100644 --- a/src/runcaptain/types/document_item_v2.py +++ b/src/runcaptain/types/document_item_v2.py @@ -7,7 +7,7 @@ class DocumentItemV2(UniversalBaseModel): - document_id: str = pydantic.Field() + file_id: str = pydantic.Field() """ Unique identifier for the document """ diff --git a/src/runcaptain/types/job_status_response_v2job_type.py b/src/runcaptain/types/job_status_response_v2job_type.py index 49b5a91..960edf2 100644 --- a/src/runcaptain/types/job_status_response_v2job_type.py +++ b/src/runcaptain/types/job_status_response_v2job_type.py @@ -4,7 +4,15 @@ JobStatusResponseV2JobType = typing.Union[ typing.Literal[ - "index_s3", "index_s3_file", "index_s3_directory", "index_gcs", "index_gcs_file", "index_gcs_directory" + "index_s3", + "index_s3_file", + "index_s3_directory", + "index_gcs", + "index_gcs_file", + "index_gcs_directory", + "index_r2", + "index_r2_file", + "index_r2_directory", ], typing.Any, ] diff --git a/src/runcaptain/types/query_stream_complete_event.py b/src/runcaptain/types/query_stream_complete_event.py new file mode 100644 index 0000000..40829d0 --- /dev/null +++ b/src/runcaptain/types/query_stream_complete_event.py @@ -0,0 +1,31 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel + + +class QueryStreamCompleteEvent(UniversalBaseModel): + """ + Emitted when the stream finishes successfully. Close the connection after receiving this. + """ + + metadata: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(default=None) + """ + Stream metadata (e.g. totalResults, totalSearches) + """ + + stats: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(default=None) + """ + Token and tool-call statistics + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/runcaptain/types/query_stream_error_event.py b/src/runcaptain/types/query_stream_error_event.py new file mode 100644 index 0000000..c7091ee --- /dev/null +++ b/src/runcaptain/types/query_stream_error_event.py @@ -0,0 +1,26 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel + + +class QueryStreamErrorEvent(UniversalBaseModel): + """ + Emitted when an error occurs during generation. Close the connection after receiving this. + """ + + error: str = pydantic.Field() + """ + Human-readable error message + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/runcaptain/types/query_stream_event.py b/src/runcaptain/types/query_stream_event.py new file mode 100644 index 0000000..9c3d2a0 --- /dev/null +++ b/src/runcaptain/types/query_stream_event.py @@ -0,0 +1,103 @@ +# This file was auto-generated by Fern from our API Definition. + +from __future__ import annotations + +import typing + +import pydantic +import typing_extensions +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel + + +class QueryStreamEvent_TextDelta(UniversalBaseModel): + type: typing.Literal["text.delta"] = "text.delta" + seq: typing.Optional[int] = None + run_id: typing.Optional[str] = None + data: str + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +class QueryStreamEvent_ToolStart(UniversalBaseModel): + type: typing.Literal["tool.start"] = "tool.start" + seq: typing.Optional[int] = None + run_id: typing.Optional[str] = None + tool_call_id: str + name: str + args: typing.Optional[typing.Dict[str, typing.Any]] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +class QueryStreamEvent_ToolEnd(UniversalBaseModel): + type: typing.Literal["tool.end"] = "tool.end" + seq: typing.Optional[int] = None + run_id: typing.Optional[str] = None + tool_call_id: str + name: str + ok: bool + result_summary: typing.Optional[typing.Dict[str, typing.Any]] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +class QueryStreamEvent_StreamComplete(UniversalBaseModel): + type: typing.Literal["stream_complete"] = "stream_complete" + metadata: typing.Optional[typing.Dict[str, typing.Any]] = None + stats: typing.Optional[typing.Dict[str, typing.Any]] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +class QueryStreamEvent_StreamError(UniversalBaseModel): + type: typing.Literal["stream_error"] = "stream_error" + error: str + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +QueryStreamEvent = typing_extensions.Annotated[ + typing.Union[ + QueryStreamEvent_TextDelta, + QueryStreamEvent_ToolStart, + QueryStreamEvent_ToolEnd, + QueryStreamEvent_StreamComplete, + QueryStreamEvent_StreamError, + ], + pydantic.Field(discriminator="type"), +] diff --git a/src/runcaptain/types/query_stream_text_event.py b/src/runcaptain/types/query_stream_text_event.py new file mode 100644 index 0000000..c29909c --- /dev/null +++ b/src/runcaptain/types/query_stream_text_event.py @@ -0,0 +1,36 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel + + +class QueryStreamTextEvent(UniversalBaseModel): + """ + Incremental text chunk of the AI response. + """ + + seq: typing.Optional[int] = pydantic.Field(default=None) + """ + Monotonically increasing sequence number within the stream + """ + + run_id: typing.Optional[str] = pydantic.Field(default=None) + """ + Identifier for the current agent run + """ + + data: str = pydantic.Field() + """ + Text fragment of the AI-generated response + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/runcaptain/types/query_stream_tool_end_event.py b/src/runcaptain/types/query_stream_tool_end_event.py new file mode 100644 index 0000000..58bd3bb --- /dev/null +++ b/src/runcaptain/types/query_stream_tool_end_event.py @@ -0,0 +1,51 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel + + +class QueryStreamToolEndEvent(UniversalBaseModel): + """ + Emitted when a tool call completes. + """ + + seq: typing.Optional[int] = pydantic.Field(default=None) + """ + Monotonically increasing sequence number within the stream + """ + + run_id: typing.Optional[str] = pydantic.Field(default=None) + """ + Identifier for the current agent run + """ + + tool_call_id: str = pydantic.Field() + """ + Correlates with the preceding tool.start event + """ + + name: str = pydantic.Field() + """ + Tool name + """ + + ok: bool = pydantic.Field() + """ + Whether the tool call succeeded + """ + + result_summary: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(default=None) + """ + Summary of the tool results (e.g. {"resultCount": 12, "hasResults": true}) + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/runcaptain/types/query_stream_tool_start_event.py b/src/runcaptain/types/query_stream_tool_start_event.py new file mode 100644 index 0000000..860bd03 --- /dev/null +++ b/src/runcaptain/types/query_stream_tool_start_event.py @@ -0,0 +1,46 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel + + +class QueryStreamToolStartEvent(UniversalBaseModel): + """ + Emitted when the AI agent begins a knowledge-base search. + """ + + seq: typing.Optional[int] = pydantic.Field(default=None) + """ + Monotonically increasing sequence number within the stream + """ + + run_id: typing.Optional[str] = pydantic.Field(default=None) + """ + Identifier for the current agent run + """ + + tool_call_id: str = pydantic.Field() + """ + Correlates this start with the corresponding tool.end event + """ + + name: str = pydantic.Field() + """ + Tool name, e.g. searchKnowledgeBase + """ + + args: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(default=None) + """ + Arguments passed to the tool (e.g. {"query": "...", "topK": 10}) + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow