diff --git a/.fern/metadata.json b/.fern/metadata.json index 3791815..3c88de1 100644 --- a/.fern/metadata.json +++ b/.fern/metadata.json @@ -1,9 +1,9 @@ { "cliVersion": "3.86.0", "generatorName": "fernapi/fern-python-sdk", - "generatorVersion": "4.59.0", + "generatorVersion": "4.59.4", "generatorConfig": { "client_class_name": "Captain" }, - "sdkVersion": "0.0.0" + "sdkVersion": "0.0.16" } \ No newline at end of file diff --git a/README.md b/README.md index 1a92543..21af996 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ The Runcaptain Python library provides convenient access to the Runcaptain APIs - [Usage](#usage) - [Async Client](#async-client) - [Exception Handling](#exception-handling) +- [Streaming](#streaming) - [Advanced](#advanced) - [Access Raw Response Data](#access-raw-response-data) - [Retries](#retries) @@ -37,15 +38,11 @@ Instantiate and use the client with the following: from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) -client.query.collection_v2( - collection_name="my_documents", - query="What are the key terms in the contract?", - inference=True, - stream=True, - rerank=True, +client.post_v2collections_collection_name_documents_wipe( + collection_name="collection_name", ) ``` @@ -59,18 +56,14 @@ import asyncio from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: - await client.query.collection_v2( - collection_name="my_documents", - query="What are the key terms in the contract?", - inference=True, - stream=True, - rerank=True, + await client.post_v2collections_collection_name_documents_wipe( + collection_name="collection_name", ) @@ -86,12 +79,31 @@ will be thrown. from runcaptain.core.api_error import ApiError try: - client.query.collection_v2(...) + client.post_v2collections_collection_name_documents_wipe(...) 
except ApiError as e: print(e.status_code) print(e.body) ``` +## Streaming + +The SDK also supports streaming responses; the response is a generator that you can loop over. + +```python +from runcaptain import Captain + +client = Captain( + organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", +) +response = client.query.collection_v2stream( + collection_name="collection_name", + query="query", +) +for chunk in response.data: + yield chunk +``` + ## Advanced ### Access Raw Response Data @@ -105,10 +117,20 @@ from runcaptain import Captain client = Captain( ..., ) -response = client.query.with_raw_response.collection_v2(...) +response = ( + client.with_raw_response.post_v2collections_collection_name_documents_wipe( + ... + ) +) print(response.headers) # access the response headers print(response.status_code) # access the response status code print(response.data) # access the underlying object +with client.query.with_raw_response.collection_v2stream(...) as response: + print(response.headers) # access the response headers + print(response.status_code) # access the response status code + + for chunk in response.data: + print(chunk) # access the underlying object(s) ``` ### Retries @@ -126,7 +148,7 @@ A request is deemed retryable when any of the following HTTP status codes is ret Use the `max_retries` request option to configure this behavior. 
```python -client.query.collection_v2(..., request_options={ +client.post_v2collections_collection_name_documents_wipe(..., request_options={ "max_retries": 1 }) ``` @@ -146,7 +168,7 @@ client = Captain( # Override timeout for a specific method -client.query.collection_v2(..., request_options={ +client.post_v2collections_collection_name_documents_wipe(..., request_options={ "timeout_in_seconds": 1 }) ``` diff --git a/poetry.lock b/poetry.lock index 2f8666b..83fdc8b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -38,13 +38,13 @@ trio = ["trio (>=0.26.1)"] [[package]] name = "certifi" -version = "2026.1.4" +version = "2026.2.25" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.7" files = [ - {file = "certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c"}, - {file = "certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120"}, + {file = "certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa"}, + {file = "certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7"}, ] [[package]] diff --git a/pyproject.toml b/pyproject.toml index ac15859..88be80c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ dynamic = ["version"] [tool.poetry] name = "captain-sdk" -version = "0.0.0" +version = "0.0.16" description = "" readme = "README.md" authors = [] diff --git a/reference.md b/reference.md index a89bbf8..c21ba8a 100644 --- a/reference.md +++ b/reference.md @@ -1,10 +1,9 @@ # Reference -## Collections -
client.collections.list_collections_v2(...) -> AsyncHttpResponse[CollectionListResponseV2] +
client.post_v2collections_collection_name_documents_wipe(...) -> AsyncHttpResponse[None]
-#### 📝 Description +#### 🔌 Usage
@@ -12,14 +11,55 @@
-List all collections for an organization. +```python +from runcaptain import Captain -Returns an array of collection objects with collection_name, collection_id, and document_count. +client = Captain( + organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", +) +client.post_v2collections_collection_name_documents_wipe( + collection_name="collection_name", +) + +``` +
+
+
+
+ +#### ⚙️ Parameters + +
+
+ +
+
+ +**collection_name:** `str` + +
+
+ +
+
+ +**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. +
+ +
+
+
+ +
client.post_v2datasets_search() -> AsyncHttpResponse[None] +
+
+ #### 🔌 Usage
@@ -32,10 +72,10 @@ Returns an array of collection objects with collection_name, collection_id, and from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) -client.collections.list_collections_v2() +client.post_v2datasets_search() ```
@@ -51,18 +91,66 @@ client.collections.list_collections_v2()
-**limit:** `typing.Optional[int]` — Maximum number of collections to return +**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.
+ +
+ + + + +
+ +## Collections +
client.collections.list_collections_v2() -> AsyncHttpResponse[CollectionListResponseV2] +
+
+ +#### 📝 Description
-**offset:** `typing.Optional[int]` — Pagination offset - +
+
+ +List all collections for an organization. + +Returns an array of collection objects with collection_name, collection_id, and document_count.
+
+
+ +#### 🔌 Usage + +
+
+ +
+
+ +```python +from runcaptain import Captain + +client = Captain( + organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", +) +client.collections.list_collections_v2() + +``` +
+
+
+
+ +#### ⚙️ Parameters + +
+
@@ -109,11 +197,11 @@ Create a new collection (idempotent). Returns 201 if created, 200 if already exi from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.create_collection_v2( - collection_name="my_documents", + collection_name="collection_name", ) ``` @@ -130,7 +218,7 @@ client.collections.create_collection_v2(
-**collection_name:** `str` — Name of the collection to create +**collection_name:** `str`
@@ -188,11 +276,11 @@ Delete a collection and all its indexed documents. from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.delete_collection_v2( - collection_name="my_documents", + collection_name="collection_name", ) ``` @@ -209,7 +297,7 @@ client.collections.delete_collection_v2(
-**collection_name:** `str` — Name of the collection to delete +**collection_name:** `str`
@@ -271,12 +359,12 @@ All files, indexed data, and vector embeddings are preserved. The collection's i from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.change_collection_environment_v2( - collection_name="my_documents", - new_environment="production", + collection_name="collection_name", + new_environment="development", ) ``` @@ -293,7 +381,7 @@ client.collections.change_collection_environment_v2(
-**collection_name:** `str` — Name of the collection to move +**collection_name:** `str`
@@ -351,13 +439,12 @@ List all documents in a collection with pagination support. from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.list_documents_v2( - collection_name="my_documents", - limit=100, - offset=0, + collection_name="collection_name", + offset=1, ) ``` @@ -374,15 +461,7 @@ client.collections.list_documents_v2(
-**collection_name:** `str` — Name of the collection - -
-
- -
-
- -**limit:** `typing.Optional[int]` — Maximum number of documents to return +**collection_name:** `str`
@@ -440,8 +519,8 @@ Remove all documents from a collection while keeping the collection structure. from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.wipe_collection_documents_v2( collection_name="collection_name", @@ -511,8 +590,8 @@ Delete a specific document from a collection. from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.delete_document_v2( collection_name="collection_name", @@ -533,7 +612,7 @@ client.collections.delete_document_v2(
-**collection_name:** `str` — Name of the collection +**collection_name:** `str`
@@ -541,7 +620,7 @@ client.collections.delete_document_v2(
-**document_id:** `str` — ID of the document to delete +**document_id:** `str`
@@ -562,7 +641,7 @@ client.collections.delete_document_v2(
## Query -
client.query.collection_v2(...) -> AsyncHttpResponse[QueryResponseV2] +
client.query.collection_v2stream(...) -> typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[QueryStreamEvent]]]
@@ -581,49 +660,36 @@ When `inference=false`, returns raw search results with content and metadata. ## Streaming (SSE) -When `stream: true` and `inference: true`, the JSON response includes a `request_id`. Refer to the sample implementations to best make use of streams. +When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator. ### SSE Event Types -| Event | Format | Description | -|-------|--------|-------------| -| Text chunk | `data: \n\n` | Incremental text of the AI response. Plain text (not JSON). Newlines within text are escaped as `\n`. | -| Tool start | `event: tool_start\ndata: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"..."}}\n\n` | The AI agent is performing a knowledge base search. The `args.query` field contains the search query. | -| Tool end | `event: tool_end\ndata: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}}\n\n` | A search completed. `tool_call_id` correlates with the preceding `tool_start`. `result_summary.resultCount` indicates how many results were found. | -| Complete | `event: complete\ndata: {"type":"stream_complete"}\n\n` | Stream finished successfully. Close the connection after receiving this. | -| Error | `event: error\ndata: {"type":"stream_error","error":"..."}\n\n` | An error occurred during generation. Close the connection. | +| `type` value | Schema | Description | +|---|---|---| +| `text` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. | +| `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. | +| `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. | +| `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. 
| +| `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. | ### Example SSE Stream ``` -event: tool_start -data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"revenue projections Q4"}} - -event: tool_end -data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}} +data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}} -data: Based on the documents -data: provided, the revenue -data: projections for Q4 show -data: a 15% increase over Q3. +data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}} -event: tool_start -data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_2","args":{"query":"Q3 comparison metrics"}} +data: {"type":"text","content":"Based on the documents"} +data: {"type":"text","content":" provided, the revenue"} +data: {"type":"text","content":" projections for Q4 show"} +data: {"type":"text","content":" a 15% increase over Q3."} -event: tool_end -data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_2","ok":true,"result_summary":{"resultCount":8}} - -data: Compared to Q3, the key -data: drivers were operational -data: efficiency gains. - -event: complete -data: {"type":"stream_complete"} +data: {"type":"stream_complete","metadata":{"totalResults":12},"stats":{"totalTokens":150}} ``` ### Notes -- The agent may perform multiple searches per query. Each search produces a `tool_start`/`tool_end` pair. +- The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair. - Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search. 
- Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses.
@@ -643,16 +709,15 @@ data: {"type":"stream_complete"} from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) -client.query.collection_v2( - collection_name="my_documents", - query="What are the key terms in the contract?", - inference=True, - stream=True, - rerank=True, +response = client.query.collection_v2stream( + collection_name="collection_name", + query="query", ) +for chunk in response.data: + yield chunk ``` @@ -668,7 +733,7 @@ client.query.collection_v2(
-**collection_name:** `str` — Name of the collection to query +**collection_name:** `str`
@@ -684,7 +749,7 @@ client.query.collection_v2(
-**idempotency_key:** `typing.Optional[str]` — UUID for request deduplication +**inference:** `typing.Optional[bool]` — Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results.
@@ -692,15 +757,157 @@ client.query.collection_v2(
-**inference:** `typing.Optional[bool]` — Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results. +**top_k:** `typing.Optional[int]` — Number of results to return. Only valid when inference=false. Not supported when inference=true (the agent controls its own search strategy). + +
+
+ +
+
+ +**rerank:** `typing.Optional[bool]` — Enable Voyage AI rerank-2.5 reranking for improved relevance ordering. Adds ~100-300ms latency. + +
+
+ +
+
+ +**metadata_filter:** `typing.Optional[typing.Dict[str, typing.Any]]` — Filter expression for vector search. Supports: $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin, $and, $or + +
+
+ +
+
+ +**custom_prompt:** `typing.Optional[str]` — Custom system prompt to override the default RAG prompt when inference=true. Allows customizing how the LLM processes and responds to the query with the retrieved context. + +
+
+ +
+
+ +**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.
+ +
+ + + + +
+ +
client.query.collection_v2(...) -> AsyncHttpResponse[typing.Any] +
+
+ +#### 📝 Description + +
+
+ +
+
+ +Execute a natural language query against a collection. + +When `inference=true`, returns an AI-generated response with relevant documents. +When `inference=false`, returns raw search results with content and metadata. + +## Streaming (SSE) + +When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator. + +### SSE Event Types + +| `type` value | Schema | Description | +|---|---|---| +| `text` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. | +| `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. | +| `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. | +| `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. | +| `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. | + +### Example SSE Stream + +``` +data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}} + +data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}} + +data: {"type":"text","content":"Based on the documents"} +data: {"type":"text","content":" provided, the revenue"} +data: {"type":"text","content":" projections for Q4 show"} +data: {"type":"text","content":" a 15% increase over Q3."} + +data: {"type":"stream_complete","metadata":{"totalResults":12},"stats":{"totalTokens":150}} +``` + +### Notes + +- The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair. +- Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search. +- Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses. +
+
+
+
+ +#### 🔌 Usage + +
+
-**stream:** `typing.Optional[bool]` — Enable real-time streaming of the response +```python +from runcaptain import Captain + +client = Captain( + organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", +) +client.query.collection_v2( + collection_name="collection_name", + query="query", +) + +``` +
+
+
+
+ +#### ⚙️ Parameters + +
+
+ +
+
+ +**collection_name:** `str` + +
+
+ +
+
+ +**query:** `str` — The natural language query to search for + +
+
+ +
+
+ +**inference:** `typing.Optional[bool]` — Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results.
@@ -783,14 +990,14 @@ Index all files from an S3 bucket into a collection. Returns a job_id for tracki from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_s3bucket_v2( - collection_name="my_documents", - bucket_name="my-s3-bucket", + collection_name="collection_name", + bucket_name="my-company-docs", aws_access_key_id="AKIAIOSFODNN7EXAMPLE", - aws_secret_access_key="your_secret_key", + aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", bucket_region="us-east-1", processing_type="advanced", ) @@ -809,7 +1016,7 @@ client.indexing.index_s3bucket_v2(
-**collection_name:** `str` — Name of the collection to index into +**collection_name:** `str`
@@ -849,14 +1056,6 @@ client.indexing.index_s3bucket_v2(
-**idempotency_key:** `typing.Optional[str]` — UUID for request deduplication - -
-
- -
-
- **bucket_region:** `typing.Optional[str]` — AWS region where the bucket is located
@@ -931,15 +1130,15 @@ Index a single file from an S3 bucket into a collection. Returns a job_id for tr from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_s3file_v2( - collection_name="my_documents", - bucket_name="my-s3-bucket", - file_uri="s3://my-s3-bucket/contracts/acme_contract.pdf", + collection_name="collection_name", + bucket_name="my-company-docs", + file_uri="s3://my-company-docs/contracts/acme_contract.pdf", aws_access_key_id="AKIAIOSFODNN7EXAMPLE", - aws_secret_access_key="your_secret_key", + aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", bucket_region="us-east-1", processing_type="advanced", ) @@ -958,7 +1157,7 @@ client.indexing.index_s3file_v2(
-**collection_name:** `str` — Name of the collection to index into +**collection_name:** `str`
@@ -1064,12 +1263,12 @@ Index all files from a Google Cloud Storage bucket into a collection. Returns a from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_gcs_bucket_v2( - collection_name="my_documents", - bucket_name="my-gcs-bucket", + collection_name="collection_name", + bucket_name="my-company-docs", service_account_json='{"type":"service_account","project_id":"my-project",...}', processing_type="advanced", ) @@ -1088,7 +1287,7 @@ client.indexing.index_gcs_bucket_v2(
-**collection_name:** `str` — Name of the collection to index into +**collection_name:** `str`
@@ -1186,8 +1385,8 @@ Index a single file from a GCS bucket into a collection. Returns a job_id for tr from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_gcs_file_v2( collection_name="collection_name", @@ -1211,7 +1410,7 @@ client.indexing.index_gcs_file_v2(
-**collection_name:** `str` — Name of the collection to index into +**collection_name:** `str`
@@ -1301,15 +1500,15 @@ Index all files from a specific directory in an S3 bucket into a collection. Use from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_s3directory_v2( - collection_name="my_documents", - bucket_name="my-s3-bucket", + collection_name="collection_name", + bucket_name="my-company-docs", directory_path="reports/2024/january", aws_access_key_id="AKIAIOSFODNN7EXAMPLE", - aws_secret_access_key="your_secret_key", + aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", bucket_region="us-east-1", processing_type="advanced", ) @@ -1328,7 +1527,7 @@ client.indexing.index_s3directory_v2(
-**collection_name:** `str` — Name of the collection to index into +**collection_name:** `str`
@@ -1376,14 +1575,6 @@ client.indexing.index_s3directory_v2(
-**idempotency_key:** `typing.Optional[str]` — UUID for request deduplication - -
-
- -
-
- **bucket_region:** `typing.Optional[str]` — AWS region where the bucket is located
@@ -1458,12 +1649,12 @@ Index all files from a specific directory in a GCS bucket into a collection. Use from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_gcs_directory_v2( - collection_name="my_documents", - bucket_name="my-gcs-bucket", + collection_name="collection_name", + bucket_name="my-company-docs", directory_path="reports/2024/january", service_account_json='{"type":"service_account","project_id":"my-project",...}', processing_type="advanced", @@ -1483,7 +1674,7 @@ client.indexing.index_gcs_directory_v2(
-**collection_name:** `str` — Name of the collection to index into +**collection_name:** `str`
@@ -1523,14 +1714,6 @@ client.indexing.index_gcs_directory_v2(
-**idempotency_key:** `typing.Optional[str]` — UUID for request deduplication - -
-
- -
-
- **max_files:** `typing.Optional[int]` — Maximum number of files to index (optional)
@@ -1597,14 +1780,14 @@ Index all files from an Azure Blob Storage container into a collection. Returns from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_azure_container_v2( - collection_name="my_documents", + collection_name="collection_name", container_name="my-container", account_name="mystorageaccount", - account_key="your_account_key", + account_key="your_account_key_base64", processing_type="advanced", ) @@ -1622,7 +1805,7 @@ client.indexing.index_azure_container_v2(
-**collection_name:** `str` — Name of the collection to index into +**collection_name:** `str`
@@ -1662,14 +1845,6 @@ client.indexing.index_azure_container_v2(
-**idempotency_key:** `typing.Optional[str]` — UUID for request deduplication - -
-
- -
-
- **max_files:** `typing.Optional[int]` — Maximum number of files to index (optional)
@@ -1736,15 +1911,15 @@ Index a single file from an Azure Blob Storage container into a collection. Retu from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_azure_file_v2( - collection_name="my_documents", + collection_name="collection_name", container_name="my-container", file_uri="https://mystorageaccount.blob.core.windows.net/my-container/contracts/acme_contract.pdf", account_name="mystorageaccount", - account_key="your_account_key", + account_key="your_account_key_base64", processing_type="advanced", ) @@ -1762,7 +1937,7 @@ client.indexing.index_azure_file_v2(
-**collection_name:** `str` — Name of the collection to index into +**collection_name:** `str`
@@ -1860,15 +2035,15 @@ Index all files from a specific directory (prefix) in an Azure Blob Storage cont from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_azure_directory_v2( - collection_name="my_documents", + collection_name="collection_name", container_name="my-container", directory_path="reports/2024/january", account_name="mystorageaccount", - account_key="your_account_key", + account_key="your_account_key_base64", processing_type="advanced", ) @@ -1886,7 +2061,7 @@ client.indexing.index_azure_directory_v2(
-**collection_name:** `str` — Name of the collection to index into +**collection_name:** `str`
@@ -1934,14 +2109,6 @@ client.indexing.index_azure_directory_v2(
-**idempotency_key:** `typing.Optional[str]` — UUID for request deduplication - -
-
- -
-
- **max_files:** `typing.Optional[int]` — Maximum number of files to index (optional)
@@ -2034,11 +2201,11 @@ Each file in the `files` array has a status: from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.jobs.get_job_status_v2( - job_id="abc123xyz-1234567890", + job_id="job_id", ) ``` @@ -2055,7 +2222,7 @@ client.jobs.get_job_status_v2(
-**job_id:** `str` — The job ID returned from an indexing request +**job_id:** `str`
@@ -2109,11 +2276,11 @@ Behavior: from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.jobs.cancel_job_v2( - job_id="abc123xyz-1234567890", + job_id="job_id", ) ``` @@ -2130,7 +2297,7 @@ client.jobs.cancel_job_v2(
-**job_id:** `str` — The job ID to cancel +**job_id:** `str`
@@ -2191,12 +2358,13 @@ Returns a list of search results with title, URL, snippet, and date. from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.datasets.search_dataset( dataset="nytimes", - q="", + q="q", + limit=1, ) ``` @@ -2290,12 +2458,12 @@ Returns the full article content in markdown format, along with metadata like ti from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.datasets.get_dataset_article( dataset="nytimes", - url="https://www.washingtonpost.com/example/news_example.html", + url="url", ) ``` diff --git a/src/runcaptain/__init__.py b/src/runcaptain/__init__.py index b0d8097..528fe48 100644 --- a/src/runcaptain/__init__.py +++ b/src/runcaptain/__init__.py @@ -38,6 +38,17 @@ JobStatusResponseV2JobType, JobStatusResponseV2Status, QueryResponseV2, + QueryStreamCompleteEvent, + QueryStreamErrorEvent, + QueryStreamEvent, + QueryStreamEvent_StreamComplete, + QueryStreamEvent_StreamError, + QueryStreamEvent_Text, + QueryStreamEvent_ToolEnd, + QueryStreamEvent_ToolStart, + QueryStreamTextEvent, + QueryStreamToolEndEvent, + QueryStreamToolStartEvent, RelevantDocumentV2, SearchResult, StandardResponseV2, @@ -120,6 +131,17 @@ "JobStatusResponseV2Status": ".types", "NotFoundError": ".errors", "QueryResponseV2": ".types", + "QueryStreamCompleteEvent": ".types", + "QueryStreamErrorEvent": ".types", + "QueryStreamEvent": ".types", + "QueryStreamEvent_StreamComplete": ".types", + "QueryStreamEvent_StreamError": ".types", + "QueryStreamEvent_Text": ".types", + "QueryStreamEvent_ToolEnd": ".types", + "QueryStreamEvent_ToolStart": ".types", + "QueryStreamTextEvent": ".types", + "QueryStreamToolEndEvent": ".types", + "QueryStreamToolStartEvent": ".types", "RelevantDocumentV2": ".types", "SearchDatasetRequestDataset": ".datasets", "SearchResult": ".types", @@ -209,6 
+231,17 @@ def __dir__(): "JobStatusResponseV2Status", "NotFoundError", "QueryResponseV2", + "QueryStreamCompleteEvent", + "QueryStreamErrorEvent", + "QueryStreamEvent", + "QueryStreamEvent_StreamComplete", + "QueryStreamEvent_StreamError", + "QueryStreamEvent_Text", + "QueryStreamEvent_ToolEnd", + "QueryStreamEvent_ToolStart", + "QueryStreamTextEvent", + "QueryStreamToolEndEvent", + "QueryStreamToolStartEvent", "RelevantDocumentV2", "SearchDatasetRequestDataset", "SearchResult", diff --git a/src/runcaptain/client.py b/src/runcaptain/client.py index 4c84d40..ff87d3b 100644 --- a/src/runcaptain/client.py +++ b/src/runcaptain/client.py @@ -2,12 +2,16 @@ from __future__ import annotations +import os import typing import httpx +from .core.api_error import ApiError from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper from .core.logging import LogConfig, Logger +from .core.request_options import RequestOptions from .environment import CaptainEnvironment +from .raw_client import AsyncRawCaptain, RawCaptain if typing.TYPE_CHECKING: from .collections.client import AsyncCollectionsClient, CollectionsClient @@ -35,8 +39,8 @@ class Captain: - authorization : str organization_id : typing.Optional[str] + key : typing.Optional[typing.Union[str, typing.Callable[[], str]]] headers : typing.Optional[typing.Dict[str, str]] Additional headers to send with every request. 
@@ -57,8 +61,8 @@ class Captain: from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) """ @@ -67,8 +71,8 @@ def __init__( *, base_url: typing.Optional[str] = None, environment: CaptainEnvironment = CaptainEnvironment.DEFAULT, - authorization: str, - organization_id: typing.Optional[str] = None, + organization_id: typing.Optional[str] = os.getenv("CAPTAIN_ORGANIZATION_ID"), + key: typing.Optional[typing.Union[str, typing.Callable[[], str]]] = os.getenv("CAPTAIN_API_KEY"), headers: typing.Optional[typing.Dict[str, str]] = None, timeout: typing.Optional[float] = None, follow_redirects: typing.Optional[bool] = True, @@ -78,10 +82,16 @@ def __init__( _defaulted_timeout = ( timeout if timeout is not None else 60 if httpx_client is None else httpx_client.timeout.read ) + if organization_id is None: + raise ApiError( + body="The client must be instantiated be either passing in organization_id or setting CAPTAIN_ORGANIZATION_ID" + ) + if key is None: + raise ApiError(body="The client must be instantiated be either passing in key or setting CAPTAIN_API_KEY") self._client_wrapper = SyncClientWrapper( base_url=_get_base_url(base_url=base_url, environment=environment), - authorization=authorization, organization_id=organization_id, + key=key, headers=headers, httpx_client=httpx_client if httpx_client is not None @@ -91,12 +101,80 @@ def __init__( timeout=_defaulted_timeout, logging=logging, ) + self._raw_client = RawCaptain(client_wrapper=self._client_wrapper) self._collections: typing.Optional[CollectionsClient] = None self._query: typing.Optional[QueryClient] = None self._indexing: typing.Optional[IndexingClient] = None self._jobs: typing.Optional[JobsClient] = None self._datasets: typing.Optional[DatasetsClient] = None + @property + def with_raw_response(self) -> RawCaptain: + """ + Retrieves a raw implementation of this client that returns raw responses. 
+ + Returns + ------- + RawCaptain + """ + return self._raw_client + + def post_v2collections_collection_name_documents_wipe( + self, collection_name: str, *, request_options: typing.Optional[RequestOptions] = None + ) -> None: + """ + Parameters + ---------- + collection_name : str + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + None + + Examples + -------- + from runcaptain import Captain + + client = Captain( + organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", + ) + client.post_v2collections_collection_name_documents_wipe( + collection_name="collection_name", + ) + """ + _response = self._raw_client.post_v2collections_collection_name_documents_wipe( + collection_name, request_options=request_options + ) + return _response.data + + def post_v2datasets_search(self, *, request_options: typing.Optional[RequestOptions] = None) -> None: + """ + Parameters + ---------- + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + None + + Examples + -------- + from runcaptain import Captain + + client = Captain( + organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", + ) + client.post_v2datasets_search() + """ + _response = self._raw_client.post_v2datasets_search(request_options=request_options) + return _response.data + @property def collections(self): if self._collections is None: @@ -156,8 +234,8 @@ class AsyncCaptain: - authorization : str organization_id : typing.Optional[str] + key : typing.Optional[typing.Union[str, typing.Callable[[], str]]] headers : typing.Optional[typing.Dict[str, str]] Additional headers to send with every request. 
@@ -178,8 +256,8 @@ class AsyncCaptain: from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) """ @@ -188,8 +266,8 @@ def __init__( *, base_url: typing.Optional[str] = None, environment: CaptainEnvironment = CaptainEnvironment.DEFAULT, - authorization: str, - organization_id: typing.Optional[str] = None, + organization_id: typing.Optional[str] = os.getenv("CAPTAIN_ORGANIZATION_ID"), + key: typing.Optional[typing.Union[str, typing.Callable[[], str]]] = os.getenv("CAPTAIN_API_KEY"), headers: typing.Optional[typing.Dict[str, str]] = None, timeout: typing.Optional[float] = None, follow_redirects: typing.Optional[bool] = True, @@ -199,10 +277,16 @@ def __init__( _defaulted_timeout = ( timeout if timeout is not None else 60 if httpx_client is None else httpx_client.timeout.read ) + if organization_id is None: + raise ApiError( + body="The client must be instantiated be either passing in organization_id or setting CAPTAIN_ORGANIZATION_ID" + ) + if key is None: + raise ApiError(body="The client must be instantiated be either passing in key or setting CAPTAIN_API_KEY") self._client_wrapper = AsyncClientWrapper( base_url=_get_base_url(base_url=base_url, environment=environment), - authorization=authorization, organization_id=organization_id, + key=key, headers=headers, httpx_client=httpx_client if httpx_client is not None @@ -212,12 +296,96 @@ def __init__( timeout=_defaulted_timeout, logging=logging, ) + self._raw_client = AsyncRawCaptain(client_wrapper=self._client_wrapper) self._collections: typing.Optional[AsyncCollectionsClient] = None self._query: typing.Optional[AsyncQueryClient] = None self._indexing: typing.Optional[AsyncIndexingClient] = None self._jobs: typing.Optional[AsyncJobsClient] = None self._datasets: typing.Optional[AsyncDatasetsClient] = None + @property + def with_raw_response(self) -> AsyncRawCaptain: + """ + Retrieves a raw implementation of this 
client that returns raw responses. + + Returns + ------- + AsyncRawCaptain + """ + return self._raw_client + + async def post_v2collections_collection_name_documents_wipe( + self, collection_name: str, *, request_options: typing.Optional[RequestOptions] = None + ) -> None: + """ + Parameters + ---------- + collection_name : str + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + None + + Examples + -------- + import asyncio + + from runcaptain import AsyncCaptain + + client = AsyncCaptain( + organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", + ) + + + async def main() -> None: + await client.post_v2collections_collection_name_documents_wipe( + collection_name="collection_name", + ) + + + asyncio.run(main()) + """ + _response = await self._raw_client.post_v2collections_collection_name_documents_wipe( + collection_name, request_options=request_options + ) + return _response.data + + async def post_v2datasets_search(self, *, request_options: typing.Optional[RequestOptions] = None) -> None: + """ + Parameters + ---------- + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
+ + Returns + ------- + None + + Examples + -------- + import asyncio + + from runcaptain import AsyncCaptain + + client = AsyncCaptain( + organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", + ) + + + async def main() -> None: + await client.post_v2datasets_search() + + + asyncio.run(main()) + """ + _response = await self._raw_client.post_v2datasets_search(request_options=request_options) + return _response.data + @property def collections(self): if self._collections is None: diff --git a/src/runcaptain/collections/client.py b/src/runcaptain/collections/client.py index d043171..6fc6efe 100644 --- a/src/runcaptain/collections/client.py +++ b/src/runcaptain/collections/client.py @@ -33,11 +33,7 @@ def with_raw_response(self) -> RawCollectionsClient: return self._raw_client def list_collections_v2( - self, - *, - limit: typing.Optional[int] = None, - offset: typing.Optional[int] = None, - request_options: typing.Optional[RequestOptions] = None, + self, *, request_options: typing.Optional[RequestOptions] = None ) -> CollectionListResponseV2: """ List all collections for an organization. @@ -46,12 +42,6 @@ def list_collections_v2( Parameters ---------- - limit : typing.Optional[int] - Maximum number of collections to return - - offset : typing.Optional[int] - Pagination offset - request_options : typing.Optional[RequestOptions] Request-specific configuration. 
@@ -65,12 +55,12 @@ def list_collections_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.list_collections_v2() """ - _response = self._raw_client.list_collections_v2(limit=limit, offset=offset, request_options=request_options) + _response = self._raw_client.list_collections_v2(request_options=request_options) return _response.data def create_collection_v2( @@ -86,7 +76,6 @@ def create_collection_v2( Parameters ---------- collection_name : str - Name of the collection to create description : typing.Optional[str] @@ -103,11 +92,11 @@ def create_collection_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.create_collection_v2( - collection_name="my_documents", + collection_name="collection_name", ) """ _response = self._raw_client.create_collection_v2( @@ -124,7 +113,6 @@ def delete_collection_v2( Parameters ---------- collection_name : str - Name of the collection to delete request_options : typing.Optional[RequestOptions] Request-specific configuration. 
@@ -139,11 +127,11 @@ def delete_collection_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.delete_collection_v2( - collection_name="my_documents", + collection_name="collection_name", ) """ _response = self._raw_client.delete_collection_v2(collection_name, request_options=request_options) @@ -174,7 +162,6 @@ def change_collection_environment_v2( Parameters ---------- collection_name : str - Name of the collection to move new_environment : ChangeEnvironmentRequestV2NewEnvironment The target environment to move the collection to @@ -192,12 +179,12 @@ def change_collection_environment_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.change_collection_environment_v2( - collection_name="my_documents", - new_environment="production", + collection_name="collection_name", + new_environment="development", ) """ _response = self._raw_client.change_collection_environment_v2( @@ -209,7 +196,6 @@ def list_documents_v2( self, collection_name: str, *, - limit: typing.Optional[int] = None, offset: typing.Optional[int] = None, request_options: typing.Optional[RequestOptions] = None, ) -> DocumentListResponseV2: @@ -219,10 +205,6 @@ def list_documents_v2( Parameters ---------- collection_name : str - Name of the collection - - limit : typing.Optional[int] - Maximum number of documents to return offset : typing.Optional[int] Pagination offset @@ -240,18 +222,15 @@ def list_documents_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.list_documents_v2( - collection_name="my_documents", - limit=100, - offset=0, + collection_name="collection_name", + offset=1, ) """ - _response = self._raw_client.list_documents_v2( - 
collection_name, limit=limit, offset=offset, request_options=request_options - ) + _response = self._raw_client.list_documents_v2(collection_name, offset=offset, request_options=request_options) return _response.data def wipe_collection_documents_v2( @@ -278,8 +257,8 @@ def wipe_collection_documents_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.wipe_collection_documents_v2( collection_name="collection_name", @@ -297,10 +276,8 @@ def delete_document_v2( Parameters ---------- collection_name : str - Name of the collection document_id : str - ID of the document to delete request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -315,8 +292,8 @@ def delete_document_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.collections.delete_document_v2( collection_name="collection_name", @@ -343,11 +320,7 @@ def with_raw_response(self) -> AsyncRawCollectionsClient: return self._raw_client async def list_collections_v2( - self, - *, - limit: typing.Optional[int] = None, - offset: typing.Optional[int] = None, - request_options: typing.Optional[RequestOptions] = None, + self, *, request_options: typing.Optional[RequestOptions] = None ) -> CollectionListResponseV2: """ List all collections for an organization. @@ -356,12 +329,6 @@ async def list_collections_v2( Parameters ---------- - limit : typing.Optional[int] - Maximum number of collections to return - - offset : typing.Optional[int] - Pagination offset - request_options : typing.Optional[RequestOptions] Request-specific configuration. 
@@ -377,8 +344,8 @@ async def list_collections_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) @@ -388,9 +355,7 @@ async def main() -> None: asyncio.run(main()) """ - _response = await self._raw_client.list_collections_v2( - limit=limit, offset=offset, request_options=request_options - ) + _response = await self._raw_client.list_collections_v2(request_options=request_options) return _response.data async def create_collection_v2( @@ -406,7 +371,6 @@ async def create_collection_v2( Parameters ---------- collection_name : str - Name of the collection to create description : typing.Optional[str] @@ -425,14 +389,14 @@ async def create_collection_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.collections.create_collection_v2( - collection_name="my_documents", + collection_name="collection_name", ) @@ -452,7 +416,6 @@ async def delete_collection_v2( Parameters ---------- collection_name : str - Name of the collection to delete request_options : typing.Optional[RequestOptions] Request-specific configuration. 
@@ -469,14 +432,14 @@ async def delete_collection_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.collections.delete_collection_v2( - collection_name="my_documents", + collection_name="collection_name", ) @@ -510,7 +473,6 @@ async def change_collection_environment_v2( Parameters ---------- collection_name : str - Name of the collection to move new_environment : ChangeEnvironmentRequestV2NewEnvironment The target environment to move the collection to @@ -530,15 +492,15 @@ async def change_collection_environment_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.collections.change_collection_environment_v2( - collection_name="my_documents", - new_environment="production", + collection_name="collection_name", + new_environment="development", ) @@ -553,7 +515,6 @@ async def list_documents_v2( self, collection_name: str, *, - limit: typing.Optional[int] = None, offset: typing.Optional[int] = None, request_options: typing.Optional[RequestOptions] = None, ) -> DocumentListResponseV2: @@ -563,10 +524,6 @@ async def list_documents_v2( Parameters ---------- collection_name : str - Name of the collection - - limit : typing.Optional[int] - Maximum number of documents to return offset : typing.Optional[int] Pagination offset @@ -586,23 +543,22 @@ async def list_documents_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.collections.list_documents_v2( - collection_name="my_documents", - limit=100, - offset=0, + collection_name="collection_name", + offset=1, ) asyncio.run(main()) """ _response = await self._raw_client.list_documents_v2( 
- collection_name, limit=limit, offset=offset, request_options=request_options + collection_name, offset=offset, request_options=request_options ) return _response.data @@ -632,8 +588,8 @@ async def wipe_collection_documents_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) @@ -659,10 +615,8 @@ async def delete_document_v2( Parameters ---------- collection_name : str - Name of the collection document_id : str - ID of the document to delete request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -679,8 +633,8 @@ async def delete_document_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) diff --git a/src/runcaptain/collections/raw_client.py b/src/runcaptain/collections/raw_client.py index 8db600a..8453bf2 100644 --- a/src/runcaptain/collections/raw_client.py +++ b/src/runcaptain/collections/raw_client.py @@ -29,11 +29,7 @@ def __init__(self, *, client_wrapper: SyncClientWrapper): self._client_wrapper = client_wrapper def list_collections_v2( - self, - *, - limit: typing.Optional[int] = None, - offset: typing.Optional[int] = None, - request_options: typing.Optional[RequestOptions] = None, + self, *, request_options: typing.Optional[RequestOptions] = None ) -> HttpResponse[CollectionListResponseV2]: """ List all collections for an organization. @@ -42,12 +38,6 @@ def list_collections_v2( Parameters ---------- - limit : typing.Optional[int] - Maximum number of collections to return - - offset : typing.Optional[int] - Pagination offset - request_options : typing.Optional[RequestOptions] Request-specific configuration. 
@@ -59,10 +49,6 @@ def list_collections_v2( _response = self._client_wrapper.httpx_client.request( "v2/collections", method="GET", - params={ - "limit": limit, - "offset": offset, - }, request_options=request_options, ) try: @@ -93,7 +79,6 @@ def create_collection_v2( Parameters ---------- collection_name : str - Name of the collection to create description : typing.Optional[str] @@ -141,7 +126,6 @@ def delete_collection_v2( Parameters ---------- collection_name : str - Name of the collection to delete request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -207,7 +191,6 @@ def change_collection_environment_v2( Parameters ---------- collection_name : str - Name of the collection to move new_environment : ChangeEnvironmentRequestV2NewEnvironment The target environment to move the collection to @@ -284,7 +267,6 @@ def list_documents_v2( self, collection_name: str, *, - limit: typing.Optional[int] = None, offset: typing.Optional[int] = None, request_options: typing.Optional[RequestOptions] = None, ) -> HttpResponse[DocumentListResponseV2]: @@ -294,10 +276,6 @@ def list_documents_v2( Parameters ---------- collection_name : str - Name of the collection - - limit : typing.Optional[int] - Maximum number of documents to return offset : typing.Optional[int] Pagination offset @@ -314,7 +292,6 @@ def list_documents_v2( f"v2/collections/{jsonable_encoder(collection_name)}/documents", method="GET", params={ - "limit": limit, "offset": offset, }, request_options=request_options, @@ -382,10 +359,8 @@ def delete_document_v2( Parameters ---------- collection_name : str - Name of the collection document_id : str - ID of the document to delete request_options : typing.Optional[RequestOptions] Request-specific configuration. 
@@ -432,11 +407,7 @@ def __init__(self, *, client_wrapper: AsyncClientWrapper): self._client_wrapper = client_wrapper async def list_collections_v2( - self, - *, - limit: typing.Optional[int] = None, - offset: typing.Optional[int] = None, - request_options: typing.Optional[RequestOptions] = None, + self, *, request_options: typing.Optional[RequestOptions] = None ) -> AsyncHttpResponse[CollectionListResponseV2]: """ List all collections for an organization. @@ -445,12 +416,6 @@ async def list_collections_v2( Parameters ---------- - limit : typing.Optional[int] - Maximum number of collections to return - - offset : typing.Optional[int] - Pagination offset - request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -462,10 +427,6 @@ async def list_collections_v2( _response = await self._client_wrapper.httpx_client.request( "v2/collections", method="GET", - params={ - "limit": limit, - "offset": offset, - }, request_options=request_options, ) try: @@ -496,7 +457,6 @@ async def create_collection_v2( Parameters ---------- collection_name : str - Name of the collection to create description : typing.Optional[str] @@ -544,7 +504,6 @@ async def delete_collection_v2( Parameters ---------- collection_name : str - Name of the collection to delete request_options : typing.Optional[RequestOptions] Request-specific configuration. 
@@ -610,7 +569,6 @@ async def change_collection_environment_v2( Parameters ---------- collection_name : str - Name of the collection to move new_environment : ChangeEnvironmentRequestV2NewEnvironment The target environment to move the collection to @@ -687,7 +645,6 @@ async def list_documents_v2( self, collection_name: str, *, - limit: typing.Optional[int] = None, offset: typing.Optional[int] = None, request_options: typing.Optional[RequestOptions] = None, ) -> AsyncHttpResponse[DocumentListResponseV2]: @@ -697,10 +654,6 @@ async def list_documents_v2( Parameters ---------- collection_name : str - Name of the collection - - limit : typing.Optional[int] - Maximum number of documents to return offset : typing.Optional[int] Pagination offset @@ -717,7 +670,6 @@ async def list_documents_v2( f"v2/collections/{jsonable_encoder(collection_name)}/documents", method="GET", params={ - "limit": limit, "offset": offset, }, request_options=request_options, @@ -785,10 +737,8 @@ async def delete_document_v2( Parameters ---------- collection_name : str - Name of the collection document_id : str - ID of the document to delete request_options : typing.Optional[RequestOptions] Request-specific configuration. 
diff --git a/src/runcaptain/core/client_wrapper.py b/src/runcaptain/core/client_wrapper.py index 8b7c254..7a3c74a 100644 --- a/src/runcaptain/core/client_wrapper.py +++ b/src/runcaptain/core/client_wrapper.py @@ -11,15 +11,15 @@ class BaseClientWrapper: def __init__( self, *, - authorization: str, - organization_id: typing.Optional[str] = None, + organization_id: str, + key: typing.Union[str, typing.Callable[[], str]], headers: typing.Optional[typing.Dict[str, str]] = None, base_url: str, timeout: typing.Optional[float] = None, logging: typing.Optional[typing.Union[LogConfig, Logger]] = None, ): - self._authorization = authorization self._organization_id = organization_id + self._key = key self._headers = headers self._base_url = base_url self._timeout = timeout @@ -29,19 +29,24 @@ def get_headers(self) -> typing.Dict[str, str]: import platform headers: typing.Dict[str, str] = { - "User-Agent": "captain-sdk/0.0.0", + "User-Agent": "captain-sdk/0.0.1", "X-Fern-Language": "Python", "X-Fern-Runtime": f"python/{platform.python_version()}", "X-Fern-Platform": f"{platform.system().lower()}/{platform.release()}", "X-Fern-SDK-Name": "captain-sdk", - "X-Fern-SDK-Version": "0.0.0", + "X-Fern-SDK-Version": "0.0.16", **(self.get_custom_headers() or {}), } - headers["Authorization"] = self._authorization - if self._organization_id is not None: - headers["X-Organization-ID"] = self._organization_id + headers["X-Organization-ID"] = self._organization_id + headers["Authorization"] = f"Bearer {self._get_key()}" return headers + def _get_key(self) -> str: + if isinstance(self._key, str): + return self._key + else: + return self._key() + def get_custom_headers(self) -> typing.Optional[typing.Dict[str, str]]: return self._headers @@ -56,8 +61,8 @@ class SyncClientWrapper(BaseClientWrapper): def __init__( self, *, - authorization: str, - organization_id: typing.Optional[str] = None, + organization_id: str, + key: typing.Union[str, typing.Callable[[], str]], headers: 
typing.Optional[typing.Dict[str, str]] = None, base_url: str, timeout: typing.Optional[float] = None, @@ -65,8 +70,8 @@ def __init__( httpx_client: httpx.Client, ): super().__init__( - authorization=authorization, organization_id=organization_id, + key=key, headers=headers, base_url=base_url, timeout=timeout, @@ -85,8 +90,8 @@ class AsyncClientWrapper(BaseClientWrapper): def __init__( self, *, - authorization: str, - organization_id: typing.Optional[str] = None, + organization_id: str, + key: typing.Union[str, typing.Callable[[], str]], headers: typing.Optional[typing.Dict[str, str]] = None, base_url: str, timeout: typing.Optional[float] = None, @@ -95,8 +100,8 @@ def __init__( httpx_client: httpx.AsyncClient, ): super().__init__( - authorization=authorization, organization_id=organization_id, + key=key, headers=headers, base_url=base_url, timeout=timeout, diff --git a/src/runcaptain/datasets/client.py b/src/runcaptain/datasets/client.py index e20d30b..e46bc4d 100644 --- a/src/runcaptain/datasets/client.py +++ b/src/runcaptain/datasets/client.py @@ -71,13 +71,13 @@ def search_dataset( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.datasets.search_dataset( dataset="nytimes", - q="artificial intelligence", - limit=5, + q="q", + limit=1, ) """ _response = self._raw_client.search_dataset(dataset, q=q, limit=limit, request_options=request_options) @@ -125,12 +125,12 @@ def get_dataset_article( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.datasets.get_dataset_article( dataset="nytimes", - url="https://www.nytimes.com/example/news_example.html", + url="url", ) """ _response = self._raw_client.get_dataset_article(dataset, url, request_options=request_options) @@ -199,16 +199,16 @@ async def search_dataset( from runcaptain import AsyncCaptain client = 
AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.datasets.search_dataset( dataset="nytimes", - q="artificial intelligence", - limit=5, + q="q", + limit=1, ) @@ -261,15 +261,15 @@ async def get_dataset_article( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.datasets.get_dataset_article( dataset="nytimes", - url="https://www.nytimes.com/example/news_example.html", + url="url", ) diff --git a/src/runcaptain/indexing/client.py b/src/runcaptain/indexing/client.py index 41b804c..73d31ae 100644 --- a/src/runcaptain/indexing/client.py +++ b/src/runcaptain/indexing/client.py @@ -43,7 +43,6 @@ def index_s3bucket_v2( aws_access_key_id: str, aws_secret_access_key: str, processing_type: IndexS3RequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, bucket_region: typing.Optional[str] = OMIT, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, @@ -56,7 +55,6 @@ def index_s3bucket_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the S3 bucket @@ -70,9 +68,6 @@ def index_s3bucket_v2( processing_type : IndexS3RequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. 
- idempotency_key : typing.Optional[str] - UUID for request deduplication - bucket_region : typing.Optional[str] AWS region where the bucket is located @@ -98,14 +93,14 @@ def index_s3bucket_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_s3bucket_v2( - collection_name="my_documents", - bucket_name="my-s3-bucket", + collection_name="collection_name", + bucket_name="my-company-docs", aws_access_key_id="AKIAIOSFODNN7EXAMPLE", - aws_secret_access_key="your_secret_key", + aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", bucket_region="us-east-1", processing_type="advanced", ) @@ -116,7 +111,6 @@ def index_s3bucket_v2( aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, processing_type=processing_type, - idempotency_key=idempotency_key, bucket_region=bucket_region, max_files=max_files, skip_existing=skip_existing, @@ -144,7 +138,6 @@ def index_s3file_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the S3 bucket @@ -180,15 +173,15 @@ def index_s3file_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_s3file_v2( - collection_name="my_documents", - bucket_name="my-s3-bucket", - file_uri="s3://my-s3-bucket/contracts/acme_contract.pdf", + collection_name="collection_name", + bucket_name="my-company-docs", + file_uri="s3://my-company-docs/contracts/acme_contract.pdf", aws_access_key_id="AKIAIOSFODNN7EXAMPLE", - aws_secret_access_key="your_secret_key", + aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", bucket_region="us-east-1", processing_type="advanced", ) @@ -224,7 +217,6 @@ def index_gcs_bucket_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name 
of the GCS bucket @@ -257,12 +249,12 @@ def index_gcs_bucket_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_gcs_bucket_v2( - collection_name="my_documents", - bucket_name="my-gcs-bucket", + collection_name="collection_name", + bucket_name="my-company-docs", service_account_json='{"type":"service_account","project_id":"my-project",...}', processing_type="advanced", ) @@ -296,7 +288,6 @@ def index_gcs_file_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the GCS bucket @@ -326,8 +317,8 @@ def index_gcs_file_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_gcs_file_v2( collection_name="collection_name", @@ -357,7 +348,6 @@ def index_s3directory_v2( aws_access_key_id: str, aws_secret_access_key: str, processing_type: IndexS3DirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, bucket_region: typing.Optional[str] = OMIT, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, @@ -370,7 +360,6 @@ def index_s3directory_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the S3 bucket @@ -387,9 +376,6 @@ def index_s3directory_v2( processing_type : IndexS3DirectoryRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. 
- idempotency_key : typing.Optional[str] - UUID for request deduplication - bucket_region : typing.Optional[str] AWS region where the bucket is located @@ -415,15 +401,15 @@ def index_s3directory_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_s3directory_v2( - collection_name="my_documents", - bucket_name="my-s3-bucket", + collection_name="collection_name", + bucket_name="my-company-docs", directory_path="reports/2024/january", aws_access_key_id="AKIAIOSFODNN7EXAMPLE", - aws_secret_access_key="your_secret_key", + aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", bucket_region="us-east-1", processing_type="advanced", ) @@ -435,7 +421,6 @@ def index_s3directory_v2( aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, processing_type=processing_type, - idempotency_key=idempotency_key, bucket_region=bucket_region, max_files=max_files, skip_existing=skip_existing, @@ -452,7 +437,6 @@ def index_gcs_directory_v2( directory_path: str, service_account_json: str, processing_type: IndexGcsDirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, @@ -464,7 +448,6 @@ def index_gcs_directory_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the GCS bucket @@ -478,9 +461,6 @@ def index_gcs_directory_v2( processing_type : IndexGcsDirectoryRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. 
- idempotency_key : typing.Optional[str] - UUID for request deduplication - max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -503,12 +483,12 @@ def index_gcs_directory_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_gcs_directory_v2( - collection_name="my_documents", - bucket_name="my-gcs-bucket", + collection_name="collection_name", + bucket_name="my-company-docs", directory_path="reports/2024/january", service_account_json='{"type":"service_account","project_id":"my-project",...}', processing_type="advanced", @@ -520,7 +500,6 @@ def index_gcs_directory_v2( directory_path=directory_path, service_account_json=service_account_json, processing_type=processing_type, - idempotency_key=idempotency_key, max_files=max_files, skip_existing=skip_existing, custom_metadata=custom_metadata, @@ -536,7 +515,6 @@ def index_azure_container_v2( account_name: str, account_key: str, processing_type: IndexAzureRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, @@ -548,7 +526,6 @@ def index_azure_container_v2( Parameters ---------- collection_name : str - Name of the collection to index into container_name : str Name of the Azure Blob Storage container @@ -562,9 +539,6 @@ def index_azure_container_v2( processing_type : IndexAzureRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. 
- idempotency_key : typing.Optional[str] - UUID for request deduplication - max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -587,14 +561,14 @@ def index_azure_container_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_azure_container_v2( - collection_name="my_documents", + collection_name="collection_name", container_name="my-container", account_name="mystorageaccount", - account_key="your_account_key", + account_key="your_account_key_base64", processing_type="advanced", ) """ @@ -604,7 +578,6 @@ def index_azure_container_v2( account_name=account_name, account_key=account_key, processing_type=processing_type, - idempotency_key=idempotency_key, max_files=max_files, skip_existing=skip_existing, custom_metadata=custom_metadata, @@ -630,7 +603,6 @@ def index_azure_file_v2( Parameters ---------- collection_name : str - Name of the collection to index into container_name : str Name of the Azure Blob Storage container @@ -663,15 +635,15 @@ def index_azure_file_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_azure_file_v2( - collection_name="my_documents", + collection_name="collection_name", container_name="my-container", file_uri="https://mystorageaccount.blob.core.windows.net/my-container/contracts/acme_contract.pdf", account_name="mystorageaccount", - account_key="your_account_key", + account_key="your_account_key_base64", processing_type="advanced", ) """ @@ -696,7 +668,6 @@ def index_azure_directory_v2( account_name: str, account_key: str, processing_type: IndexAzureDirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] 
= OMIT, @@ -708,7 +679,6 @@ def index_azure_directory_v2( Parameters ---------- collection_name : str - Name of the collection to index into container_name : str Name of the Azure Blob Storage container @@ -725,9 +695,6 @@ def index_azure_directory_v2( processing_type : IndexAzureDirectoryRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -750,15 +717,15 @@ def index_azure_directory_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.indexing.index_azure_directory_v2( - collection_name="my_documents", + collection_name="collection_name", container_name="my-container", directory_path="reports/2024/january", account_name="mystorageaccount", - account_key="your_account_key", + account_key="your_account_key_base64", processing_type="advanced", ) """ @@ -769,7 +736,6 @@ def index_azure_directory_v2( account_name=account_name, account_key=account_key, processing_type=processing_type, - idempotency_key=idempotency_key, max_files=max_files, skip_existing=skip_existing, custom_metadata=custom_metadata, @@ -801,7 +767,6 @@ async def index_s3bucket_v2( aws_access_key_id: str, aws_secret_access_key: str, processing_type: IndexS3RequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, bucket_region: typing.Optional[str] = OMIT, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, @@ -814,7 +779,6 @@ async def index_s3bucket_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the S3 
bucket @@ -828,9 +792,6 @@ async def index_s3bucket_v2( processing_type : IndexS3RequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - bucket_region : typing.Optional[str] AWS region where the bucket is located @@ -858,17 +819,17 @@ async def index_s3bucket_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.indexing.index_s3bucket_v2( - collection_name="my_documents", - bucket_name="my-s3-bucket", + collection_name="collection_name", + bucket_name="my-company-docs", aws_access_key_id="AKIAIOSFODNN7EXAMPLE", - aws_secret_access_key="your_secret_key", + aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", bucket_region="us-east-1", processing_type="advanced", ) @@ -882,7 +843,6 @@ async def main() -> None: aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, processing_type=processing_type, - idempotency_key=idempotency_key, bucket_region=bucket_region, max_files=max_files, skip_existing=skip_existing, @@ -910,7 +870,6 @@ async def index_s3file_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the S3 bucket @@ -948,18 +907,18 @@ async def index_s3file_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.indexing.index_s3file_v2( - collection_name="my_documents", - bucket_name="my-s3-bucket", - file_uri="s3://my-s3-bucket/contracts/acme_contract.pdf", + 
collection_name="collection_name", + bucket_name="my-company-docs", + file_uri="s3://my-company-docs/contracts/acme_contract.pdf", aws_access_key_id="AKIAIOSFODNN7EXAMPLE", - aws_secret_access_key="your_secret_key", + aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", bucket_region="us-east-1", processing_type="advanced", ) @@ -998,7 +957,6 @@ async def index_gcs_bucket_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the GCS bucket @@ -1033,15 +991,15 @@ async def index_gcs_bucket_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.indexing.index_gcs_bucket_v2( - collection_name="my_documents", - bucket_name="my-gcs-bucket", + collection_name="collection_name", + bucket_name="my-company-docs", service_account_json='{"type":"service_account","project_id":"my-project",...}', processing_type="advanced", ) @@ -1078,7 +1036,6 @@ async def index_gcs_file_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the GCS bucket @@ -1110,8 +1067,8 @@ async def index_gcs_file_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) @@ -1147,7 +1104,6 @@ async def index_s3directory_v2( aws_access_key_id: str, aws_secret_access_key: str, processing_type: IndexS3DirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, bucket_region: typing.Optional[str] = OMIT, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, @@ -1160,7 +1116,6 @@ async def index_s3directory_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the S3 bucket @@ -1177,9 +1132,6 @@ async def index_s3directory_v2( 
processing_type : IndexS3DirectoryRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - bucket_region : typing.Optional[str] AWS region where the bucket is located @@ -1207,18 +1159,18 @@ async def index_s3directory_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.indexing.index_s3directory_v2( - collection_name="my_documents", - bucket_name="my-s3-bucket", + collection_name="collection_name", + bucket_name="my-company-docs", directory_path="reports/2024/january", aws_access_key_id="AKIAIOSFODNN7EXAMPLE", - aws_secret_access_key="your_secret_key", + aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", bucket_region="us-east-1", processing_type="advanced", ) @@ -1233,7 +1185,6 @@ async def main() -> None: aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, processing_type=processing_type, - idempotency_key=idempotency_key, bucket_region=bucket_region, max_files=max_files, skip_existing=skip_existing, @@ -1250,7 +1201,6 @@ async def index_gcs_directory_v2( directory_path: str, service_account_json: str, processing_type: IndexGcsDirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, @@ -1262,7 +1212,6 @@ async def index_gcs_directory_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the GCS bucket @@ -1276,9 +1225,6 @@ async def 
index_gcs_directory_v2( processing_type : IndexGcsDirectoryRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -1303,15 +1249,15 @@ async def index_gcs_directory_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.indexing.index_gcs_directory_v2( - collection_name="my_documents", - bucket_name="my-gcs-bucket", + collection_name="collection_name", + bucket_name="my-company-docs", directory_path="reports/2024/january", service_account_json='{"type":"service_account","project_id":"my-project",...}', processing_type="advanced", @@ -1326,7 +1272,6 @@ async def main() -> None: directory_path=directory_path, service_account_json=service_account_json, processing_type=processing_type, - idempotency_key=idempotency_key, max_files=max_files, skip_existing=skip_existing, custom_metadata=custom_metadata, @@ -1342,7 +1287,6 @@ async def index_azure_container_v2( account_name: str, account_key: str, processing_type: IndexAzureRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, @@ -1354,7 +1298,6 @@ async def index_azure_container_v2( Parameters ---------- collection_name : str - Name of the collection to index into container_name : str Name of the Azure Blob Storage container @@ -1368,9 +1311,6 @@ async def index_azure_container_v2( processing_type : 
IndexAzureRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -1395,17 +1335,17 @@ async def index_azure_container_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.indexing.index_azure_container_v2( - collection_name="my_documents", + collection_name="collection_name", container_name="my-container", account_name="mystorageaccount", - account_key="your_account_key", + account_key="your_account_key_base64", processing_type="advanced", ) @@ -1418,7 +1358,6 @@ async def main() -> None: account_name=account_name, account_key=account_key, processing_type=processing_type, - idempotency_key=idempotency_key, max_files=max_files, skip_existing=skip_existing, custom_metadata=custom_metadata, @@ -1444,7 +1383,6 @@ async def index_azure_file_v2( Parameters ---------- collection_name : str - Name of the collection to index into container_name : str Name of the Azure Blob Storage container @@ -1479,18 +1417,18 @@ async def index_azure_file_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.indexing.index_azure_file_v2( - collection_name="my_documents", + collection_name="collection_name", container_name="my-container", file_uri="https://mystorageaccount.blob.core.windows.net/my-container/contracts/acme_contract.pdf", account_name="mystorageaccount", - account_key="your_account_key", + 
account_key="your_account_key_base64", processing_type="advanced", ) @@ -1518,7 +1456,6 @@ async def index_azure_directory_v2( account_name: str, account_key: str, processing_type: IndexAzureDirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, @@ -1530,7 +1467,6 @@ async def index_azure_directory_v2( Parameters ---------- collection_name : str - Name of the collection to index into container_name : str Name of the Azure Blob Storage container @@ -1547,9 +1483,6 @@ async def index_azure_directory_v2( processing_type : IndexAzureDirectoryRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. 
- idempotency_key : typing.Optional[str] - UUID for request deduplication - max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -1574,18 +1507,18 @@ async def index_azure_directory_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.indexing.index_azure_directory_v2( - collection_name="my_documents", + collection_name="collection_name", container_name="my-container", directory_path="reports/2024/january", account_name="mystorageaccount", - account_key="your_account_key", + account_key="your_account_key_base64", processing_type="advanced", ) @@ -1599,7 +1532,6 @@ async def main() -> None: account_name=account_name, account_key=account_key, processing_type=processing_type, - idempotency_key=idempotency_key, max_files=max_files, skip_existing=skip_existing, custom_metadata=custom_metadata, diff --git a/src/runcaptain/indexing/raw_client.py b/src/runcaptain/indexing/raw_client.py index bb8f09b..db63203 100644 --- a/src/runcaptain/indexing/raw_client.py +++ b/src/runcaptain/indexing/raw_client.py @@ -36,7 +36,6 @@ def index_s3bucket_v2( aws_access_key_id: str, aws_secret_access_key: str, processing_type: IndexS3RequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, bucket_region: typing.Optional[str] = OMIT, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, @@ -49,7 +48,6 @@ def index_s3bucket_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the S3 bucket @@ -63,9 +61,6 @@ def index_s3bucket_v2( processing_type : IndexS3RequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 
'basic' provides reliable OCR optimized for general document indexing and high-volume processing. - idempotency_key : typing.Optional[str] - UUID for request deduplication - bucket_region : typing.Optional[str] AWS region where the bucket is located @@ -101,7 +96,6 @@ def index_s3bucket_v2( }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, }, request_options=request_options, omit=OMIT, @@ -140,7 +134,6 @@ def index_s3file_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the S3 bucket @@ -222,7 +215,6 @@ def index_gcs_bucket_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the GCS bucket @@ -299,7 +291,6 @@ def index_gcs_file_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the GCS bucket @@ -364,7 +355,6 @@ def index_s3directory_v2( aws_access_key_id: str, aws_secret_access_key: str, processing_type: IndexS3DirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, bucket_region: typing.Optional[str] = OMIT, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, @@ -377,7 +367,6 @@ def index_s3directory_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the S3 bucket @@ -394,9 +383,6 @@ def index_s3directory_v2( processing_type : IndexS3DirectoryRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. 
- idempotency_key : typing.Optional[str] - UUID for request deduplication - bucket_region : typing.Optional[str] AWS region where the bucket is located @@ -433,7 +419,6 @@ def index_s3directory_v2( }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, }, request_options=request_options, omit=OMIT, @@ -461,7 +446,6 @@ def index_gcs_directory_v2( directory_path: str, service_account_json: str, processing_type: IndexGcsDirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, @@ -473,7 +457,6 @@ def index_gcs_directory_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the GCS bucket @@ -487,9 +470,6 @@ def index_gcs_directory_v2( processing_type : IndexGcsDirectoryRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. 
- idempotency_key : typing.Optional[str] - UUID for request deduplication - max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -521,7 +501,6 @@ def index_gcs_directory_v2( }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, }, request_options=request_options, omit=OMIT, @@ -549,7 +528,6 @@ def index_azure_container_v2( account_name: str, account_key: str, processing_type: IndexAzureRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, @@ -561,7 +539,6 @@ def index_azure_container_v2( Parameters ---------- collection_name : str - Name of the collection to index into container_name : str Name of the Azure Blob Storage container @@ -575,9 +552,6 @@ def index_azure_container_v2( processing_type : IndexAzureRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. 
- idempotency_key : typing.Optional[str] - UUID for request deduplication - max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -609,7 +583,6 @@ def index_azure_container_v2( }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, }, request_options=request_options, omit=OMIT, @@ -647,7 +620,6 @@ def index_azure_file_v2( Parameters ---------- collection_name : str - Name of the collection to index into container_name : str Name of the Azure Blob Storage container @@ -716,7 +688,6 @@ def index_azure_directory_v2( account_name: str, account_key: str, processing_type: IndexAzureDirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, @@ -728,7 +699,6 @@ def index_azure_directory_v2( Parameters ---------- collection_name : str - Name of the collection to index into container_name : str Name of the Azure Blob Storage container @@ -745,9 +715,6 @@ def index_azure_directory_v2( processing_type : IndexAzureDirectoryRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. 
- idempotency_key : typing.Optional[str] - UUID for request deduplication - max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -780,7 +747,6 @@ def index_azure_directory_v2( }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, }, request_options=request_options, omit=OMIT, @@ -813,7 +779,6 @@ async def index_s3bucket_v2( aws_access_key_id: str, aws_secret_access_key: str, processing_type: IndexS3RequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, bucket_region: typing.Optional[str] = OMIT, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, @@ -826,7 +791,6 @@ async def index_s3bucket_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the S3 bucket @@ -840,9 +804,6 @@ async def index_s3bucket_v2( processing_type : IndexS3RequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. 
- idempotency_key : typing.Optional[str] - UUID for request deduplication - bucket_region : typing.Optional[str] AWS region where the bucket is located @@ -878,7 +839,6 @@ async def index_s3bucket_v2( }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, }, request_options=request_options, omit=OMIT, @@ -917,7 +877,6 @@ async def index_s3file_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the S3 bucket @@ -999,7 +958,6 @@ async def index_gcs_bucket_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the GCS bucket @@ -1076,7 +1034,6 @@ async def index_gcs_file_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the GCS bucket @@ -1141,7 +1098,6 @@ async def index_s3directory_v2( aws_access_key_id: str, aws_secret_access_key: str, processing_type: IndexS3DirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, bucket_region: typing.Optional[str] = OMIT, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, @@ -1154,7 +1110,6 @@ async def index_s3directory_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the S3 bucket @@ -1171,9 +1126,6 @@ async def index_s3directory_v2( processing_type : IndexS3DirectoryRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. 
- idempotency_key : typing.Optional[str] - UUID for request deduplication - bucket_region : typing.Optional[str] AWS region where the bucket is located @@ -1210,7 +1162,6 @@ async def index_s3directory_v2( }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, }, request_options=request_options, omit=OMIT, @@ -1238,7 +1189,6 @@ async def index_gcs_directory_v2( directory_path: str, service_account_json: str, processing_type: IndexGcsDirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, @@ -1250,7 +1200,6 @@ async def index_gcs_directory_v2( Parameters ---------- collection_name : str - Name of the collection to index into bucket_name : str Name of the GCS bucket @@ -1264,9 +1213,6 @@ async def index_gcs_directory_v2( processing_type : IndexGcsDirectoryRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. 
- idempotency_key : typing.Optional[str] - UUID for request deduplication - max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -1298,7 +1244,6 @@ async def index_gcs_directory_v2( }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, }, request_options=request_options, omit=OMIT, @@ -1326,7 +1271,6 @@ async def index_azure_container_v2( account_name: str, account_key: str, processing_type: IndexAzureRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, @@ -1338,7 +1282,6 @@ async def index_azure_container_v2( Parameters ---------- collection_name : str - Name of the collection to index into container_name : str Name of the Azure Blob Storage container @@ -1352,9 +1295,6 @@ async def index_azure_container_v2( processing_type : IndexAzureRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. 
- idempotency_key : typing.Optional[str] - UUID for request deduplication - max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -1386,7 +1326,6 @@ async def index_azure_container_v2( }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, }, request_options=request_options, omit=OMIT, @@ -1424,7 +1363,6 @@ async def index_azure_file_v2( Parameters ---------- collection_name : str - Name of the collection to index into container_name : str Name of the Azure Blob Storage container @@ -1493,7 +1431,6 @@ async def index_azure_directory_v2( account_name: str, account_key: str, processing_type: IndexAzureDirectoryRequestV2ProcessingType, - idempotency_key: typing.Optional[str] = None, max_files: typing.Optional[int] = OMIT, skip_existing: typing.Optional[bool] = OMIT, custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, @@ -1505,7 +1442,6 @@ async def index_azure_directory_v2( Parameters ---------- collection_name : str - Name of the collection to index into container_name : str Name of the Azure Blob Storage container @@ -1522,9 +1458,6 @@ async def index_azure_directory_v2( processing_type : IndexAzureDirectoryRequestV2ProcessingType Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing. 
- idempotency_key : typing.Optional[str] - UUID for request deduplication - max_files : typing.Optional[int] Maximum number of files to index (optional) @@ -1557,7 +1490,6 @@ async def index_azure_directory_v2( }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, }, request_options=request_options, omit=OMIT, diff --git a/src/runcaptain/jobs/client.py b/src/runcaptain/jobs/client.py index cd021dd..10f5a5b 100644 --- a/src/runcaptain/jobs/client.py +++ b/src/runcaptain/jobs/client.py @@ -58,7 +58,6 @@ def get_job_status_v2( Parameters ---------- job_id : str - The job ID returned from an indexing request request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -73,11 +72,11 @@ def get_job_status_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.jobs.get_job_status_v2( - job_id="abc123xyz-1234567890", + job_id="job_id", ) """ _response = self._raw_client.get_job_status_v2(job_id, request_options=request_options) @@ -96,7 +95,6 @@ def cancel_job_v2( Parameters ---------- job_id : str - The job ID to cancel request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -111,11 +109,11 @@ def cancel_job_v2( from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.jobs.cancel_job_v2( - job_id="abc123xyz-1234567890", + job_id="job_id", ) """ _response = self._raw_client.cancel_job_v2(job_id, request_options=request_options) @@ -171,7 +169,6 @@ async def get_job_status_v2( Parameters ---------- job_id : str - The job ID returned from an indexing request request_options : typing.Optional[RequestOptions] Request-specific configuration. 
@@ -188,14 +185,14 @@ async def get_job_status_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.jobs.get_job_status_v2( - job_id="abc123xyz-1234567890", + job_id="job_id", ) @@ -217,7 +214,6 @@ async def cancel_job_v2( Parameters ---------- job_id : str - The job ID to cancel request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -234,14 +230,14 @@ async def cancel_job_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.jobs.cancel_job_v2( - job_id="abc123xyz-1234567890", + job_id="job_id", ) diff --git a/src/runcaptain/jobs/raw_client.py b/src/runcaptain/jobs/raw_client.py index 8f15a15..74cd595 100644 --- a/src/runcaptain/jobs/raw_client.py +++ b/src/runcaptain/jobs/raw_client.py @@ -52,7 +52,6 @@ def get_job_status_v2( Parameters ---------- job_id : str - The job ID returned from an indexing request request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -106,7 +105,6 @@ def cancel_job_v2( Parameters ---------- job_id : str - The job ID to cancel request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -175,7 +173,6 @@ async def get_job_status_v2( Parameters ---------- job_id : str - The job ID returned from an indexing request request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -229,7 +226,6 @@ async def cancel_job_v2( Parameters ---------- job_id : str - The job ID to cancel request_options : typing.Optional[RequestOptions] Request-specific configuration. 
diff --git a/src/runcaptain/query/client.py b/src/runcaptain/query/client.py index a923999..f73849a 100644 --- a/src/runcaptain/query/client.py +++ b/src/runcaptain/query/client.py @@ -4,7 +4,7 @@ from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper from ..core.request_options import RequestOptions -from ..types.query_response_v2 import QueryResponseV2 +from ..types.query_stream_event import QueryStreamEvent from .raw_client import AsyncRawQueryClient, RawQueryClient # this is used as the default value for optional parameters @@ -26,20 +26,18 @@ def with_raw_response(self) -> RawQueryClient: """ return self._raw_client - def collection_v2( + def collection_v2stream( self, collection_name: str, *, query: str, - idempotency_key: typing.Optional[str] = None, inference: typing.Optional[bool] = OMIT, - stream: typing.Optional[bool] = OMIT, top_k: typing.Optional[int] = OMIT, rerank: typing.Optional[bool] = OMIT, metadata_filter: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, custom_prompt: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, - ) -> QueryResponseV2: + ) -> typing.Iterator[QueryStreamEvent]: """ Execute a natural language query against a collection. @@ -48,69 +46,159 @@ def collection_v2( ## Streaming (SSE) - When `stream: true` and `inference: true`, the JSON response includes a `request_id`. Refer to the sample implementations to best make use of streams. + When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator. ### SSE Event Types - | Event | Format | Description | - |-------|--------|-------------| - | Text chunk | `data: \\n\\n` | Incremental text of the AI response. Plain text (not JSON). Newlines within text are escaped as `\\n`. 
| - | Tool start | `event: tool_start\\ndata: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"..."}}\\n\\n` | The AI agent is performing a knowledge base search. The `args.query` field contains the search query. | - | Tool end | `event: tool_end\\ndata: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}}\\n\\n` | A search completed. `tool_call_id` correlates with the preceding `tool_start`. `result_summary.resultCount` indicates how many results were found. | - | Complete | `event: complete\\ndata: {"type":"stream_complete"}\\n\\n` | Stream finished successfully. Close the connection after receiving this. | - | Error | `event: error\\ndata: {"type":"stream_error","error":"..."}\\n\\n` | An error occurred during generation. Close the connection. | + | `type` value | Schema | Description | + |---|---|---| + | `text` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. | + | `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. | + | `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. | + | `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. | + | `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. 
| ### Example SSE Stream ``` - event: tool_start - data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"revenue projections Q4"}} + data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}} + + data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}} + + data: {"type":"text","content":"Based on the documents"} + data: {"type":"text","content":" provided, the revenue"} + data: {"type":"text","content":" projections for Q4 show"} + data: {"type":"text","content":" a 15% increase over Q3."} + + data: {"type":"stream_complete","metadata":{"totalResults":12},"stats":{"totalTokens":150}} + ``` + + ### Notes + + - The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair. + - Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search. + - Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses. + + Parameters + ---------- + collection_name : str + + query : str + The natural language query to search for + + inference : typing.Optional[bool] + Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results. + + top_k : typing.Optional[int] + Number of results to return. Only valid when inference=false. Not supported when inference=true (the agent controls its own search strategy). + + rerank : typing.Optional[bool] + Enable Voyage AI rerank-2.5 reranking for improved relevance ordering. Adds ~100-300ms latency. + + metadata_filter : typing.Optional[typing.Dict[str, typing.Any]] + Filter expression for vector search. 
Supports: $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin, $and, $or + + custom_prompt : typing.Optional[str] + Custom system prompt to override the default RAG prompt when inference=true. Allows customizing how the LLM processes and responds to the query with the retrieved context. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Yields + ------ + typing.Iterator[QueryStreamEvent] + + + Examples + -------- + from runcaptain import Captain + + client = Captain( + organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", + ) + response = client.query.collection_v2stream( + collection_name="collection_name", + query="query", + ) + for chunk in response: + yield chunk + """ + with self._raw_client.collection_v2stream( + collection_name, + query=query, + inference=inference, + top_k=top_k, + rerank=rerank, + metadata_filter=metadata_filter, + custom_prompt=custom_prompt, + request_options=request_options, + ) as r: + yield from r.data - event: tool_end - data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}} + def collection_v2( + self, + collection_name: str, + *, + query: str, + inference: typing.Optional[bool] = OMIT, + top_k: typing.Optional[int] = OMIT, + rerank: typing.Optional[bool] = OMIT, + metadata_filter: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + custom_prompt: typing.Optional[str] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> typing.Any: + """ + Execute a natural language query against a collection. - data: Based on the documents - data: provided, the revenue - data: projections for Q4 show - data: a 15% increase over Q3. + When `inference=true`, returns an AI-generated response with relevant documents. + When `inference=false`, returns raw search results with content and metadata. 
- event: tool_start - data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_2","args":{"query":"Q3 comparison metrics"}} + ## Streaming (SSE) - event: tool_end - data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_2","ok":true,"result_summary":{"resultCount":8}} + When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator. - data: Compared to Q3, the key - data: drivers were operational - data: efficiency gains. + ### SSE Event Types + + | `type` value | Schema | Description | + |---|---|---| + | `text` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. | + | `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. | + | `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. | + | `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. | + | `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. | + + ### Example SSE Stream - event: complete - data: {"type":"stream_complete"} + ``` + data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}} + + data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}} + + data: {"type":"text","content":"Based on the documents"} + data: {"type":"text","content":" provided, the revenue"} + data: {"type":"text","content":" projections for Q4 show"} + data: {"type":"text","content":" a 15% increase over Q3."} + + data: {"type":"stream_complete","metadata":{"totalResults":12},"stats":{"totalTokens":150}} ``` ### Notes - - The agent may perform multiple searches per query. Each search produces a `tool_start`/`tool_end` pair. 
+ - The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair. - Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search. - Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses. Parameters ---------- collection_name : str - Name of the collection to query query : str The natural language query to search for - idempotency_key : typing.Optional[str] - UUID for request deduplication - inference : typing.Optional[bool] Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results. - stream : typing.Optional[bool] - Enable real-time streaming of the response - top_k : typing.Optional[int] Number of results to return. Only valid when inference=false. Not supported when inference=true (the agent controls its own search strategy). @@ -128,32 +216,26 @@ def collection_v2( Returns ------- - QueryResponseV2 - Successful Response — returns JSON when `stream: false`, or SSE event stream when `stream: true`. 
+ typing.Any + Examples -------- from runcaptain import Captain client = Captain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) client.query.collection_v2( - collection_name="my_documents", - query="What are the key terms in the contract?", - inference=False, - stream=False, - top_k=10, - rerank=True, + collection_name="collection_name", + query="query", ) """ _response = self._raw_client.collection_v2( collection_name, query=query, - idempotency_key=idempotency_key, inference=inference, - stream=stream, top_k=top_k, rerank=rerank, metadata_filter=metadata_filter, @@ -178,20 +260,18 @@ def with_raw_response(self) -> AsyncRawQueryClient: """ return self._raw_client - async def collection_v2( + async def collection_v2stream( self, collection_name: str, *, query: str, - idempotency_key: typing.Optional[str] = None, inference: typing.Optional[bool] = OMIT, - stream: typing.Optional[bool] = OMIT, top_k: typing.Optional[int] = OMIT, rerank: typing.Optional[bool] = OMIT, metadata_filter: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, custom_prompt: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, - ) -> QueryResponseV2: + ) -> typing.AsyncIterator[QueryStreamEvent]: """ Execute a natural language query against a collection. @@ -200,69 +280,168 @@ async def collection_v2( ## Streaming (SSE) - When `stream: true` and `inference: true`, the JSON response includes a `request_id`. Refer to the sample implementations to best make use of streams. + When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator. ### SSE Event Types - | Event | Format | Description | - |-------|--------|-------------| - | Text chunk | `data: \\n\\n` | Incremental text of the AI response. Plain text (not JSON). Newlines within text are escaped as `\\n`. 
| - | Tool start | `event: tool_start\\ndata: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"..."}}\\n\\n` | The AI agent is performing a knowledge base search. The `args.query` field contains the search query. | - | Tool end | `event: tool_end\\ndata: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}}\\n\\n` | A search completed. `tool_call_id` correlates with the preceding `tool_start`. `result_summary.resultCount` indicates how many results were found. | - | Complete | `event: complete\\ndata: {"type":"stream_complete"}\\n\\n` | Stream finished successfully. Close the connection after receiving this. | - | Error | `event: error\\ndata: {"type":"stream_error","error":"..."}\\n\\n` | An error occurred during generation. Close the connection. | + | `type` value | Schema | Description | + |---|---|---| + | `text` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. | + | `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. | + | `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. | + | `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. | + | `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. 
| ### Example SSE Stream ``` - event: tool_start - data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"revenue projections Q4"}} + data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}} + + data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}} + + data: {"type":"text","content":"Based on the documents"} + data: {"type":"text","content":" provided, the revenue"} + data: {"type":"text","content":" projections for Q4 show"} + data: {"type":"text","content":" a 15% increase over Q3."} + + data: {"type":"stream_complete","metadata":{"totalResults":12},"stats":{"totalTokens":150}} + ``` + + ### Notes + + - The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair. + - Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search. + - Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses. + + Parameters + ---------- + collection_name : str + + query : str + The natural language query to search for + + inference : typing.Optional[bool] + Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results. + + top_k : typing.Optional[int] + Number of results to return. Only valid when inference=false. Not supported when inference=true (the agent controls its own search strategy). + + rerank : typing.Optional[bool] + Enable Voyage AI rerank-2.5 reranking for improved relevance ordering. Adds ~100-300ms latency. + + metadata_filter : typing.Optional[typing.Dict[str, typing.Any]] + Filter expression for vector search. 
Supports: $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin, $and, $or + + custom_prompt : typing.Optional[str] + Custom system prompt to override the default RAG prompt when inference=true. Allows customizing how the LLM processes and responds to the query with the retrieved context. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Yields + ------ + typing.AsyncIterator[QueryStreamEvent] + + + Examples + -------- + import asyncio + + from runcaptain import AsyncCaptain + + client = AsyncCaptain( + organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", + ) - event: tool_end - data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}} - data: Based on the documents - data: provided, the revenue - data: projections for Q4 show - data: a 15% increase over Q3. + async def main() -> None: + response = await client.query.collection_v2stream( + collection_name="collection_name", + query="query", + ) + async for chunk in response: + yield chunk - event: tool_start - data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_2","args":{"query":"Q3 comparison metrics"}} - event: tool_end - data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_2","ok":true,"result_summary":{"resultCount":8}} + asyncio.run(main()) + """ + async with self._raw_client.collection_v2stream( + collection_name, + query=query, + inference=inference, + top_k=top_k, + rerank=rerank, + metadata_filter=metadata_filter, + custom_prompt=custom_prompt, + request_options=request_options, + ) as r: + async for _chunk in r.data: + yield _chunk - data: Compared to Q3, the key - data: drivers were operational - data: efficiency gains. 
+ async def collection_v2( + self, + collection_name: str, + *, + query: str, + inference: typing.Optional[bool] = OMIT, + top_k: typing.Optional[int] = OMIT, + rerank: typing.Optional[bool] = OMIT, + metadata_filter: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + custom_prompt: typing.Optional[str] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> typing.Any: + """ + Execute a natural language query against a collection. + + When `inference=true`, returns an AI-generated response with relevant documents. + When `inference=false`, returns raw search results with content and metadata. + + ## Streaming (SSE) + + When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator. + + ### SSE Event Types - event: complete - data: {"type":"stream_complete"} + | `type` value | Schema | Description | + |---|---|---| + | `text` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. | + | `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. | + | `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. | + | `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. | + | `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. 
| + + ### Example SSE Stream + + ``` + data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}} + + data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}} + + data: {"type":"text","content":"Based on the documents"} + data: {"type":"text","content":" provided, the revenue"} + data: {"type":"text","content":" projections for Q4 show"} + data: {"type":"text","content":" a 15% increase over Q3."} + + data: {"type":"stream_complete","metadata":{"totalResults":12},"stats":{"totalTokens":150}} ``` ### Notes - - The agent may perform multiple searches per query. Each search produces a `tool_start`/`tool_end` pair. + - The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair. - Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search. - Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses. Parameters ---------- collection_name : str - Name of the collection to query query : str The natural language query to search for - idempotency_key : typing.Optional[str] - UUID for request deduplication - inference : typing.Optional[bool] Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results. - stream : typing.Optional[bool] - Enable real-time streaming of the response - top_k : typing.Optional[int] Number of results to return. Only valid when inference=false. Not supported when inference=true (the agent controls its own search strategy). @@ -280,8 +459,8 @@ async def collection_v2( Returns ------- - QueryResponseV2 - Successful Response — returns JSON when `stream: false`, or SSE event stream when `stream: true`. 
+ typing.Any + Examples -------- @@ -290,19 +469,15 @@ async def collection_v2( from runcaptain import AsyncCaptain client = AsyncCaptain( - authorization="YOUR_AUTHORIZATION", organization_id="YOUR_ORGANIZATION_ID", + key="YOUR_KEY", ) async def main() -> None: await client.query.collection_v2( - collection_name="my_documents", - query="What are the key terms in the contract?", - inference=False, - stream=False, - top_k=10, - rerank=True, + collection_name="collection_name", + query="query", ) @@ -311,9 +486,7 @@ async def main() -> None: _response = await self._raw_client.collection_v2( collection_name, query=query, - idempotency_key=idempotency_key, inference=inference, - stream=stream, top_k=top_k, rerank=rerank, metadata_filter=metadata_filter, diff --git a/src/runcaptain/query/raw_client.py b/src/runcaptain/query/raw_client.py index 1c53391..8b56e1e 100644 --- a/src/runcaptain/query/raw_client.py +++ b/src/runcaptain/query/raw_client.py @@ -1,15 +1,18 @@ # This file was auto-generated by Fern from our API Definition. +import contextlib import typing from json.decoder import JSONDecodeError +from logging import error, warning from ..core.api_error import ApiError from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper from ..core.http_response import AsyncHttpResponse, HttpResponse +from ..core.http_sse._api import EventSource from ..core.jsonable_encoder import jsonable_encoder -from ..core.pydantic_utilities import parse_obj_as +from ..core.pydantic_utilities import parse_obj_as, parse_sse_obj from ..core.request_options import RequestOptions -from ..types.query_response_v2 import QueryResponseV2 +from ..types.query_stream_event import QueryStreamEvent # this is used as the default value for optional parameters OMIT = typing.cast(typing.Any, ...) 
@@ -19,20 +22,19 @@ class RawQueryClient: def __init__(self, *, client_wrapper: SyncClientWrapper): self._client_wrapper = client_wrapper - def collection_v2( + @contextlib.contextmanager + def collection_v2stream( self, collection_name: str, *, query: str, - idempotency_key: typing.Optional[str] = None, inference: typing.Optional[bool] = OMIT, - stream: typing.Optional[bool] = OMIT, top_k: typing.Optional[int] = OMIT, rerank: typing.Optional[bool] = OMIT, metadata_filter: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, custom_prompt: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, - ) -> HttpResponse[QueryResponseV2]: + ) -> typing.Iterator[HttpResponse[typing.Iterator[QueryStreamEvent]]]: """ Execute a natural language query against a collection. @@ -41,69 +43,191 @@ def collection_v2( ## Streaming (SSE) - When `stream: true` and `inference: true`, the JSON response includes a `request_id`. Refer to the sample implementations to best make use of streams. + When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator. ### SSE Event Types - | Event | Format | Description | - |-------|--------|-------------| - | Text chunk | `data: \\n\\n` | Incremental text of the AI response. Plain text (not JSON). Newlines within text are escaped as `\\n`. | - | Tool start | `event: tool_start\\ndata: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"..."}}\\n\\n` | The AI agent is performing a knowledge base search. The `args.query` field contains the search query. | - | Tool end | `event: tool_end\\ndata: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}}\\n\\n` | A search completed. `tool_call_id` correlates with the preceding `tool_start`. `result_summary.resultCount` indicates how many results were found. 
| - | Complete | `event: complete\\ndata: {"type":"stream_complete"}\\n\\n` | Stream finished successfully. Close the connection after receiving this. | - | Error | `event: error\\ndata: {"type":"stream_error","error":"..."}\\n\\n` | An error occurred during generation. Close the connection. | + | `type` value | Schema | Description | + |---|---|---| + | `text` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. | + | `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. | + | `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. | + | `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. | + | `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. | ### Example SSE Stream ``` - event: tool_start - data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"revenue projections Q4"}} + data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}} + + data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}} + + data: {"type":"text","content":"Based on the documents"} + data: {"type":"text","content":" provided, the revenue"} + data: {"type":"text","content":" projections for Q4 show"} + data: {"type":"text","content":" a 15% increase over Q3."} + + data: {"type":"stream_complete","metadata":{"totalResults":12},"stats":{"totalTokens":150}} + ``` + + ### Notes + + - The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair. + - Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search. 
+ - Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses. + + Parameters + ---------- + collection_name : str + + query : str + The natural language query to search for + + inference : typing.Optional[bool] + Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results. + + top_k : typing.Optional[int] + Number of results to return. Only valid when inference=false. Not supported when inference=true (the agent controls its own search strategy). + + rerank : typing.Optional[bool] + Enable Voyage AI rerank-2.5 reranking for improved relevance ordering. Adds ~100-300ms latency. + + metadata_filter : typing.Optional[typing.Dict[str, typing.Any]] + Filter expression for vector search. Supports: $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin, $and, $or + + custom_prompt : typing.Optional[str] + Custom system prompt to override the default RAG prompt when inference=true. Allows customizing how the LLM processes and responds to the query with the retrieved context. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
+ + Yields + ------ + typing.Iterator[HttpResponse[typing.Iterator[QueryStreamEvent]]] + + """ + with self._client_wrapper.httpx_client.stream( + f"v2/collections/{jsonable_encoder(collection_name)}/query", + method="POST", + json={ + "query": query, + "inference": inference, + "top_k": top_k, + "rerank": rerank, + "metadata_filter": metadata_filter, + "custom_prompt": custom_prompt, + "stream": True, + }, + headers={ + "content-type": "application/json", + }, + request_options=request_options, + omit=OMIT, + ) as _response: + + def _stream() -> HttpResponse[typing.Iterator[QueryStreamEvent]]: + try: + if 200 <= _response.status_code < 300: + + def _iter(): + _event_source = EventSource(_response) + for _sse in _event_source.iter_sse(): + if _sse.data == None: + return + try: + yield typing.cast( + QueryStreamEvent, + parse_sse_obj( + sse=_sse, + type_=QueryStreamEvent, # type: ignore + ), + ) + except JSONDecodeError as e: + warning(f"Skipping SSE event with invalid JSON: {e}, sse: {_sse!r}") + except (TypeError, ValueError, KeyError, AttributeError) as e: + warning( + f"Skipping SSE event due to model construction error: {type(e).__name__}: {e}, sse: {_sse!r}" + ) + except Exception as e: + error( + f"Unexpected error processing SSE event: {type(e).__name__}: {e}, sse: {_sse!r}" + ) + return + + return HttpResponse(response=_response, data=_iter()) + _response.read() + _response_json = _response.json() + except JSONDecodeError: + raise ApiError( + status_code=_response.status_code, headers=dict(_response.headers), body=_response.text + ) + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) + + yield _stream() + + def collection_v2( + self, + collection_name: str, + *, + query: str, + inference: typing.Optional[bool] = OMIT, + top_k: typing.Optional[int] = OMIT, + rerank: typing.Optional[bool] = OMIT, + metadata_filter: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + custom_prompt: typing.Optional[str] = 
OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> HttpResponse[typing.Any]: + """ + Execute a natural language query against a collection. + + When `inference=true`, returns an AI-generated response with relevant documents. + When `inference=false`, returns raw search results with content and metadata. - event: tool_end - data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}} + ## Streaming (SSE) + + When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator. + + ### SSE Event Types + + | `type` value | Schema | Description | + |---|---|---| + | `text` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. | + | `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. | + | `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. | + | `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. | + | `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. | - data: Based on the documents - data: provided, the revenue - data: projections for Q4 show - data: a 15% increase over Q3. 
+ ### Example SSE Stream - event: tool_start - data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_2","args":{"query":"Q3 comparison metrics"}} + ``` + data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}} - event: tool_end - data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_2","ok":true,"result_summary":{"resultCount":8}} + data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}} - data: Compared to Q3, the key - data: drivers were operational - data: efficiency gains. + data: {"type":"text","content":"Based on the documents"} + data: {"type":"text","content":" provided, the revenue"} + data: {"type":"text","content":" projections for Q4 show"} + data: {"type":"text","content":" a 15% increase over Q3."} - event: complete - data: {"type":"stream_complete"} + data: {"type":"stream_complete","metadata":{"totalResults":12},"stats":{"totalTokens":150}} ``` ### Notes - - The agent may perform multiple searches per query. Each search produces a `tool_start`/`tool_end` pair. + - The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair. - Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search. - Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses. Parameters ---------- collection_name : str - Name of the collection to query query : str The natural language query to search for - idempotency_key : typing.Optional[str] - UUID for request deduplication - inference : typing.Optional[bool] Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results. 
- stream : typing.Optional[bool] - Enable real-time streaming of the response - top_k : typing.Optional[int] Number of results to return. Only valid when inference=false. Not supported when inference=true (the agent controls its own search strategy). @@ -121,8 +245,8 @@ def collection_v2( Returns ------- - HttpResponse[QueryResponseV2] - Successful Response — returns JSON when `stream: false`, or SSE event stream when `stream: true`. + HttpResponse[typing.Any] + """ _response = self._client_wrapper.httpx_client.request( f"v2/collections/{jsonable_encoder(collection_name)}/query", @@ -130,25 +254,26 @@ def collection_v2( json={ "query": query, "inference": inference, - "stream": stream, "top_k": top_k, "rerank": rerank, "metadata_filter": metadata_filter, "custom_prompt": custom_prompt, + "stream": False, }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, }, request_options=request_options, omit=OMIT, ) try: + if _response is None or not _response.text.strip(): + return HttpResponse(response=_response, data=None) if 200 <= _response.status_code < 300: _data = typing.cast( - QueryResponseV2, + typing.Any, parse_obj_as( - type_=QueryResponseV2, # type: ignore + type_=typing.Any, # type: ignore object_=_response.json(), ), ) @@ -163,20 +288,19 @@ class AsyncRawQueryClient: def __init__(self, *, client_wrapper: AsyncClientWrapper): self._client_wrapper = client_wrapper - async def collection_v2( + @contextlib.asynccontextmanager + async def collection_v2stream( self, collection_name: str, *, query: str, - idempotency_key: typing.Optional[str] = None, inference: typing.Optional[bool] = OMIT, - stream: typing.Optional[bool] = OMIT, top_k: typing.Optional[int] = OMIT, rerank: typing.Optional[bool] = OMIT, metadata_filter: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, custom_prompt: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, - ) -> 
AsyncHttpResponse[QueryResponseV2]: + ) -> typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[QueryStreamEvent]]]: """ Execute a natural language query against a collection. @@ -185,69 +309,191 @@ async def collection_v2( ## Streaming (SSE) - When `stream: true` and `inference: true`, the JSON response includes a `request_id`. Refer to the sample implementations to best make use of streams. + When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator. ### SSE Event Types - | Event | Format | Description | - |-------|--------|-------------| - | Text chunk | `data: \\n\\n` | Incremental text of the AI response. Plain text (not JSON). Newlines within text are escaped as `\\n`. | - | Tool start | `event: tool_start\\ndata: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"..."}}\\n\\n` | The AI agent is performing a knowledge base search. The `args.query` field contains the search query. | - | Tool end | `event: tool_end\\ndata: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}}\\n\\n` | A search completed. `tool_call_id` correlates with the preceding `tool_start`. `result_summary.resultCount` indicates how many results were found. | - | Complete | `event: complete\\ndata: {"type":"stream_complete"}\\n\\n` | Stream finished successfully. Close the connection after receiving this. | - | Error | `event: error\\ndata: {"type":"stream_error","error":"..."}\\n\\n` | An error occurred during generation. Close the connection. | + | `type` value | Schema | Description | + |---|---|---| + | `text` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. | + | `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. | + | `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. 
`tool_call_id` correlates with the preceding `tool.start`. | + | `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. | + | `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. | ### Example SSE Stream ``` - event: tool_start - data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"revenue projections Q4"}} + data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}} + + data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}} + + data: {"type":"text","content":"Based on the documents"} + data: {"type":"text","content":" provided, the revenue"} + data: {"type":"text","content":" projections for Q4 show"} + data: {"type":"text","content":" a 15% increase over Q3."} + + data: {"type":"stream_complete","metadata":{"totalResults":12},"stats":{"totalTokens":150}} + ``` + + ### Notes + + - The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair. + - Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search. + - Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses. + + Parameters + ---------- + collection_name : str + + query : str + The natural language query to search for + + inference : typing.Optional[bool] + Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results. + + top_k : typing.Optional[int] + Number of results to return. Only valid when inference=false. Not supported when inference=true (the agent controls its own search strategy). + + rerank : typing.Optional[bool] + Enable Voyage AI rerank-2.5 reranking for improved relevance ordering. Adds ~100-300ms latency. 
+ + metadata_filter : typing.Optional[typing.Dict[str, typing.Any]] + Filter expression for vector search. Supports: $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin, $and, $or + + custom_prompt : typing.Optional[str] + Custom system prompt to override the default RAG prompt when inference=true. Allows customizing how the LLM processes and responds to the query with the retrieved context. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Yields + ------ + typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[QueryStreamEvent]]] + + """ + async with self._client_wrapper.httpx_client.stream( + f"v2/collections/{jsonable_encoder(collection_name)}/query", + method="POST", + json={ + "query": query, + "inference": inference, + "top_k": top_k, + "rerank": rerank, + "metadata_filter": metadata_filter, + "custom_prompt": custom_prompt, + "stream": True, + }, + headers={ + "content-type": "application/json", + }, + request_options=request_options, + omit=OMIT, + ) as _response: + + async def _stream() -> AsyncHttpResponse[typing.AsyncIterator[QueryStreamEvent]]: + try: + if 200 <= _response.status_code < 300: + + async def _iter(): + _event_source = EventSource(_response) + async for _sse in _event_source.aiter_sse(): + if _sse.data == None: + return + try: + yield typing.cast( + QueryStreamEvent, + parse_sse_obj( + sse=_sse, + type_=QueryStreamEvent, # type: ignore + ), + ) + except JSONDecodeError as e: + warning(f"Skipping SSE event with invalid JSON: {e}, sse: {_sse!r}") + except (TypeError, ValueError, KeyError, AttributeError) as e: + warning( + f"Skipping SSE event due to model construction error: {type(e).__name__}: {e}, sse: {_sse!r}" + ) + except Exception as e: + error( + f"Unexpected error processing SSE event: {type(e).__name__}: {e}, sse: {_sse!r}" + ) + return + + return AsyncHttpResponse(response=_response, data=_iter()) + await _response.aread() + _response_json = _response.json() + except JSONDecodeError: + raise 
ApiError( + status_code=_response.status_code, headers=dict(_response.headers), body=_response.text + ) + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) + + yield await _stream() + + async def collection_v2( + self, + collection_name: str, + *, + query: str, + inference: typing.Optional[bool] = OMIT, + top_k: typing.Optional[int] = OMIT, + rerank: typing.Optional[bool] = OMIT, + metadata_filter: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + custom_prompt: typing.Optional[str] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> AsyncHttpResponse[typing.Any]: + """ + Execute a natural language query against a collection. + + When `inference=true`, returns an AI-generated response with relevant documents. + When `inference=false`, returns raw search results with content and metadata. - event: tool_end - data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}} + ## Streaming (SSE) + + When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator. + + ### SSE Event Types + + | `type` value | Schema | Description | + |---|---|---| + | `text` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. | + | `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. | + | `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. | + | `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. | + | `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. | - data: Based on the documents - data: provided, the revenue - data: projections for Q4 show - data: a 15% increase over Q3. 
+ ### Example SSE Stream - event: tool_start - data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_2","args":{"query":"Q3 comparison metrics"}} + ``` + data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}} - event: tool_end - data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_2","ok":true,"result_summary":{"resultCount":8}} + data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}} - data: Compared to Q3, the key - data: drivers were operational - data: efficiency gains. + data: {"type":"text","content":"Based on the documents"} + data: {"type":"text","content":" provided, the revenue"} + data: {"type":"text","content":" projections for Q4 show"} + data: {"type":"text","content":" a 15% increase over Q3."} - event: complete - data: {"type":"stream_complete"} + data: {"type":"stream_complete","metadata":{"totalResults":12},"stats":{"totalTokens":150}} ``` ### Notes - - The agent may perform multiple searches per query. Each search produces a `tool_start`/`tool_end` pair. + - The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair. - Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search. - Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses. Parameters ---------- collection_name : str - Name of the collection to query query : str The natural language query to search for - idempotency_key : typing.Optional[str] - UUID for request deduplication - inference : typing.Optional[bool] Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results. 
- stream : typing.Optional[bool] - Enable real-time streaming of the response - top_k : typing.Optional[int] Number of results to return. Only valid when inference=false. Not supported when inference=true (the agent controls its own search strategy). @@ -265,8 +511,8 @@ async def collection_v2( Returns ------- - AsyncHttpResponse[QueryResponseV2] - Successful Response — returns JSON when `stream: false`, or SSE event stream when `stream: true`. + AsyncHttpResponse[typing.Any] + """ _response = await self._client_wrapper.httpx_client.request( f"v2/collections/{jsonable_encoder(collection_name)}/query", @@ -274,25 +520,26 @@ async def collection_v2( json={ "query": query, "inference": inference, - "stream": stream, "top_k": top_k, "rerank": rerank, "metadata_filter": metadata_filter, "custom_prompt": custom_prompt, + "stream": False, }, headers={ "content-type": "application/json", - "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None, }, request_options=request_options, omit=OMIT, ) try: + if _response is None or not _response.text.strip(): + return AsyncHttpResponse(response=_response, data=None) if 200 <= _response.status_code < 300: _data = typing.cast( - QueryResponseV2, + typing.Any, parse_obj_as( - type_=QueryResponseV2, # type: ignore + type_=typing.Any, # type: ignore object_=_response.json(), ), ) diff --git a/src/runcaptain/raw_client.py b/src/runcaptain/raw_client.py new file mode 100644 index 0000000..b59019a --- /dev/null +++ b/src/runcaptain/raw_client.py @@ -0,0 +1,126 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing +from json.decoder import JSONDecodeError + +from .core.api_error import ApiError +from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper +from .core.http_response import AsyncHttpResponse, HttpResponse +from .core.jsonable_encoder import jsonable_encoder +from .core.request_options import RequestOptions + + +class RawCaptain: + def __init__(self, *, client_wrapper: SyncClientWrapper): + self._client_wrapper = client_wrapper + + def post_v2collections_collection_name_documents_wipe( + self, collection_name: str, *, request_options: typing.Optional[RequestOptions] = None + ) -> HttpResponse[None]: + """ + Parameters + ---------- + collection_name : str + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + HttpResponse[None] + """ + _response = self._client_wrapper.httpx_client.request( + f"v2/collections/{jsonable_encoder(collection_name)}/documents/wipe", + method="POST", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return HttpResponse(response=_response, data=None) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) + + def post_v2datasets_search(self, *, request_options: typing.Optional[RequestOptions] = None) -> HttpResponse[None]: + """ + Parameters + ---------- + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
+ + Returns + ------- + HttpResponse[None] + """ + _response = self._client_wrapper.httpx_client.request( + "v2/datasets/search", + method="POST", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return HttpResponse(response=_response, data=None) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) + + +class AsyncRawCaptain: + def __init__(self, *, client_wrapper: AsyncClientWrapper): + self._client_wrapper = client_wrapper + + async def post_v2collections_collection_name_documents_wipe( + self, collection_name: str, *, request_options: typing.Optional[RequestOptions] = None + ) -> AsyncHttpResponse[None]: + """ + Parameters + ---------- + collection_name : str + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + AsyncHttpResponse[None] + """ + _response = await self._client_wrapper.httpx_client.request( + f"v2/collections/{jsonable_encoder(collection_name)}/documents/wipe", + method="POST", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return AsyncHttpResponse(response=_response, data=None) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) + + async def post_v2datasets_search( + self, *, request_options: typing.Optional[RequestOptions] = None + ) -> AsyncHttpResponse[None]: + """ + Parameters + ---------- + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
+ + Returns + ------- + AsyncHttpResponse[None] + """ + _response = await self._client_wrapper.httpx_client.request( + "v2/datasets/search", + method="POST", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return AsyncHttpResponse(response=_response, data=None) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) diff --git a/src/runcaptain/types/__init__.py b/src/runcaptain/types/__init__.py index c362b46..35c7fb3 100644 --- a/src/runcaptain/types/__init__.py +++ b/src/runcaptain/types/__init__.py @@ -37,6 +37,19 @@ from .job_status_response_v2job_type import JobStatusResponseV2JobType from .job_status_response_v2status import JobStatusResponseV2Status from .query_response_v2 import QueryResponseV2 + from .query_stream_complete_event import QueryStreamCompleteEvent + from .query_stream_error_event import QueryStreamErrorEvent + from .query_stream_event import ( + QueryStreamEvent, + QueryStreamEvent_StreamComplete, + QueryStreamEvent_StreamError, + QueryStreamEvent_Text, + QueryStreamEvent_ToolEnd, + QueryStreamEvent_ToolStart, + ) + from .query_stream_text_event import QueryStreamTextEvent + from .query_stream_tool_end_event import QueryStreamToolEndEvent + from .query_stream_tool_start_event import QueryStreamToolStartEvent from .relevant_document_v2 import RelevantDocumentV2 from .search_result import SearchResult from .standard_response_v2 import StandardResponseV2 @@ -75,6 +88,17 @@ "JobStatusResponseV2JobType": ".job_status_response_v2job_type", "JobStatusResponseV2Status": ".job_status_response_v2status", "QueryResponseV2": ".query_response_v2", + "QueryStreamCompleteEvent": ".query_stream_complete_event", + "QueryStreamErrorEvent": ".query_stream_error_event", + "QueryStreamEvent": 
".query_stream_event", + "QueryStreamEvent_StreamComplete": ".query_stream_event", + "QueryStreamEvent_StreamError": ".query_stream_event", + "QueryStreamEvent_Text": ".query_stream_event", + "QueryStreamEvent_ToolEnd": ".query_stream_event", + "QueryStreamEvent_ToolStart": ".query_stream_event", + "QueryStreamTextEvent": ".query_stream_text_event", + "QueryStreamToolEndEvent": ".query_stream_tool_end_event", + "QueryStreamToolStartEvent": ".query_stream_tool_start_event", "RelevantDocumentV2": ".relevant_document_v2", "SearchResult": ".search_result", "StandardResponseV2": ".standard_response_v2", @@ -137,6 +161,17 @@ def __dir__(): "JobStatusResponseV2JobType", "JobStatusResponseV2Status", "QueryResponseV2", + "QueryStreamCompleteEvent", + "QueryStreamErrorEvent", + "QueryStreamEvent", + "QueryStreamEvent_StreamComplete", + "QueryStreamEvent_StreamError", + "QueryStreamEvent_Text", + "QueryStreamEvent_ToolEnd", + "QueryStreamEvent_ToolStart", + "QueryStreamTextEvent", + "QueryStreamToolEndEvent", + "QueryStreamToolStartEvent", "RelevantDocumentV2", "SearchResult", "StandardResponseV2", diff --git a/src/runcaptain/types/collection_item_v2.py b/src/runcaptain/types/collection_item_v2.py index d7c9be3..15b105d 100644 --- a/src/runcaptain/types/collection_item_v2.py +++ b/src/runcaptain/types/collection_item_v2.py @@ -7,19 +7,9 @@ class CollectionItemV2(UniversalBaseModel): - collection_id: str = pydantic.Field() - """ - Unique identifier for the collection - """ - - collection_name: str = pydantic.Field() - """ - Name of the collection - """ - environment: typing.Optional[str] = pydantic.Field(default=None) """ - Environment the collection belongs to (e.g. 
production, staging, development) + Environment the collection belongs to """ is_active: typing.Optional[bool] = pydantic.Field(default=None) @@ -37,9 +27,14 @@ class CollectionItemV2(UniversalBaseModel): Total number of API requests made against this collection """ - document_count: typing.Optional[int] = pydantic.Field(default=None) + database_name: str = pydantic.Field() + """ + Name of the collection database + """ + + file_count: int = pydantic.Field() """ - Total number of documents indexed in this collection + Total number of files indexed in this collection """ if IS_PYDANTIC_V2: diff --git a/src/runcaptain/types/document_item_v2.py b/src/runcaptain/types/document_item_v2.py index 716b59a..3951731 100644 --- a/src/runcaptain/types/document_item_v2.py +++ b/src/runcaptain/types/document_item_v2.py @@ -7,7 +7,7 @@ class DocumentItemV2(UniversalBaseModel): - document_id: str = pydantic.Field() + file_id: str = pydantic.Field() """ Unique identifier for the document """ diff --git a/src/runcaptain/types/query_stream_complete_event.py b/src/runcaptain/types/query_stream_complete_event.py new file mode 100644 index 0000000..40829d0 --- /dev/null +++ b/src/runcaptain/types/query_stream_complete_event.py @@ -0,0 +1,31 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel + + +class QueryStreamCompleteEvent(UniversalBaseModel): + """ + Emitted when the stream finishes successfully. Close the connection after receiving this. + """ + + metadata: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(default=None) + """ + Stream metadata (e.g. 
totalResults, totalSearches) + """ + + stats: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(default=None) + """ + Token and tool-call statistics + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/runcaptain/types/query_stream_error_event.py b/src/runcaptain/types/query_stream_error_event.py new file mode 100644 index 0000000..c7091ee --- /dev/null +++ b/src/runcaptain/types/query_stream_error_event.py @@ -0,0 +1,26 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel + + +class QueryStreamErrorEvent(UniversalBaseModel): + """ + Emitted when an error occurs during generation. Close the connection after receiving this. + """ + + error: str = pydantic.Field() + """ + Human-readable error message + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/runcaptain/types/query_stream_event.py b/src/runcaptain/types/query_stream_event.py new file mode 100644 index 0000000..37737dd --- /dev/null +++ b/src/runcaptain/types/query_stream_event.py @@ -0,0 +1,101 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +from __future__ import annotations + +import typing + +import pydantic +import typing_extensions +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel + + +class QueryStreamEvent_Text(UniversalBaseModel): + type: typing.Literal["text"] = "text" + content: str + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +class QueryStreamEvent_ToolStart(UniversalBaseModel): + type: typing.Literal["tool.start"] = "tool.start" + seq: typing.Optional[int] = None + run_id: typing.Optional[str] = None + tool_call_id: str + name: str + args: typing.Optional[typing.Dict[str, typing.Any]] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +class QueryStreamEvent_ToolEnd(UniversalBaseModel): + type: typing.Literal["tool.end"] = "tool.end" + seq: typing.Optional[int] = None + run_id: typing.Optional[str] = None + tool_call_id: str + name: str + ok: bool + result_summary: typing.Optional[typing.Dict[str, typing.Any]] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +class QueryStreamEvent_StreamComplete(UniversalBaseModel): + type: typing.Literal["stream_complete"] = "stream_complete" + metadata: typing.Optional[typing.Dict[str, typing.Any]] = None + stats: typing.Optional[typing.Dict[str, typing.Any]] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # 
Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +class QueryStreamEvent_StreamError(UniversalBaseModel): + type: typing.Literal["stream_error"] = "stream_error" + error: str + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +QueryStreamEvent = typing_extensions.Annotated[ + typing.Union[ + QueryStreamEvent_Text, + QueryStreamEvent_ToolStart, + QueryStreamEvent_ToolEnd, + QueryStreamEvent_StreamComplete, + QueryStreamEvent_StreamError, + ], + pydantic.Field(discriminator="type"), +] diff --git a/src/runcaptain/types/query_stream_text_event.py b/src/runcaptain/types/query_stream_text_event.py new file mode 100644 index 0000000..b2d54a8 --- /dev/null +++ b/src/runcaptain/types/query_stream_text_event.py @@ -0,0 +1,26 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel + + +class QueryStreamTextEvent(UniversalBaseModel): + """ + Incremental text chunk of the AI response. + """ + + content: str = pydantic.Field() + """ + Text fragment of the AI-generated response + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/runcaptain/types/query_stream_tool_end_event.py b/src/runcaptain/types/query_stream_tool_end_event.py new file mode 100644 index 0000000..58bd3bb --- /dev/null +++ b/src/runcaptain/types/query_stream_tool_end_event.py @@ -0,0 +1,51 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel + + +class QueryStreamToolEndEvent(UniversalBaseModel): + """ + Emitted when a tool call completes. + """ + + seq: typing.Optional[int] = pydantic.Field(default=None) + """ + Monotonically increasing sequence number within the stream + """ + + run_id: typing.Optional[str] = pydantic.Field(default=None) + """ + Identifier for the current agent run + """ + + tool_call_id: str = pydantic.Field() + """ + Correlates with the preceding tool.start event + """ + + name: str = pydantic.Field() + """ + Tool name + """ + + ok: bool = pydantic.Field() + """ + Whether the tool call succeeded + """ + + result_summary: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(default=None) + """ + Summary of the tool results (e.g. {"resultCount": 12, "hasResults": true}) + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/runcaptain/types/query_stream_tool_start_event.py b/src/runcaptain/types/query_stream_tool_start_event.py new file mode 100644 index 0000000..860bd03 --- /dev/null +++ b/src/runcaptain/types/query_stream_tool_start_event.py @@ -0,0 +1,46 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel + + +class QueryStreamToolStartEvent(UniversalBaseModel): + """ + Emitted when the AI agent begins a knowledge-base search. 
+ """ + + seq: typing.Optional[int] = pydantic.Field(default=None) + """ + Monotonically increasing sequence number within the stream + """ + + run_id: typing.Optional[str] = pydantic.Field(default=None) + """ + Identifier for the current agent run + """ + + tool_call_id: str = pydantic.Field() + """ + Correlates this start with the corresponding tool.end event + """ + + name: str = pydantic.Field() + """ + Tool name, e.g. searchKnowledgeBase + """ + + args: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(default=None) + """ + Arguments passed to the tool (e.g. {"query": "...", "topK": 10}) + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow