diff --git a/.fern/metadata.json b/.fern/metadata.json
index 3791815..3c88de1 100644
--- a/.fern/metadata.json
+++ b/.fern/metadata.json
@@ -1,9 +1,9 @@
 {
   "cliVersion": "3.86.0",
   "generatorName": "fernapi/fern-python-sdk",
-  "generatorVersion": "4.59.0",
+  "generatorVersion": "4.59.4",
   "generatorConfig": {
     "client_class_name": "Captain"
   },
-  "sdkVersion": "0.0.0"
+  "sdkVersion": "0.0.16"
 }
\ No newline at end of file
diff --git a/README.md b/README.md
index 1a92543..21af996 100644
--- a/README.md
+++ b/README.md
@@ -12,6 +12,7 @@ The Runcaptain Python library provides convenient access to the Runcaptain APIs
 - [Usage](#usage)
 - [Async Client](#async-client)
 - [Exception Handling](#exception-handling)
+- [Streaming](#streaming)
 - [Advanced](#advanced)
   - [Access Raw Response Data](#access-raw-response-data)
   - [Retries](#retries)
@@ -37,15 +38,11 @@ Instantiate and use the client with the following:
 from runcaptain import Captain
 
 client = Captain(
-    authorization="YOUR_AUTHORIZATION",
     organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
 )
-client.query.collection_v2(
-    collection_name="my_documents",
-    query="What are the key terms in the contract?",
-    inference=True,
-    stream=True,
-    rerank=True,
+client.post_v2collections_collection_name_documents_wipe(
+    collection_name="collection_name",
 )
 ```
 
@@ -59,18 +56,14 @@ import asyncio
 from runcaptain import AsyncCaptain
 
 client = AsyncCaptain(
-    authorization="YOUR_AUTHORIZATION",
     organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
 )
 
 
 async def main() -> None:
-    await client.query.collection_v2(
-        collection_name="my_documents",
-        query="What are the key terms in the contract?",
-        inference=True,
-        stream=True,
-        rerank=True,
+    await client.post_v2collections_collection_name_documents_wipe(
+        collection_name="collection_name",
     )
 
 
@@ -86,12 +79,31 @@ will be thrown.
 from runcaptain.core.api_error import ApiError
 
 try:
-    client.query.collection_v2(...)
+    client.post_v2collections_collection_name_documents_wipe(...)
 except ApiError as e:
     print(e.status_code)
     print(e.body)
 ```
 
+## Streaming
+
+The SDK also supports streaming responses: the response is a generator that you can loop over.
+
+```python
+from runcaptain import Captain
+
+client = Captain(
+    organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
+)
+response = client.query.collection_v2stream(
+    collection_name="collection_name",
+    query="query",
+)
+for chunk in response.data:
+    print(chunk)
+```
+
 ## Advanced
 
 ### Access Raw Response Data
@@ -105,10 +117,20 @@ from runcaptain import Captain
 client = Captain(
     ...,
 )
-response = client.query.with_raw_response.collection_v2(...)
+response = (
+    client.with_raw_response.post_v2collections_collection_name_documents_wipe(
+        ...
+    )
+)
 print(response.headers)  # access the response headers
 print(response.status_code)  # access the response status code
 print(response.data)  # access the underlying object
+
+with client.query.with_raw_response.collection_v2stream(...) as response:
+    print(response.headers)  # access the response headers
+    print(response.status_code)  # access the response status code
+    for chunk in response.data:
+        print(chunk)  # access the underlying object(s)
 ```
 
 ### Retries
@@ -126,7 +148,7 @@ A request is deemed retryable when any of the following HTTP status codes is ret
 Use the `max_retries` request option to configure this behavior.
 
 ```python
-client.query.collection_v2(..., request_options={
+client.post_v2collections_collection_name_documents_wipe(..., request_options={
     "max_retries": 1
 })
 ```
@@ -146,7 +168,7 @@ client = Captain(
 
 
 # Override timeout for a specific method
-client.query.collection_v2(..., request_options={
+client.post_v2collections_collection_name_documents_wipe(..., request_options={
     "timeout_in_seconds": 1
 })
 ```
diff --git a/poetry.lock b/poetry.lock
index 2f8666b..83fdc8b 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -38,13 +38,13 @@ trio = ["trio (>=0.26.1)"]
 
 [[package]]
 name = "certifi"
-version = "2026.1.4"
+version = "2026.2.25"
 description = "Python package for providing Mozilla's CA Bundle."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c"},
-    {file = "certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120"},
+    {file = "certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa"},
+    {file = "certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7"},
 ]
 
 [[package]]
diff --git a/pyproject.toml b/pyproject.toml
index ac15859..88be80c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ dynamic = ["version"]
 
 [tool.poetry]
 name = "captain-sdk"
-version = "0.0.0"
+version = "0.0.16"
 description = ""
 readme = "README.md"
 authors = []
diff --git a/reference.md b/reference.md
index a89bbf8..c21ba8a 100644
--- a/reference.md
+++ b/reference.md
@@ -1,10 +1,9 @@
 # Reference
-## Collections
-<details><summary><code>client.collections.<a href="src/runcaptain/collections/client.py">list_collections_v2</a>(...) -&gt; AsyncHttpResponse[CollectionListResponseV2]</code></summary>
+<details><summary><code>client.<a href="src/runcaptain/client.py">post_v2collections_collection_name_documents_wipe</a>(...) -&gt; AsyncHttpResponse[None]</code></summary>
 <dl>
 <dd>
 
-#### 📝 Description
+#### 🔌 Usage
 
 <dl>
 <dd>
@@ -12,14 +11,55 @@
 <dl>
 <dd>
 
-List all collections for an organization.
+```python
+from runcaptain import Captain
 
-Returns an array of collection objects with collection_name, collection_id, and document_count.
+client = Captain(
+    organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
+)
+client.post_v2collections_collection_name_documents_wipe(
+    collection_name="collection_name",
+)
+
+```
+</dd>
+</dl>
+</dd>
+</dl>
+
+#### ⚙️ Parameters
+
+<dl>
+<dd>
+
+<dl>
+<dd>
+
+**collection_name:** `str` 
+    
+</dd>
+</dl>
+
+<dl>
+<dd>
+
+**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.
+    
 </dd>
 </dl>
 </dd>
 </dl>
 
+
+</dd>
+</dl>
+</details>
+
+<details><summary><code>client.<a href="src/runcaptain/client.py">post_v2datasets_search</a>() -&gt; AsyncHttpResponse[None]</code></summary>
+<dl>
+<dd>
+
 #### 🔌 Usage
 
 <dl>
@@ -32,10 +72,10 @@ Returns an array of collection objects with collection_name, collection_id, and
 from runcaptain import Captain
 
 client = Captain(
-    authorization="YOUR_AUTHORIZATION",
     organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
 )
-client.collections.list_collections_v2()
+client.post_v2datasets_search()
 
 ```
 </dd>
@@ -51,18 +91,66 @@ client.collections.list_collections_v2()
 <dl>
 <dd>
 
-**limit:** `typing.Optional[int]` — Maximum number of collections to return
+**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.
     
 </dd>
 </dl>
+</dd>
+</dl>
+
+
+</dd>
+</dl>
+</details>
+
+## Collections
+<details><summary><code>client.collections.<a href="src/runcaptain/collections/client.py">list_collections_v2</a>() -&gt; AsyncHttpResponse[CollectionListResponseV2]</code></summary>
+<dl>
+<dd>
+
+#### 📝 Description
 
 <dl>
 <dd>
 
-**offset:** `typing.Optional[int]` — Pagination offset
-    
+<dl>
+<dd>
+
+List all collections for an organization.
+
+Returns an array of collection objects with collection_name, collection_id, and document_count.
 </dd>
 </dl>
+</dd>
+</dl>
+
+#### 🔌 Usage
+
+<dl>
+<dd>
+
+<dl>
+<dd>
+
+```python
+from runcaptain import Captain
+
+client = Captain(
+    organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
+)
+client.collections.list_collections_v2()
+
+```
+</dd>
+</dl>
+</dd>
+</dl>
+
+#### ⚙️ Parameters
+
+<dl>
+<dd>
 
 <dl>
 <dd>
@@ -109,11 +197,11 @@ Create a new collection (idempotent). Returns 201 if created, 200 if already exi
 from runcaptain import Captain
 
 client = Captain(
-    authorization="YOUR_AUTHORIZATION",
     organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
 )
 client.collections.create_collection_v2(
-    collection_name="my_documents",
+    collection_name="collection_name",
 )
 
 ```
@@ -130,7 +218,7 @@ client.collections.create_collection_v2(
 <dl>
 <dd>
 
-**collection_name:** `str` — Name of the collection to create
+**collection_name:** `str` 
     
 </dd>
 </dl>
@@ -188,11 +276,11 @@ Delete a collection and all its indexed documents.
 from runcaptain import Captain
 
 client = Captain(
-    authorization="YOUR_AUTHORIZATION",
     organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
 )
 client.collections.delete_collection_v2(
-    collection_name="my_documents",
+    collection_name="collection_name",
 )
 
 ```
@@ -209,7 +297,7 @@ client.collections.delete_collection_v2(
 <dl>
 <dd>
 
-**collection_name:** `str` — Name of the collection to delete
+**collection_name:** `str` 
     
 </dd>
 </dl>
@@ -271,12 +359,12 @@ All files, indexed data, and vector embeddings are preserved. The collection's i
 from runcaptain import Captain
 
 client = Captain(
-    authorization="YOUR_AUTHORIZATION",
     organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
 )
 client.collections.change_collection_environment_v2(
-    collection_name="my_documents",
-    new_environment="production",
+    collection_name="collection_name",
+    new_environment="development",
 )
 
 ```
@@ -293,7 +381,7 @@ client.collections.change_collection_environment_v2(
 <dl>
 <dd>
 
-**collection_name:** `str` — Name of the collection to move
+**collection_name:** `str` 
     
 </dd>
 </dl>
@@ -351,13 +439,12 @@ List all documents in a collection with pagination support.
 from runcaptain import Captain
 
 client = Captain(
-    authorization="YOUR_AUTHORIZATION",
     organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
 )
 client.collections.list_documents_v2(
-    collection_name="my_documents",
-    limit=100,
-    offset=0,
+    collection_name="collection_name",
+    offset=1,
 )
 
 ```
@@ -374,15 +461,7 @@ client.collections.list_documents_v2(
 <dl>
 <dd>
 
-**collection_name:** `str` — Name of the collection
-    
-</dd>
-</dl>
-
-<dl>
-<dd>
-
-**limit:** `typing.Optional[int]` — Maximum number of documents to return
+**collection_name:** `str` 
     
 </dd>
 </dl>
@@ -440,8 +519,8 @@ Remove all documents from a collection while keeping the collection structure.
 from runcaptain import Captain
 
 client = Captain(
-    authorization="YOUR_AUTHORIZATION",
     organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
 )
 client.collections.wipe_collection_documents_v2(
     collection_name="collection_name",
@@ -511,8 +590,8 @@ Delete a specific document from a collection.
 from runcaptain import Captain
 
 client = Captain(
-    authorization="YOUR_AUTHORIZATION",
     organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
 )
 client.collections.delete_document_v2(
     collection_name="collection_name",
@@ -533,7 +612,7 @@ client.collections.delete_document_v2(
 <dl>
 <dd>
 
-**collection_name:** `str` — Name of the collection
+**collection_name:** `str` 
     
 </dd>
 </dl>
@@ -541,7 +620,7 @@ client.collections.delete_document_v2(
 <dl>
 <dd>
 
-**document_id:** `str` — ID of the document to delete
+**document_id:** `str` 
     
 </dd>
 </dl>
@@ -562,7 +641,7 @@ client.collections.delete_document_v2(
 </details>
 
 ## Query
-<details><summary><code>client.query.<a href="src/runcaptain/query/client.py">collection_v2</a>(...) -&gt; AsyncHttpResponse[QueryResponseV2]</code></summary>
+<details><summary><code>client.query.<a href="src/runcaptain/query/client.py">collection_v2stream</a>(...) -&gt; typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[QueryStreamEvent]]]</code></summary>
 <dl>
 <dd>
 
@@ -581,49 +660,36 @@ When `inference=false`, returns raw search results with content and metadata.
 
 ## Streaming (SSE)
 
-When `stream: true` and `inference: true`, the JSON response includes a `request_id`. Refer to the sample implementations to best make use of streams.
+When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator.
 
 ### SSE Event Types
 
-| Event | Format | Description |
-|-------|--------|-------------|
-| Text chunk | `data: <text>\n\n` | Incremental text of the AI response. Plain text (not JSON). Newlines within text are escaped as `\n`. |
-| Tool start | `event: tool_start\ndata: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"..."}}\n\n` | The AI agent is performing a knowledge base search. The `args.query` field contains the search query. |
-| Tool end | `event: tool_end\ndata: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}}\n\n` | A search completed. `tool_call_id` correlates with the preceding `tool_start`. `result_summary.resultCount` indicates how many results were found. |
-| Complete | `event: complete\ndata: {"type":"stream_complete"}\n\n` | Stream finished successfully. Close the connection after receiving this. |
-| Error | `event: error\ndata: {"type":"stream_error","error":"..."}\n\n` | An error occurred during generation. Close the connection. |
+| `type` value | Schema | Description |
+|---|---|---|
+| `text` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. |
+| `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. |
+| `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. |
+| `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. |
+| `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. |
 
 ### Example SSE Stream
 
 ```
-event: tool_start
-data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"revenue projections Q4"}}
-
-event: tool_end
-data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}}
+data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}}
 
-data: Based on the documents
-data:  provided, the revenue
-data:  projections for Q4 show
-data:  a 15% increase over Q3.
+data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}}
 
-event: tool_start
-data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_2","args":{"query":"Q3 comparison metrics"}}
+data: {"type":"text","content":"Based on the documents"}
+data: {"type":"text","content":" provided, the revenue"}
+data: {"type":"text","content":" projections for Q4 show"}
+data: {"type":"text","content":" a 15% increase over Q3."}
 
-event: tool_end
-data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_2","ok":true,"result_summary":{"resultCount":8}}
-
-data:  Compared to Q3, the key
-data:  drivers were operational
-data:  efficiency gains.
-
-event: complete
-data: {"type":"stream_complete"}
+data: {"type":"stream_complete","metadata":{"totalResults":12},"stats":{"totalTokens":150}}
 ```
 
 ### Notes
 
-- The agent may perform multiple searches per query. Each search produces a `tool_start`/`tool_end` pair.
+- The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair.
 - Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search.
 - Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses.
 </dd>
@@ -643,16 +709,15 @@ data: {"type":"stream_complete"}
 from runcaptain import Captain
 
 client = Captain(
-    authorization="YOUR_AUTHORIZATION",
     organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
 )
-client.query.collection_v2(
-    collection_name="my_documents",
-    query="What are the key terms in the contract?",
-    inference=True,
-    stream=True,
-    rerank=True,
+response = client.query.collection_v2stream(
+    collection_name="collection_name",
+    query="query",
 )
+for chunk in response.data:
+    print(chunk)
 
 ```
 </dd>
@@ -668,7 +733,7 @@ client.query.collection_v2(
 <dl>
 <dd>
 
-**collection_name:** `str` — Name of the collection to query
+**collection_name:** `str` 
     
 </dd>
 </dl>
@@ -684,7 +749,7 @@ client.query.collection_v2(
 <dl>
 <dd>
 
-**idempotency_key:** `typing.Optional[str]` — UUID for request deduplication
+**inference:** `typing.Optional[bool]` — Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results.
     
 </dd>
 </dl>
@@ -692,15 +757,157 @@ client.query.collection_v2(
 <dl>
 <dd>
 
-**inference:** `typing.Optional[bool]` — Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results.
+**top_k:** `typing.Optional[int]` — Number of results to return. Only valid when inference=false. Not supported when inference=true (the agent controls its own search strategy).
+    
+</dd>
+</dl>
+
+<dl>
+<dd>
+
+**rerank:** `typing.Optional[bool]` — Enable Voyage AI rerank-2.5 reranking for improved relevance ordering. Adds ~100-300ms latency.
+    
+</dd>
+</dl>
+
+<dl>
+<dd>
+
+**metadata_filter:** `typing.Optional[typing.Dict[str, typing.Any]]` — Filter expression for vector search. Supports: $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin, $and, $or
+    
+</dd>
+</dl>
+
+<dl>
+<dd>
+
+**custom_prompt:** `typing.Optional[str]` — Custom system prompt to override the default RAG prompt when inference=true. Allows customizing how the LLM processes and responds to the query with the retrieved context.
+    
+</dd>
+</dl>
+
+<dl>
+<dd>
+
+**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.
     
 </dd>
 </dl>
+</dd>
+</dl>
+
+
+</dd>
+</dl>
+</details>
+
+<details><summary><code>client.query.<a href="src/runcaptain/query/client.py">collection_v2</a>(...) -&gt; AsyncHttpResponse[typing.Any]</code></summary>
+<dl>
+<dd>
+
+#### 📝 Description
+
+<dl>
+<dd>
+
+<dl>
+<dd>
+
+Execute a natural language query against a collection.
+
+When `inference=true`, returns an AI-generated response with relevant documents.
+When `inference=false`, returns raw search results with content and metadata.
+
+## Streaming (SSE)
+
+When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator.
+
+### SSE Event Types
+
+| `type` value | Schema | Description |
+|---|---|---|
+| `text` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. |
+| `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. |
+| `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. |
+| `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. |
+| `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. |
+
+### Example SSE Stream
+
+```
+data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}}
+
+data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}}
+
+data: {"type":"text","content":"Based on the documents"}
+data: {"type":"text","content":" provided, the revenue"}
+data: {"type":"text","content":" projections for Q4 show"}
+data: {"type":"text","content":" a 15% increase over Q3."}
+
+data: {"type":"stream_complete","metadata":{"totalResults":12},"stats":{"totalTokens":150}}
+```
+
+### Notes
+
+- The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair.
+- Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search.
+- Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses.
+</dd>
+</dl>
+</dd>
+</dl>
+
+#### 🔌 Usage
+
+<dl>
+<dd>
 
 <dl>
 <dd>
 
-**stream:** `typing.Optional[bool]` — Enable real-time streaming of the response
+```python
+from runcaptain import Captain
+
+client = Captain(
+    organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
+)
+client.query.collection_v2(
+    collection_name="collection_name",
+    query="query",
+)
+
+```
+</dd>
+</dl>
+</dd>
+</dl>
+
+#### ⚙️ Parameters
+
+<dl>
+<dd>
+
+<dl>
+<dd>
+
+**collection_name:** `str` 
+    
+</dd>
+</dl>
+
+<dl>
+<dd>
+
+**query:** `str` — The natural language query to search for
+    
+</dd>
+</dl>
+
+<dl>
+<dd>
+
+**inference:** `typing.Optional[bool]` — Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results.
     
 </dd>
 </dl>
@@ -783,14 +990,14 @@ Index all files from an S3 bucket into a collection. Returns a job_id for tracki
 from runcaptain import Captain
 
 client = Captain(
-    authorization="YOUR_AUTHORIZATION",
     organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
 )
 client.indexing.index_s3bucket_v2(
-    collection_name="my_documents",
-    bucket_name="my-s3-bucket",
+    collection_name="collection_name",
+    bucket_name="my-company-docs",
     aws_access_key_id="AKIAIOSFODNN7EXAMPLE",
-    aws_secret_access_key="your_secret_key",
+    aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
     bucket_region="us-east-1",
     processing_type="advanced",
 )
@@ -809,7 +1016,7 @@ client.indexing.index_s3bucket_v2(
 <dl>
 <dd>
 
-**collection_name:** `str` — Name of the collection to index into
+**collection_name:** `str` 
     
 </dd>
 </dl>
@@ -849,14 +1056,6 @@ client.indexing.index_s3bucket_v2(
 <dl>
 <dd>
 
-**idempotency_key:** `typing.Optional[str]` — UUID for request deduplication
-    
-</dd>
-</dl>
-
-<dl>
-<dd>
-
 **bucket_region:** `typing.Optional[str]` — AWS region where the bucket is located
     
 </dd>
@@ -931,15 +1130,15 @@ Index a single file from an S3 bucket into a collection. Returns a job_id for tr
 from runcaptain import Captain
 
 client = Captain(
-    authorization="YOUR_AUTHORIZATION",
     organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
 )
 client.indexing.index_s3file_v2(
-    collection_name="my_documents",
-    bucket_name="my-s3-bucket",
-    file_uri="s3://my-s3-bucket/contracts/acme_contract.pdf",
+    collection_name="collection_name",
+    bucket_name="my-company-docs",
+    file_uri="s3://my-company-docs/contracts/acme_contract.pdf",
     aws_access_key_id="AKIAIOSFODNN7EXAMPLE",
-    aws_secret_access_key="your_secret_key",
+    aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
     bucket_region="us-east-1",
     processing_type="advanced",
 )
@@ -958,7 +1157,7 @@ client.indexing.index_s3file_v2(
 <dl>
 <dd>
 
-**collection_name:** `str` — Name of the collection to index into
+**collection_name:** `str` 
     
 </dd>
 </dl>
@@ -1064,12 +1263,12 @@ Index all files from a Google Cloud Storage bucket into a collection. Returns a
 from runcaptain import Captain
 
 client = Captain(
-    authorization="YOUR_AUTHORIZATION",
     organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
 )
 client.indexing.index_gcs_bucket_v2(
-    collection_name="my_documents",
-    bucket_name="my-gcs-bucket",
+    collection_name="collection_name",
+    bucket_name="my-company-docs",
     service_account_json='{"type":"service_account","project_id":"my-project",...}',
     processing_type="advanced",
 )
@@ -1088,7 +1287,7 @@ client.indexing.index_gcs_bucket_v2(
 <dl>
 <dd>
 
-**collection_name:** `str` — Name of the collection to index into
+**collection_name:** `str` 
     
 </dd>
 </dl>
@@ -1186,8 +1385,8 @@ Index a single file from a GCS bucket into a collection. Returns a job_id for tr
 from runcaptain import Captain
 
 client = Captain(
-    authorization="YOUR_AUTHORIZATION",
     organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
 )
 client.indexing.index_gcs_file_v2(
     collection_name="collection_name",
@@ -1211,7 +1410,7 @@ client.indexing.index_gcs_file_v2(
 <dl>
 <dd>
 
-**collection_name:** `str` — Name of the collection to index into
+**collection_name:** `str` 
     
 </dd>
 </dl>
@@ -1301,15 +1500,15 @@ Index all files from a specific directory in an S3 bucket into a collection. Use
 from runcaptain import Captain
 
 client = Captain(
-    authorization="YOUR_AUTHORIZATION",
     organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
 )
 client.indexing.index_s3directory_v2(
-    collection_name="my_documents",
-    bucket_name="my-s3-bucket",
+    collection_name="collection_name",
+    bucket_name="my-company-docs",
     directory_path="reports/2024/january",
     aws_access_key_id="AKIAIOSFODNN7EXAMPLE",
-    aws_secret_access_key="your_secret_key",
+    aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
     bucket_region="us-east-1",
     processing_type="advanced",
 )
@@ -1328,7 +1527,7 @@ client.indexing.index_s3directory_v2(
 <dl>
 <dd>
 
-**collection_name:** `str` — Name of the collection to index into
+**collection_name:** `str` 
     
 </dd>
 </dl>
@@ -1376,14 +1575,6 @@ client.indexing.index_s3directory_v2(
 <dl>
 <dd>
 
-**idempotency_key:** `typing.Optional[str]` — UUID for request deduplication
-    
-</dd>
-</dl>
-
-<dl>
-<dd>
-
 **bucket_region:** `typing.Optional[str]` — AWS region where the bucket is located
     
 </dd>
@@ -1458,12 +1649,12 @@ Index all files from a specific directory in a GCS bucket into a collection. Use
 from runcaptain import Captain
 
 client = Captain(
-    authorization="YOUR_AUTHORIZATION",
     organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
 )
 client.indexing.index_gcs_directory_v2(
-    collection_name="my_documents",
-    bucket_name="my-gcs-bucket",
+    collection_name="collection_name",
+    bucket_name="my-company-docs",
     directory_path="reports/2024/january",
     service_account_json='{"type":"service_account","project_id":"my-project",...}',
     processing_type="advanced",
@@ -1483,7 +1674,7 @@ client.indexing.index_gcs_directory_v2(
 <dl>
 <dd>
 
-**collection_name:** `str` — Name of the collection to index into
+**collection_name:** `str` 
     
 </dd>
 </dl>
@@ -1523,14 +1714,6 @@ client.indexing.index_gcs_directory_v2(
 <dl>
 <dd>
 
-**idempotency_key:** `typing.Optional[str]` — UUID for request deduplication
-    
-</dd>
-</dl>
-
-<dl>
-<dd>
-
 **max_files:** `typing.Optional[int]` — Maximum number of files to index (optional)
     
 </dd>
@@ -1597,14 +1780,14 @@ Index all files from an Azure Blob Storage container into a collection. Returns
 from runcaptain import Captain
 
 client = Captain(
-    authorization="YOUR_AUTHORIZATION",
     organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
 )
 client.indexing.index_azure_container_v2(
-    collection_name="my_documents",
+    collection_name="collection_name",
     container_name="my-container",
     account_name="mystorageaccount",
-    account_key="your_account_key",
+    account_key="your_account_key_base64",
     processing_type="advanced",
 )
 
@@ -1622,7 +1805,7 @@ client.indexing.index_azure_container_v2(
 <dl>
 <dd>
 
-**collection_name:** `str` — Name of the collection to index into
+**collection_name:** `str` 
     
 </dd>
 </dl>
@@ -1662,14 +1845,6 @@ client.indexing.index_azure_container_v2(
 <dl>
 <dd>
 
-**idempotency_key:** `typing.Optional[str]` — UUID for request deduplication
-    
-</dd>
-</dl>
-
-<dl>
-<dd>
-
 **max_files:** `typing.Optional[int]` — Maximum number of files to index (optional)
     
 </dd>
@@ -1736,15 +1911,15 @@ Index a single file from an Azure Blob Storage container into a collection. Retu
 from runcaptain import Captain
 
 client = Captain(
-    authorization="YOUR_AUTHORIZATION",
     organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
 )
 client.indexing.index_azure_file_v2(
-    collection_name="my_documents",
+    collection_name="collection_name",
     container_name="my-container",
     file_uri="https://mystorageaccount.blob.core.windows.net/my-container/contracts/acme_contract.pdf",
     account_name="mystorageaccount",
-    account_key="your_account_key",
+    account_key="your_account_key_base64",
     processing_type="advanced",
 )
 
@@ -1762,7 +1937,7 @@ client.indexing.index_azure_file_v2(
 <dl>
 <dd>
 
-**collection_name:** `str` — Name of the collection to index into
+**collection_name:** `str` 
     
 </dd>
 </dl>
@@ -1860,15 +2035,15 @@ Index all files from a specific directory (prefix) in an Azure Blob Storage cont
 from runcaptain import Captain
 
 client = Captain(
-    authorization="YOUR_AUTHORIZATION",
     organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
 )
 client.indexing.index_azure_directory_v2(
-    collection_name="my_documents",
+    collection_name="collection_name",
     container_name="my-container",
     directory_path="reports/2024/january",
     account_name="mystorageaccount",
-    account_key="your_account_key",
+    account_key="your_account_key_base64",
     processing_type="advanced",
 )
 
@@ -1886,7 +2061,7 @@ client.indexing.index_azure_directory_v2(
 <dl>
 <dd>
 
-**collection_name:** `str` — Name of the collection to index into
+**collection_name:** `str` 
     
 </dd>
 </dl>
@@ -1934,14 +2109,6 @@ client.indexing.index_azure_directory_v2(
 <dl>
 <dd>
 
-**idempotency_key:** `typing.Optional[str]` — UUID for request deduplication
-    
-</dd>
-</dl>
-
-<dl>
-<dd>
-
 **max_files:** `typing.Optional[int]` — Maximum number of files to index (optional)
     
 </dd>
@@ -2034,11 +2201,11 @@ Each file in the `files` array has a status:
 from runcaptain import Captain
 
 client = Captain(
-    authorization="YOUR_AUTHORIZATION",
     organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
 )
 client.jobs.get_job_status_v2(
-    job_id="abc123xyz-1234567890",
+    job_id="job_id",
 )
 
 ```
@@ -2055,7 +2222,7 @@ client.jobs.get_job_status_v2(
 <dl>
 <dd>
 
-**job_id:** `str` — The job ID returned from an indexing request
+**job_id:** `str` 
     
 </dd>
 </dl>
@@ -2109,11 +2276,11 @@ Behavior:
 from runcaptain import Captain
 
 client = Captain(
-    authorization="YOUR_AUTHORIZATION",
     organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
 )
 client.jobs.cancel_job_v2(
-    job_id="abc123xyz-1234567890",
+    job_id="job_id",
 )
 
 ```
@@ -2130,7 +2297,7 @@ client.jobs.cancel_job_v2(
 <dl>
 <dd>
 
-**job_id:** `str` — The job ID to cancel
+**job_id:** `str` 
     
 </dd>
 </dl>
@@ -2191,12 +2358,13 @@ Returns a list of search results with title, URL, snippet, and date.
 from runcaptain import Captain
 
 client = Captain(
-    authorization="YOUR_AUTHORIZATION",
     organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
 )
 client.datasets.search_dataset(
     dataset="nytimes",
-    q="",
+    q="q",
+    limit=1,
 )
 
 ```
@@ -2290,12 +2458,12 @@ Returns the full article content in markdown format, along with metadata like ti
 from runcaptain import Captain
 
 client = Captain(
-    authorization="YOUR_AUTHORIZATION",
     organization_id="YOUR_ORGANIZATION_ID",
+    key="YOUR_KEY",
 )
 client.datasets.get_dataset_article(
     dataset="nytimes",
-    url="https://www.washingtonpost.com/example/news_example.html",
+    url="url",
 )
 
 ```
diff --git a/src/runcaptain/__init__.py b/src/runcaptain/__init__.py
index b0d8097..528fe48 100644
--- a/src/runcaptain/__init__.py
+++ b/src/runcaptain/__init__.py
@@ -38,6 +38,17 @@
         JobStatusResponseV2JobType,
         JobStatusResponseV2Status,
         QueryResponseV2,
+        QueryStreamCompleteEvent,
+        QueryStreamErrorEvent,
+        QueryStreamEvent,
+        QueryStreamEvent_StreamComplete,
+        QueryStreamEvent_StreamError,
+        QueryStreamEvent_Text,
+        QueryStreamEvent_ToolEnd,
+        QueryStreamEvent_ToolStart,
+        QueryStreamTextEvent,
+        QueryStreamToolEndEvent,
+        QueryStreamToolStartEvent,
         RelevantDocumentV2,
         SearchResult,
         StandardResponseV2,
@@ -120,6 +131,17 @@
     "JobStatusResponseV2Status": ".types",
     "NotFoundError": ".errors",
     "QueryResponseV2": ".types",
+    "QueryStreamCompleteEvent": ".types",
+    "QueryStreamErrorEvent": ".types",
+    "QueryStreamEvent": ".types",
+    "QueryStreamEvent_StreamComplete": ".types",
+    "QueryStreamEvent_StreamError": ".types",
+    "QueryStreamEvent_Text": ".types",
+    "QueryStreamEvent_ToolEnd": ".types",
+    "QueryStreamEvent_ToolStart": ".types",
+    "QueryStreamTextEvent": ".types",
+    "QueryStreamToolEndEvent": ".types",
+    "QueryStreamToolStartEvent": ".types",
     "RelevantDocumentV2": ".types",
     "SearchDatasetRequestDataset": ".datasets",
     "SearchResult": ".types",
@@ -209,6 +231,17 @@ def __dir__():
     "JobStatusResponseV2Status",
     "NotFoundError",
     "QueryResponseV2",
+    "QueryStreamCompleteEvent",
+    "QueryStreamErrorEvent",
+    "QueryStreamEvent",
+    "QueryStreamEvent_StreamComplete",
+    "QueryStreamEvent_StreamError",
+    "QueryStreamEvent_Text",
+    "QueryStreamEvent_ToolEnd",
+    "QueryStreamEvent_ToolStart",
+    "QueryStreamTextEvent",
+    "QueryStreamToolEndEvent",
+    "QueryStreamToolStartEvent",
     "RelevantDocumentV2",
     "SearchDatasetRequestDataset",
     "SearchResult",
diff --git a/src/runcaptain/client.py b/src/runcaptain/client.py
index 4c84d40..ff87d3b 100644
--- a/src/runcaptain/client.py
+++ b/src/runcaptain/client.py
@@ -2,12 +2,16 @@
 
 from __future__ import annotations
 
+import os
 import typing
 
 import httpx
+from .core.api_error import ApiError
 from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
 from .core.logging import LogConfig, Logger
+from .core.request_options import RequestOptions
 from .environment import CaptainEnvironment
+from .raw_client import AsyncRawCaptain, RawCaptain
 
 if typing.TYPE_CHECKING:
     from .collections.client import AsyncCollectionsClient, CollectionsClient
@@ -35,8 +39,8 @@ class Captain:
 
 
 
-    authorization : str
     organization_id : typing.Optional[str]
+    key : typing.Optional[typing.Union[str, typing.Callable[[], str]]]
     headers : typing.Optional[typing.Dict[str, str]]
         Additional headers to send with every request.
 
@@ -57,8 +61,8 @@ class Captain:
     from runcaptain import Captain
 
     client = Captain(
-        authorization="YOUR_AUTHORIZATION",
         organization_id="YOUR_ORGANIZATION_ID",
+        key="YOUR_KEY",
     )
     """
 
@@ -67,8 +71,8 @@ def __init__(
         *,
         base_url: typing.Optional[str] = None,
         environment: CaptainEnvironment = CaptainEnvironment.DEFAULT,
-        authorization: str,
-        organization_id: typing.Optional[str] = None,
+        organization_id: typing.Optional[str] = os.getenv("CAPTAIN_ORGANIZATION_ID"),
+        key: typing.Optional[typing.Union[str, typing.Callable[[], str]]] = os.getenv("CAPTAIN_API_KEY"),
         headers: typing.Optional[typing.Dict[str, str]] = None,
         timeout: typing.Optional[float] = None,
         follow_redirects: typing.Optional[bool] = True,
@@ -78,10 +82,16 @@ def __init__(
         _defaulted_timeout = (
             timeout if timeout is not None else 60 if httpx_client is None else httpx_client.timeout.read
         )
+        if organization_id is None:
+            raise ApiError(
+                body="The client must be instantiated by either passing in organization_id or setting CAPTAIN_ORGANIZATION_ID"
+            )
+        if key is None:
+            raise ApiError(body="The client must be instantiated by either passing in key or setting CAPTAIN_API_KEY")
         self._client_wrapper = SyncClientWrapper(
             base_url=_get_base_url(base_url=base_url, environment=environment),
-            authorization=authorization,
             organization_id=organization_id,
+            key=key,
             headers=headers,
             httpx_client=httpx_client
             if httpx_client is not None
@@ -91,12 +101,80 @@ def __init__(
             timeout=_defaulted_timeout,
             logging=logging,
         )
+        self._raw_client = RawCaptain(client_wrapper=self._client_wrapper)
         self._collections: typing.Optional[CollectionsClient] = None
         self._query: typing.Optional[QueryClient] = None
         self._indexing: typing.Optional[IndexingClient] = None
         self._jobs: typing.Optional[JobsClient] = None
         self._datasets: typing.Optional[DatasetsClient] = None
 
+    @property
+    def with_raw_response(self) -> RawCaptain:
+        """
+        Retrieves a raw implementation of this client that returns raw responses.
+
+        Returns
+        -------
+        RawCaptain
+        """
+        return self._raw_client
+
+    def post_v2collections_collection_name_documents_wipe(
+        self, collection_name: str, *, request_options: typing.Optional[RequestOptions] = None
+    ) -> None:
+        """
+        Parameters
+        ----------
+        collection_name : str
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        None
+
+        Examples
+        --------
+        from runcaptain import Captain
+
+        client = Captain(
+            organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
+        )
+        client.post_v2collections_collection_name_documents_wipe(
+            collection_name="collection_name",
+        )
+        """
+        _response = self._raw_client.post_v2collections_collection_name_documents_wipe(
+            collection_name, request_options=request_options
+        )
+        return _response.data
+
+    def post_v2datasets_search(self, *, request_options: typing.Optional[RequestOptions] = None) -> None:
+        """
+        Parameters
+        ----------
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        None
+
+        Examples
+        --------
+        from runcaptain import Captain
+
+        client = Captain(
+            organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
+        )
+        client.post_v2datasets_search()
+        """
+        _response = self._raw_client.post_v2datasets_search(request_options=request_options)
+        return _response.data
+
     @property
     def collections(self):
         if self._collections is None:
@@ -156,8 +234,8 @@ class AsyncCaptain:
 
 
 
-    authorization : str
     organization_id : typing.Optional[str]
+    key : typing.Optional[typing.Union[str, typing.Callable[[], str]]]
     headers : typing.Optional[typing.Dict[str, str]]
         Additional headers to send with every request.
 
@@ -178,8 +256,8 @@ class AsyncCaptain:
     from runcaptain import AsyncCaptain
 
     client = AsyncCaptain(
-        authorization="YOUR_AUTHORIZATION",
         organization_id="YOUR_ORGANIZATION_ID",
+        key="YOUR_KEY",
     )
     """
 
@@ -188,8 +266,8 @@ def __init__(
         *,
         base_url: typing.Optional[str] = None,
         environment: CaptainEnvironment = CaptainEnvironment.DEFAULT,
-        authorization: str,
-        organization_id: typing.Optional[str] = None,
+        organization_id: typing.Optional[str] = os.getenv("CAPTAIN_ORGANIZATION_ID"),
+        key: typing.Optional[typing.Union[str, typing.Callable[[], str]]] = os.getenv("CAPTAIN_API_KEY"),
         headers: typing.Optional[typing.Dict[str, str]] = None,
         timeout: typing.Optional[float] = None,
         follow_redirects: typing.Optional[bool] = True,
@@ -199,10 +277,16 @@ def __init__(
         _defaulted_timeout = (
             timeout if timeout is not None else 60 if httpx_client is None else httpx_client.timeout.read
         )
+        if organization_id is None:
+            raise ApiError(
+                body="The client must be instantiated by either passing in organization_id or setting CAPTAIN_ORGANIZATION_ID"
+            )
+        if key is None:
+            raise ApiError(body="The client must be instantiated by either passing in key or setting CAPTAIN_API_KEY")
         self._client_wrapper = AsyncClientWrapper(
             base_url=_get_base_url(base_url=base_url, environment=environment),
-            authorization=authorization,
             organization_id=organization_id,
+            key=key,
             headers=headers,
             httpx_client=httpx_client
             if httpx_client is not None
@@ -212,12 +296,96 @@ def __init__(
             timeout=_defaulted_timeout,
             logging=logging,
         )
+        self._raw_client = AsyncRawCaptain(client_wrapper=self._client_wrapper)
         self._collections: typing.Optional[AsyncCollectionsClient] = None
         self._query: typing.Optional[AsyncQueryClient] = None
         self._indexing: typing.Optional[AsyncIndexingClient] = None
         self._jobs: typing.Optional[AsyncJobsClient] = None
         self._datasets: typing.Optional[AsyncDatasetsClient] = None
 
+    @property
+    def with_raw_response(self) -> AsyncRawCaptain:
+        """
+        Retrieves a raw implementation of this client that returns raw responses.
+
+        Returns
+        -------
+        AsyncRawCaptain
+        """
+        return self._raw_client
+
+    async def post_v2collections_collection_name_documents_wipe(
+        self, collection_name: str, *, request_options: typing.Optional[RequestOptions] = None
+    ) -> None:
+        """
+        Parameters
+        ----------
+        collection_name : str
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        None
+
+        Examples
+        --------
+        import asyncio
+
+        from runcaptain import AsyncCaptain
+
+        client = AsyncCaptain(
+            organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
+        )
+
+
+        async def main() -> None:
+            await client.post_v2collections_collection_name_documents_wipe(
+                collection_name="collection_name",
+            )
+
+
+        asyncio.run(main())
+        """
+        _response = await self._raw_client.post_v2collections_collection_name_documents_wipe(
+            collection_name, request_options=request_options
+        )
+        return _response.data
+
+    async def post_v2datasets_search(self, *, request_options: typing.Optional[RequestOptions] = None) -> None:
+        """
+        Parameters
+        ----------
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        None
+
+        Examples
+        --------
+        import asyncio
+
+        from runcaptain import AsyncCaptain
+
+        client = AsyncCaptain(
+            organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
+        )
+
+
+        async def main() -> None:
+            await client.post_v2datasets_search()
+
+
+        asyncio.run(main())
+        """
+        _response = await self._raw_client.post_v2datasets_search(request_options=request_options)
+        return _response.data
+
     @property
     def collections(self):
         if self._collections is None:
diff --git a/src/runcaptain/collections/client.py b/src/runcaptain/collections/client.py
index d043171..6fc6efe 100644
--- a/src/runcaptain/collections/client.py
+++ b/src/runcaptain/collections/client.py
@@ -33,11 +33,7 @@ def with_raw_response(self) -> RawCollectionsClient:
         return self._raw_client
 
     def list_collections_v2(
-        self,
-        *,
-        limit: typing.Optional[int] = None,
-        offset: typing.Optional[int] = None,
-        request_options: typing.Optional[RequestOptions] = None,
+        self, *, request_options: typing.Optional[RequestOptions] = None
     ) -> CollectionListResponseV2:
         """
         List all collections for an organization.
@@ -46,12 +42,6 @@ def list_collections_v2(
 
         Parameters
         ----------
-        limit : typing.Optional[int]
-            Maximum number of collections to return
-
-        offset : typing.Optional[int]
-            Pagination offset
-
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
 
@@ -65,12 +55,12 @@ def list_collections_v2(
         from runcaptain import Captain
 
         client = Captain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
         client.collections.list_collections_v2()
         """
-        _response = self._raw_client.list_collections_v2(limit=limit, offset=offset, request_options=request_options)
+        _response = self._raw_client.list_collections_v2(request_options=request_options)
         return _response.data
 
     def create_collection_v2(
@@ -86,7 +76,6 @@ def create_collection_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to create
 
         description : typing.Optional[str]
 
@@ -103,11 +92,11 @@ def create_collection_v2(
         from runcaptain import Captain
 
         client = Captain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
         client.collections.create_collection_v2(
-            collection_name="my_documents",
+            collection_name="collection_name",
         )
         """
         _response = self._raw_client.create_collection_v2(
@@ -124,7 +113,6 @@ def delete_collection_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to delete
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -139,11 +127,11 @@ def delete_collection_v2(
         from runcaptain import Captain
 
         client = Captain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
         client.collections.delete_collection_v2(
-            collection_name="my_documents",
+            collection_name="collection_name",
         )
         """
         _response = self._raw_client.delete_collection_v2(collection_name, request_options=request_options)
@@ -174,7 +162,6 @@ def change_collection_environment_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to move
 
         new_environment : ChangeEnvironmentRequestV2NewEnvironment
             The target environment to move the collection to
@@ -192,12 +179,12 @@ def change_collection_environment_v2(
         from runcaptain import Captain
 
         client = Captain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
         client.collections.change_collection_environment_v2(
-            collection_name="my_documents",
-            new_environment="production",
+            collection_name="collection_name",
+            new_environment="development",
         )
         """
         _response = self._raw_client.change_collection_environment_v2(
@@ -209,7 +196,6 @@ def list_documents_v2(
         self,
         collection_name: str,
         *,
-        limit: typing.Optional[int] = None,
         offset: typing.Optional[int] = None,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> DocumentListResponseV2:
@@ -219,10 +205,6 @@ def list_documents_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection
-
-        limit : typing.Optional[int]
-            Maximum number of documents to return
 
         offset : typing.Optional[int]
             Pagination offset
@@ -240,18 +222,15 @@ def list_documents_v2(
         from runcaptain import Captain
 
         client = Captain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
         client.collections.list_documents_v2(
-            collection_name="my_documents",
-            limit=100,
-            offset=0,
+            collection_name="collection_name",
+            offset=1,
         )
         """
-        _response = self._raw_client.list_documents_v2(
-            collection_name, limit=limit, offset=offset, request_options=request_options
-        )
+        _response = self._raw_client.list_documents_v2(collection_name, offset=offset, request_options=request_options)
         return _response.data
 
     def wipe_collection_documents_v2(
@@ -278,8 +257,8 @@ def wipe_collection_documents_v2(
         from runcaptain import Captain
 
         client = Captain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
         client.collections.wipe_collection_documents_v2(
             collection_name="collection_name",
@@ -297,10 +276,8 @@ def delete_document_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection
 
         document_id : str
-            ID of the document to delete
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -315,8 +292,8 @@ def delete_document_v2(
         from runcaptain import Captain
 
         client = Captain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
         client.collections.delete_document_v2(
             collection_name="collection_name",
@@ -343,11 +320,7 @@ def with_raw_response(self) -> AsyncRawCollectionsClient:
         return self._raw_client
 
     async def list_collections_v2(
-        self,
-        *,
-        limit: typing.Optional[int] = None,
-        offset: typing.Optional[int] = None,
-        request_options: typing.Optional[RequestOptions] = None,
+        self, *, request_options: typing.Optional[RequestOptions] = None
     ) -> CollectionListResponseV2:
         """
         List all collections for an organization.
@@ -356,12 +329,6 @@ async def list_collections_v2(
 
         Parameters
         ----------
-        limit : typing.Optional[int]
-            Maximum number of collections to return
-
-        offset : typing.Optional[int]
-            Pagination offset
-
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
 
@@ -377,8 +344,8 @@ async def list_collections_v2(
         from runcaptain import AsyncCaptain
 
         client = AsyncCaptain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
 
 
@@ -388,9 +355,7 @@ async def main() -> None:
 
         asyncio.run(main())
         """
-        _response = await self._raw_client.list_collections_v2(
-            limit=limit, offset=offset, request_options=request_options
-        )
+        _response = await self._raw_client.list_collections_v2(request_options=request_options)
         return _response.data
 
     async def create_collection_v2(
@@ -406,7 +371,6 @@ async def create_collection_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to create
 
         description : typing.Optional[str]
 
@@ -425,14 +389,14 @@ async def create_collection_v2(
         from runcaptain import AsyncCaptain
 
         client = AsyncCaptain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
 
 
         async def main() -> None:
             await client.collections.create_collection_v2(
-                collection_name="my_documents",
+                collection_name="collection_name",
             )
 
 
@@ -452,7 +416,6 @@ async def delete_collection_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to delete
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -469,14 +432,14 @@ async def delete_collection_v2(
         from runcaptain import AsyncCaptain
 
         client = AsyncCaptain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
 
 
         async def main() -> None:
             await client.collections.delete_collection_v2(
-                collection_name="my_documents",
+                collection_name="collection_name",
             )
 
 
@@ -510,7 +473,6 @@ async def change_collection_environment_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to move
 
         new_environment : ChangeEnvironmentRequestV2NewEnvironment
             The target environment to move the collection to
@@ -530,15 +492,15 @@ async def change_collection_environment_v2(
         from runcaptain import AsyncCaptain
 
         client = AsyncCaptain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
 
 
         async def main() -> None:
             await client.collections.change_collection_environment_v2(
-                collection_name="my_documents",
-                new_environment="production",
+                collection_name="collection_name",
+                new_environment="development",
             )
 
 
@@ -553,7 +515,6 @@ async def list_documents_v2(
         self,
         collection_name: str,
         *,
-        limit: typing.Optional[int] = None,
         offset: typing.Optional[int] = None,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> DocumentListResponseV2:
@@ -563,10 +524,6 @@ async def list_documents_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection
-
-        limit : typing.Optional[int]
-            Maximum number of documents to return
 
         offset : typing.Optional[int]
             Pagination offset
@@ -586,23 +543,22 @@ async def list_documents_v2(
         from runcaptain import AsyncCaptain
 
         client = AsyncCaptain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
 
 
         async def main() -> None:
             await client.collections.list_documents_v2(
-                collection_name="my_documents",
-                limit=100,
-                offset=0,
+                collection_name="collection_name",
+                offset=1,
             )
 
 
         asyncio.run(main())
         """
         _response = await self._raw_client.list_documents_v2(
-            collection_name, limit=limit, offset=offset, request_options=request_options
+            collection_name, offset=offset, request_options=request_options
         )
         return _response.data
 
@@ -632,8 +588,8 @@ async def wipe_collection_documents_v2(
         from runcaptain import AsyncCaptain
 
         client = AsyncCaptain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
 
 
@@ -659,10 +615,8 @@ async def delete_document_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection
 
         document_id : str
-            ID of the document to delete
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -679,8 +633,8 @@ async def delete_document_v2(
         from runcaptain import AsyncCaptain
 
         client = AsyncCaptain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
 
 
diff --git a/src/runcaptain/collections/raw_client.py b/src/runcaptain/collections/raw_client.py
index 8db600a..8453bf2 100644
--- a/src/runcaptain/collections/raw_client.py
+++ b/src/runcaptain/collections/raw_client.py
@@ -29,11 +29,7 @@ def __init__(self, *, client_wrapper: SyncClientWrapper):
         self._client_wrapper = client_wrapper
 
     def list_collections_v2(
-        self,
-        *,
-        limit: typing.Optional[int] = None,
-        offset: typing.Optional[int] = None,
-        request_options: typing.Optional[RequestOptions] = None,
+        self, *, request_options: typing.Optional[RequestOptions] = None
     ) -> HttpResponse[CollectionListResponseV2]:
         """
         List all collections for an organization.
@@ -42,12 +38,6 @@ def list_collections_v2(
 
         Parameters
         ----------
-        limit : typing.Optional[int]
-            Maximum number of collections to return
-
-        offset : typing.Optional[int]
-            Pagination offset
-
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
 
@@ -59,10 +49,6 @@ def list_collections_v2(
         _response = self._client_wrapper.httpx_client.request(
             "v2/collections",
             method="GET",
-            params={
-                "limit": limit,
-                "offset": offset,
-            },
             request_options=request_options,
         )
         try:
@@ -93,7 +79,6 @@ def create_collection_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to create
 
         description : typing.Optional[str]
 
@@ -141,7 +126,6 @@ def delete_collection_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to delete
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -207,7 +191,6 @@ def change_collection_environment_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to move
 
         new_environment : ChangeEnvironmentRequestV2NewEnvironment
             The target environment to move the collection to
@@ -284,7 +267,6 @@ def list_documents_v2(
         self,
         collection_name: str,
         *,
-        limit: typing.Optional[int] = None,
         offset: typing.Optional[int] = None,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> HttpResponse[DocumentListResponseV2]:
@@ -294,10 +276,6 @@ def list_documents_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection
-
-        limit : typing.Optional[int]
-            Maximum number of documents to return
 
         offset : typing.Optional[int]
             Pagination offset
@@ -314,7 +292,6 @@ def list_documents_v2(
             f"v2/collections/{jsonable_encoder(collection_name)}/documents",
             method="GET",
             params={
-                "limit": limit,
                 "offset": offset,
             },
             request_options=request_options,
@@ -382,10 +359,8 @@ def delete_document_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection
 
         document_id : str
-            ID of the document to delete
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -432,11 +407,7 @@ def __init__(self, *, client_wrapper: AsyncClientWrapper):
         self._client_wrapper = client_wrapper
 
     async def list_collections_v2(
-        self,
-        *,
-        limit: typing.Optional[int] = None,
-        offset: typing.Optional[int] = None,
-        request_options: typing.Optional[RequestOptions] = None,
+        self, *, request_options: typing.Optional[RequestOptions] = None
     ) -> AsyncHttpResponse[CollectionListResponseV2]:
         """
         List all collections for an organization.
@@ -445,12 +416,6 @@ async def list_collections_v2(
 
         Parameters
         ----------
-        limit : typing.Optional[int]
-            Maximum number of collections to return
-
-        offset : typing.Optional[int]
-            Pagination offset
-
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
 
@@ -462,10 +427,6 @@ async def list_collections_v2(
         _response = await self._client_wrapper.httpx_client.request(
             "v2/collections",
             method="GET",
-            params={
-                "limit": limit,
-                "offset": offset,
-            },
             request_options=request_options,
         )
         try:
@@ -496,7 +457,6 @@ async def create_collection_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to create
 
         description : typing.Optional[str]
 
@@ -544,7 +504,6 @@ async def delete_collection_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to delete
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -610,7 +569,6 @@ async def change_collection_environment_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to move
 
         new_environment : ChangeEnvironmentRequestV2NewEnvironment
             The target environment to move the collection to
@@ -687,7 +645,6 @@ async def list_documents_v2(
         self,
         collection_name: str,
         *,
-        limit: typing.Optional[int] = None,
         offset: typing.Optional[int] = None,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> AsyncHttpResponse[DocumentListResponseV2]:
@@ -697,10 +654,6 @@ async def list_documents_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection
-
-        limit : typing.Optional[int]
-            Maximum number of documents to return
 
         offset : typing.Optional[int]
             Pagination offset
@@ -717,7 +670,6 @@ async def list_documents_v2(
             f"v2/collections/{jsonable_encoder(collection_name)}/documents",
             method="GET",
             params={
-                "limit": limit,
                 "offset": offset,
             },
             request_options=request_options,
@@ -785,10 +737,8 @@ async def delete_document_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection
 
         document_id : str
-            ID of the document to delete
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
diff --git a/src/runcaptain/core/client_wrapper.py b/src/runcaptain/core/client_wrapper.py
index 8b7c254..7a3c74a 100644
--- a/src/runcaptain/core/client_wrapper.py
+++ b/src/runcaptain/core/client_wrapper.py
@@ -11,15 +11,15 @@ class BaseClientWrapper:
     def __init__(
         self,
         *,
-        authorization: str,
-        organization_id: typing.Optional[str] = None,
+        organization_id: str,
+        key: typing.Union[str, typing.Callable[[], str]],
         headers: typing.Optional[typing.Dict[str, str]] = None,
         base_url: str,
         timeout: typing.Optional[float] = None,
         logging: typing.Optional[typing.Union[LogConfig, Logger]] = None,
     ):
-        self._authorization = authorization
         self._organization_id = organization_id
+        self._key = key
         self._headers = headers
         self._base_url = base_url
         self._timeout = timeout
@@ -29,19 +29,24 @@ def get_headers(self) -> typing.Dict[str, str]:
         import platform
 
         headers: typing.Dict[str, str] = {
-            "User-Agent": "captain-sdk/0.0.0",
+            "User-Agent": "captain-sdk/0.0.1",
             "X-Fern-Language": "Python",
             "X-Fern-Runtime": f"python/{platform.python_version()}",
             "X-Fern-Platform": f"{platform.system().lower()}/{platform.release()}",
             "X-Fern-SDK-Name": "captain-sdk",
-            "X-Fern-SDK-Version": "0.0.0",
+            "X-Fern-SDK-Version": "0.0.16",
             **(self.get_custom_headers() or {}),
         }
-        headers["Authorization"] = self._authorization
-        if self._organization_id is not None:
-            headers["X-Organization-ID"] = self._organization_id
+        headers["X-Organization-ID"] = self._organization_id
+        headers["Authorization"] = f"Bearer {self._get_key()}"
         return headers
 
+    def _get_key(self) -> str:
+        """Return the configured API key as a string."""
+        key = self._key
+        # A plain string is returned as-is; any other value is called with no arguments.
+        return key if isinstance(key, str) else key()
+
     def get_custom_headers(self) -> typing.Optional[typing.Dict[str, str]]:
         return self._headers
 
@@ -56,8 +61,8 @@ class SyncClientWrapper(BaseClientWrapper):
     def __init__(
         self,
         *,
-        authorization: str,
-        organization_id: typing.Optional[str] = None,
+        organization_id: str,
+        key: typing.Union[str, typing.Callable[[], str]],
         headers: typing.Optional[typing.Dict[str, str]] = None,
         base_url: str,
         timeout: typing.Optional[float] = None,
@@ -65,8 +70,8 @@ def __init__(
         httpx_client: httpx.Client,
     ):
         super().__init__(
-            authorization=authorization,
             organization_id=organization_id,
+            key=key,
             headers=headers,
             base_url=base_url,
             timeout=timeout,
@@ -85,8 +90,8 @@ class AsyncClientWrapper(BaseClientWrapper):
     def __init__(
         self,
         *,
-        authorization: str,
-        organization_id: typing.Optional[str] = None,
+        organization_id: str,
+        key: typing.Union[str, typing.Callable[[], str]],
         headers: typing.Optional[typing.Dict[str, str]] = None,
         base_url: str,
         timeout: typing.Optional[float] = None,
@@ -95,8 +100,8 @@ def __init__(
         httpx_client: httpx.AsyncClient,
     ):
         super().__init__(
-            authorization=authorization,
             organization_id=organization_id,
+            key=key,
             headers=headers,
             base_url=base_url,
             timeout=timeout,
diff --git a/src/runcaptain/datasets/client.py b/src/runcaptain/datasets/client.py
index e20d30b..e46bc4d 100644
--- a/src/runcaptain/datasets/client.py
+++ b/src/runcaptain/datasets/client.py
@@ -71,13 +71,13 @@ def search_dataset(
         from runcaptain import Captain
 
         client = Captain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
         client.datasets.search_dataset(
             dataset="nytimes",
-            q="artificial intelligence",
-            limit=5,
+            q="q",
+            limit=1,
         )
         """
         _response = self._raw_client.search_dataset(dataset, q=q, limit=limit, request_options=request_options)
@@ -125,12 +125,12 @@ def get_dataset_article(
         from runcaptain import Captain
 
         client = Captain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
         client.datasets.get_dataset_article(
             dataset="nytimes",
-            url="https://www.nytimes.com/example/news_example.html",
+            url="url",
         )
         """
         _response = self._raw_client.get_dataset_article(dataset, url, request_options=request_options)
@@ -199,16 +199,16 @@ async def search_dataset(
         from runcaptain import AsyncCaptain
 
         client = AsyncCaptain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
 
 
         async def main() -> None:
             await client.datasets.search_dataset(
                 dataset="nytimes",
-                q="artificial intelligence",
-                limit=5,
+                q="q",
+                limit=1,
             )
 
 
@@ -261,15 +261,15 @@ async def get_dataset_article(
         from runcaptain import AsyncCaptain
 
         client = AsyncCaptain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
 
 
         async def main() -> None:
             await client.datasets.get_dataset_article(
                 dataset="nytimes",
-                url="https://www.nytimes.com/example/news_example.html",
+                url="url",
             )
 
 
diff --git a/src/runcaptain/indexing/client.py b/src/runcaptain/indexing/client.py
index 41b804c..73d31ae 100644
--- a/src/runcaptain/indexing/client.py
+++ b/src/runcaptain/indexing/client.py
@@ -43,7 +43,6 @@ def index_s3bucket_v2(
         aws_access_key_id: str,
         aws_secret_access_key: str,
         processing_type: IndexS3RequestV2ProcessingType,
-        idempotency_key: typing.Optional[str] = None,
         bucket_region: typing.Optional[str] = OMIT,
         max_files: typing.Optional[int] = OMIT,
         skip_existing: typing.Optional[bool] = OMIT,
@@ -56,7 +55,6 @@ def index_s3bucket_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the S3 bucket
@@ -70,9 +68,6 @@ def index_s3bucket_v2(
         processing_type : IndexS3RequestV2ProcessingType
             Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing.
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         bucket_region : typing.Optional[str]
             AWS region where the bucket is located
 
@@ -98,14 +93,14 @@ def index_s3bucket_v2(
         from runcaptain import Captain
 
         client = Captain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
         client.indexing.index_s3bucket_v2(
-            collection_name="my_documents",
-            bucket_name="my-s3-bucket",
+            collection_name="collection_name",
+            bucket_name="my-company-docs",
             aws_access_key_id="AKIAIOSFODNN7EXAMPLE",
-            aws_secret_access_key="your_secret_key",
+            aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
             bucket_region="us-east-1",
             processing_type="advanced",
         )
@@ -116,7 +111,6 @@ def index_s3bucket_v2(
             aws_access_key_id=aws_access_key_id,
             aws_secret_access_key=aws_secret_access_key,
             processing_type=processing_type,
-            idempotency_key=idempotency_key,
             bucket_region=bucket_region,
             max_files=max_files,
             skip_existing=skip_existing,
@@ -144,7 +138,6 @@ def index_s3file_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the S3 bucket
@@ -180,15 +173,15 @@ def index_s3file_v2(
         from runcaptain import Captain
 
         client = Captain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
         client.indexing.index_s3file_v2(
-            collection_name="my_documents",
-            bucket_name="my-s3-bucket",
-            file_uri="s3://my-s3-bucket/contracts/acme_contract.pdf",
+            collection_name="collection_name",
+            bucket_name="my-company-docs",
+            file_uri="s3://my-company-docs/contracts/acme_contract.pdf",
             aws_access_key_id="AKIAIOSFODNN7EXAMPLE",
-            aws_secret_access_key="your_secret_key",
+            aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
             bucket_region="us-east-1",
             processing_type="advanced",
         )
@@ -224,7 +217,6 @@ def index_gcs_bucket_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the GCS bucket
@@ -257,12 +249,12 @@ def index_gcs_bucket_v2(
         from runcaptain import Captain
 
         client = Captain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
         client.indexing.index_gcs_bucket_v2(
-            collection_name="my_documents",
-            bucket_name="my-gcs-bucket",
+            collection_name="collection_name",
+            bucket_name="my-company-docs",
             service_account_json='{"type":"service_account","project_id":"my-project",...}',
             processing_type="advanced",
         )
@@ -296,7 +288,6 @@ def index_gcs_file_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the GCS bucket
@@ -326,8 +317,8 @@ def index_gcs_file_v2(
         from runcaptain import Captain
 
         client = Captain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
         client.indexing.index_gcs_file_v2(
             collection_name="collection_name",
@@ -357,7 +348,6 @@ def index_s3directory_v2(
         aws_access_key_id: str,
         aws_secret_access_key: str,
         processing_type: IndexS3DirectoryRequestV2ProcessingType,
-        idempotency_key: typing.Optional[str] = None,
         bucket_region: typing.Optional[str] = OMIT,
         max_files: typing.Optional[int] = OMIT,
         skip_existing: typing.Optional[bool] = OMIT,
@@ -370,7 +360,6 @@ def index_s3directory_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the S3 bucket
@@ -387,9 +376,6 @@ def index_s3directory_v2(
         processing_type : IndexS3DirectoryRequestV2ProcessingType
             Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing.
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         bucket_region : typing.Optional[str]
             AWS region where the bucket is located
 
@@ -415,15 +401,15 @@ def index_s3directory_v2(
         from runcaptain import Captain
 
         client = Captain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
         client.indexing.index_s3directory_v2(
-            collection_name="my_documents",
-            bucket_name="my-s3-bucket",
+            collection_name="collection_name",
+            bucket_name="my-company-docs",
             directory_path="reports/2024/january",
             aws_access_key_id="AKIAIOSFODNN7EXAMPLE",
-            aws_secret_access_key="your_secret_key",
+            aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
             bucket_region="us-east-1",
             processing_type="advanced",
         )
@@ -435,7 +421,6 @@ def index_s3directory_v2(
             aws_access_key_id=aws_access_key_id,
             aws_secret_access_key=aws_secret_access_key,
             processing_type=processing_type,
-            idempotency_key=idempotency_key,
             bucket_region=bucket_region,
             max_files=max_files,
             skip_existing=skip_existing,
@@ -452,7 +437,6 @@ def index_gcs_directory_v2(
         directory_path: str,
         service_account_json: str,
         processing_type: IndexGcsDirectoryRequestV2ProcessingType,
-        idempotency_key: typing.Optional[str] = None,
         max_files: typing.Optional[int] = OMIT,
         skip_existing: typing.Optional[bool] = OMIT,
         custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
@@ -464,7 +448,6 @@ def index_gcs_directory_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the GCS bucket
@@ -478,9 +461,6 @@ def index_gcs_directory_v2(
         processing_type : IndexGcsDirectoryRequestV2ProcessingType
             Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing.
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         max_files : typing.Optional[int]
             Maximum number of files to index (optional)
 
@@ -503,12 +483,12 @@ def index_gcs_directory_v2(
         from runcaptain import Captain
 
         client = Captain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
         client.indexing.index_gcs_directory_v2(
-            collection_name="my_documents",
-            bucket_name="my-gcs-bucket",
+            collection_name="collection_name",
+            bucket_name="my-company-docs",
             directory_path="reports/2024/january",
             service_account_json='{"type":"service_account","project_id":"my-project",...}',
             processing_type="advanced",
@@ -520,7 +500,6 @@ def index_gcs_directory_v2(
             directory_path=directory_path,
             service_account_json=service_account_json,
             processing_type=processing_type,
-            idempotency_key=idempotency_key,
             max_files=max_files,
             skip_existing=skip_existing,
             custom_metadata=custom_metadata,
@@ -536,7 +515,6 @@ def index_azure_container_v2(
         account_name: str,
         account_key: str,
         processing_type: IndexAzureRequestV2ProcessingType,
-        idempotency_key: typing.Optional[str] = None,
         max_files: typing.Optional[int] = OMIT,
         skip_existing: typing.Optional[bool] = OMIT,
         custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
@@ -548,7 +526,6 @@ def index_azure_container_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         container_name : str
             Name of the Azure Blob Storage container
@@ -562,9 +539,6 @@ def index_azure_container_v2(
         processing_type : IndexAzureRequestV2ProcessingType
             Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing.
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         max_files : typing.Optional[int]
             Maximum number of files to index (optional)
 
@@ -587,14 +561,14 @@ def index_azure_container_v2(
         from runcaptain import Captain
 
         client = Captain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
         client.indexing.index_azure_container_v2(
-            collection_name="my_documents",
+            collection_name="collection_name",
             container_name="my-container",
             account_name="mystorageaccount",
-            account_key="your_account_key",
+            account_key="your_account_key_base64",
             processing_type="advanced",
         )
         """
@@ -604,7 +578,6 @@ def index_azure_container_v2(
             account_name=account_name,
             account_key=account_key,
             processing_type=processing_type,
-            idempotency_key=idempotency_key,
             max_files=max_files,
             skip_existing=skip_existing,
             custom_metadata=custom_metadata,
@@ -630,7 +603,6 @@ def index_azure_file_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         container_name : str
             Name of the Azure Blob Storage container
@@ -663,15 +635,15 @@ def index_azure_file_v2(
         from runcaptain import Captain
 
         client = Captain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
         client.indexing.index_azure_file_v2(
-            collection_name="my_documents",
+            collection_name="collection_name",
             container_name="my-container",
             file_uri="https://mystorageaccount.blob.core.windows.net/my-container/contracts/acme_contract.pdf",
             account_name="mystorageaccount",
-            account_key="your_account_key",
+            account_key="your_account_key_base64",
             processing_type="advanced",
         )
         """
@@ -696,7 +668,6 @@ def index_azure_directory_v2(
         account_name: str,
         account_key: str,
         processing_type: IndexAzureDirectoryRequestV2ProcessingType,
-        idempotency_key: typing.Optional[str] = None,
         max_files: typing.Optional[int] = OMIT,
         skip_existing: typing.Optional[bool] = OMIT,
         custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
@@ -708,7 +679,6 @@ def index_azure_directory_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         container_name : str
             Name of the Azure Blob Storage container
@@ -725,9 +695,6 @@ def index_azure_directory_v2(
         processing_type : IndexAzureDirectoryRequestV2ProcessingType
             Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing.
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         max_files : typing.Optional[int]
             Maximum number of files to index (optional)
 
@@ -750,15 +717,15 @@ def index_azure_directory_v2(
         from runcaptain import Captain
 
         client = Captain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
         client.indexing.index_azure_directory_v2(
-            collection_name="my_documents",
+            collection_name="collection_name",
             container_name="my-container",
             directory_path="reports/2024/january",
             account_name="mystorageaccount",
-            account_key="your_account_key",
+            account_key="your_account_key_base64",
             processing_type="advanced",
         )
         """
@@ -769,7 +736,6 @@ def index_azure_directory_v2(
             account_name=account_name,
             account_key=account_key,
             processing_type=processing_type,
-            idempotency_key=idempotency_key,
             max_files=max_files,
             skip_existing=skip_existing,
             custom_metadata=custom_metadata,
@@ -801,7 +767,6 @@ async def index_s3bucket_v2(
         aws_access_key_id: str,
         aws_secret_access_key: str,
         processing_type: IndexS3RequestV2ProcessingType,
-        idempotency_key: typing.Optional[str] = None,
         bucket_region: typing.Optional[str] = OMIT,
         max_files: typing.Optional[int] = OMIT,
         skip_existing: typing.Optional[bool] = OMIT,
@@ -814,7 +779,6 @@ async def index_s3bucket_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the S3 bucket
@@ -828,9 +792,6 @@ async def index_s3bucket_v2(
         processing_type : IndexS3RequestV2ProcessingType
             Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing.
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         bucket_region : typing.Optional[str]
             AWS region where the bucket is located
 
@@ -858,17 +819,17 @@ async def index_s3bucket_v2(
         from runcaptain import AsyncCaptain
 
         client = AsyncCaptain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
 
 
         async def main() -> None:
             await client.indexing.index_s3bucket_v2(
-                collection_name="my_documents",
-                bucket_name="my-s3-bucket",
+                collection_name="collection_name",
+                bucket_name="my-company-docs",
                 aws_access_key_id="AKIAIOSFODNN7EXAMPLE",
-                aws_secret_access_key="your_secret_key",
+                aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
                 bucket_region="us-east-1",
                 processing_type="advanced",
             )
@@ -882,7 +843,6 @@ async def main() -> None:
             aws_access_key_id=aws_access_key_id,
             aws_secret_access_key=aws_secret_access_key,
             processing_type=processing_type,
-            idempotency_key=idempotency_key,
             bucket_region=bucket_region,
             max_files=max_files,
             skip_existing=skip_existing,
@@ -910,7 +870,6 @@ async def index_s3file_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the S3 bucket
@@ -948,18 +907,18 @@ async def index_s3file_v2(
         from runcaptain import AsyncCaptain
 
         client = AsyncCaptain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
 
 
         async def main() -> None:
             await client.indexing.index_s3file_v2(
-                collection_name="my_documents",
-                bucket_name="my-s3-bucket",
-                file_uri="s3://my-s3-bucket/contracts/acme_contract.pdf",
+                collection_name="collection_name",
+                bucket_name="my-company-docs",
+                file_uri="s3://my-company-docs/contracts/acme_contract.pdf",
                 aws_access_key_id="AKIAIOSFODNN7EXAMPLE",
-                aws_secret_access_key="your_secret_key",
+                aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
                 bucket_region="us-east-1",
                 processing_type="advanced",
             )
@@ -998,7 +957,6 @@ async def index_gcs_bucket_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the GCS bucket
@@ -1033,15 +991,15 @@ async def index_gcs_bucket_v2(
         from runcaptain import AsyncCaptain
 
         client = AsyncCaptain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
 
 
         async def main() -> None:
             await client.indexing.index_gcs_bucket_v2(
-                collection_name="my_documents",
-                bucket_name="my-gcs-bucket",
+                collection_name="collection_name",
+                bucket_name="my-company-docs",
                 service_account_json='{"type":"service_account","project_id":"my-project",...}',
                 processing_type="advanced",
             )
@@ -1078,7 +1036,6 @@ async def index_gcs_file_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the GCS bucket
@@ -1110,8 +1067,8 @@ async def index_gcs_file_v2(
         from runcaptain import AsyncCaptain
 
         client = AsyncCaptain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
 
 
@@ -1147,7 +1104,6 @@ async def index_s3directory_v2(
         aws_access_key_id: str,
         aws_secret_access_key: str,
         processing_type: IndexS3DirectoryRequestV2ProcessingType,
-        idempotency_key: typing.Optional[str] = None,
         bucket_region: typing.Optional[str] = OMIT,
         max_files: typing.Optional[int] = OMIT,
         skip_existing: typing.Optional[bool] = OMIT,
@@ -1160,7 +1116,6 @@ async def index_s3directory_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the S3 bucket
@@ -1177,9 +1132,6 @@ async def index_s3directory_v2(
         processing_type : IndexS3DirectoryRequestV2ProcessingType
             Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing.
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         bucket_region : typing.Optional[str]
             AWS region where the bucket is located
 
@@ -1207,18 +1159,18 @@ async def index_s3directory_v2(
         from runcaptain import AsyncCaptain
 
         client = AsyncCaptain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
 
 
         async def main() -> None:
             await client.indexing.index_s3directory_v2(
-                collection_name="my_documents",
-                bucket_name="my-s3-bucket",
+                collection_name="collection_name",
+                bucket_name="my-company-docs",
                 directory_path="reports/2024/january",
                 aws_access_key_id="AKIAIOSFODNN7EXAMPLE",
-                aws_secret_access_key="your_secret_key",
+                aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
                 bucket_region="us-east-1",
                 processing_type="advanced",
             )
@@ -1233,7 +1185,6 @@ async def main() -> None:
             aws_access_key_id=aws_access_key_id,
             aws_secret_access_key=aws_secret_access_key,
             processing_type=processing_type,
-            idempotency_key=idempotency_key,
             bucket_region=bucket_region,
             max_files=max_files,
             skip_existing=skip_existing,
@@ -1250,7 +1201,6 @@ async def index_gcs_directory_v2(
         directory_path: str,
         service_account_json: str,
         processing_type: IndexGcsDirectoryRequestV2ProcessingType,
-        idempotency_key: typing.Optional[str] = None,
         max_files: typing.Optional[int] = OMIT,
         skip_existing: typing.Optional[bool] = OMIT,
         custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
@@ -1262,7 +1212,6 @@ async def index_gcs_directory_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the GCS bucket
@@ -1276,9 +1225,6 @@ async def index_gcs_directory_v2(
         processing_type : IndexGcsDirectoryRequestV2ProcessingType
             Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing.
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         max_files : typing.Optional[int]
             Maximum number of files to index (optional)
 
@@ -1303,15 +1249,15 @@ async def index_gcs_directory_v2(
         from runcaptain import AsyncCaptain
 
         client = AsyncCaptain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
 
 
         async def main() -> None:
             await client.indexing.index_gcs_directory_v2(
-                collection_name="my_documents",
-                bucket_name="my-gcs-bucket",
+                collection_name="collection_name",
+                bucket_name="my-company-docs",
                 directory_path="reports/2024/january",
                 service_account_json='{"type":"service_account","project_id":"my-project",...}',
                 processing_type="advanced",
@@ -1326,7 +1272,6 @@ async def main() -> None:
             directory_path=directory_path,
             service_account_json=service_account_json,
             processing_type=processing_type,
-            idempotency_key=idempotency_key,
             max_files=max_files,
             skip_existing=skip_existing,
             custom_metadata=custom_metadata,
@@ -1342,7 +1287,6 @@ async def index_azure_container_v2(
         account_name: str,
         account_key: str,
         processing_type: IndexAzureRequestV2ProcessingType,
-        idempotency_key: typing.Optional[str] = None,
         max_files: typing.Optional[int] = OMIT,
         skip_existing: typing.Optional[bool] = OMIT,
         custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
@@ -1354,7 +1298,6 @@ async def index_azure_container_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         container_name : str
             Name of the Azure Blob Storage container
@@ -1368,9 +1311,6 @@ async def index_azure_container_v2(
         processing_type : IndexAzureRequestV2ProcessingType
             Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing.
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         max_files : typing.Optional[int]
             Maximum number of files to index (optional)
 
@@ -1395,17 +1335,17 @@ async def index_azure_container_v2(
         from runcaptain import AsyncCaptain
 
         client = AsyncCaptain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
 
 
         async def main() -> None:
             await client.indexing.index_azure_container_v2(
-                collection_name="my_documents",
+                collection_name="collection_name",
                 container_name="my-container",
                 account_name="mystorageaccount",
-                account_key="your_account_key",
+                account_key="your_account_key_base64",
                 processing_type="advanced",
             )
 
@@ -1418,7 +1358,6 @@ async def main() -> None:
             account_name=account_name,
             account_key=account_key,
             processing_type=processing_type,
-            idempotency_key=idempotency_key,
             max_files=max_files,
             skip_existing=skip_existing,
             custom_metadata=custom_metadata,
@@ -1444,7 +1383,6 @@ async def index_azure_file_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         container_name : str
             Name of the Azure Blob Storage container
@@ -1479,18 +1417,18 @@ async def index_azure_file_v2(
         from runcaptain import AsyncCaptain
 
         client = AsyncCaptain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
 
 
         async def main() -> None:
             await client.indexing.index_azure_file_v2(
-                collection_name="my_documents",
+                collection_name="collection_name",
                 container_name="my-container",
                 file_uri="https://mystorageaccount.blob.core.windows.net/my-container/contracts/acme_contract.pdf",
                 account_name="mystorageaccount",
-                account_key="your_account_key",
+                account_key="your_account_key_base64",
                 processing_type="advanced",
             )
 
@@ -1518,7 +1456,6 @@ async def index_azure_directory_v2(
         account_name: str,
         account_key: str,
         processing_type: IndexAzureDirectoryRequestV2ProcessingType,
-        idempotency_key: typing.Optional[str] = None,
         max_files: typing.Optional[int] = OMIT,
         skip_existing: typing.Optional[bool] = OMIT,
         custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
@@ -1530,7 +1467,6 @@ async def index_azure_directory_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         container_name : str
             Name of the Azure Blob Storage container
@@ -1547,9 +1483,6 @@ async def index_azure_directory_v2(
         processing_type : IndexAzureDirectoryRequestV2ProcessingType
             Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing.
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         max_files : typing.Optional[int]
             Maximum number of files to index (optional)
 
@@ -1574,18 +1507,18 @@ async def index_azure_directory_v2(
         from runcaptain import AsyncCaptain
 
         client = AsyncCaptain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
 
 
         async def main() -> None:
             await client.indexing.index_azure_directory_v2(
-                collection_name="my_documents",
+                collection_name="collection_name",
                 container_name="my-container",
                 directory_path="reports/2024/january",
                 account_name="mystorageaccount",
-                account_key="your_account_key",
+                account_key="your_account_key_base64",
                 processing_type="advanced",
             )
 
@@ -1599,7 +1532,6 @@ async def main() -> None:
             account_name=account_name,
             account_key=account_key,
             processing_type=processing_type,
-            idempotency_key=idempotency_key,
             max_files=max_files,
             skip_existing=skip_existing,
             custom_metadata=custom_metadata,
diff --git a/src/runcaptain/indexing/raw_client.py b/src/runcaptain/indexing/raw_client.py
index bb8f09b..db63203 100644
--- a/src/runcaptain/indexing/raw_client.py
+++ b/src/runcaptain/indexing/raw_client.py
@@ -36,7 +36,6 @@ def index_s3bucket_v2(
         aws_access_key_id: str,
         aws_secret_access_key: str,
         processing_type: IndexS3RequestV2ProcessingType,
-        idempotency_key: typing.Optional[str] = None,
         bucket_region: typing.Optional[str] = OMIT,
         max_files: typing.Optional[int] = OMIT,
         skip_existing: typing.Optional[bool] = OMIT,
@@ -49,7 +48,6 @@ def index_s3bucket_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the S3 bucket
@@ -63,9 +61,6 @@ def index_s3bucket_v2(
         processing_type : IndexS3RequestV2ProcessingType
             Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing.
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         bucket_region : typing.Optional[str]
             AWS region where the bucket is located
 
@@ -101,7 +96,6 @@ def index_s3bucket_v2(
             },
             headers={
                 "content-type": "application/json",
-                "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None,
             },
             request_options=request_options,
             omit=OMIT,
@@ -140,7 +134,6 @@ def index_s3file_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the S3 bucket
@@ -222,7 +215,6 @@ def index_gcs_bucket_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the GCS bucket
@@ -299,7 +291,6 @@ def index_gcs_file_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the GCS bucket
@@ -364,7 +355,6 @@ def index_s3directory_v2(
         aws_access_key_id: str,
         aws_secret_access_key: str,
         processing_type: IndexS3DirectoryRequestV2ProcessingType,
-        idempotency_key: typing.Optional[str] = None,
         bucket_region: typing.Optional[str] = OMIT,
         max_files: typing.Optional[int] = OMIT,
         skip_existing: typing.Optional[bool] = OMIT,
@@ -377,7 +367,6 @@ def index_s3directory_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the S3 bucket
@@ -394,9 +383,6 @@ def index_s3directory_v2(
         processing_type : IndexS3DirectoryRequestV2ProcessingType
             Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing.
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         bucket_region : typing.Optional[str]
             AWS region where the bucket is located
 
@@ -433,7 +419,6 @@ def index_s3directory_v2(
             },
             headers={
                 "content-type": "application/json",
-                "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None,
             },
             request_options=request_options,
             omit=OMIT,
@@ -461,7 +446,6 @@ def index_gcs_directory_v2(
         directory_path: str,
         service_account_json: str,
         processing_type: IndexGcsDirectoryRequestV2ProcessingType,
-        idempotency_key: typing.Optional[str] = None,
         max_files: typing.Optional[int] = OMIT,
         skip_existing: typing.Optional[bool] = OMIT,
         custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
@@ -473,7 +457,6 @@ def index_gcs_directory_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the GCS bucket
@@ -487,9 +470,6 @@ def index_gcs_directory_v2(
         processing_type : IndexGcsDirectoryRequestV2ProcessingType
             Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing.
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         max_files : typing.Optional[int]
             Maximum number of files to index (optional)
 
@@ -521,7 +501,6 @@ def index_gcs_directory_v2(
             },
             headers={
                 "content-type": "application/json",
-                "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None,
             },
             request_options=request_options,
             omit=OMIT,
@@ -549,7 +528,6 @@ def index_azure_container_v2(
         account_name: str,
         account_key: str,
         processing_type: IndexAzureRequestV2ProcessingType,
-        idempotency_key: typing.Optional[str] = None,
         max_files: typing.Optional[int] = OMIT,
         skip_existing: typing.Optional[bool] = OMIT,
         custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
@@ -561,7 +539,6 @@ def index_azure_container_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         container_name : str
             Name of the Azure Blob Storage container
@@ -575,9 +552,6 @@ def index_azure_container_v2(
         processing_type : IndexAzureRequestV2ProcessingType
             Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing.
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         max_files : typing.Optional[int]
             Maximum number of files to index (optional)
 
@@ -609,7 +583,6 @@ def index_azure_container_v2(
             },
             headers={
                 "content-type": "application/json",
-                "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None,
             },
             request_options=request_options,
             omit=OMIT,
@@ -647,7 +620,6 @@ def index_azure_file_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         container_name : str
             Name of the Azure Blob Storage container
@@ -716,7 +688,6 @@ def index_azure_directory_v2(
         account_name: str,
         account_key: str,
         processing_type: IndexAzureDirectoryRequestV2ProcessingType,
-        idempotency_key: typing.Optional[str] = None,
         max_files: typing.Optional[int] = OMIT,
         skip_existing: typing.Optional[bool] = OMIT,
         custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
@@ -728,7 +699,6 @@ def index_azure_directory_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         container_name : str
             Name of the Azure Blob Storage container
@@ -745,9 +715,6 @@ def index_azure_directory_v2(
         processing_type : IndexAzureDirectoryRequestV2ProcessingType
             Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing.
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         max_files : typing.Optional[int]
             Maximum number of files to index (optional)
 
@@ -780,7 +747,6 @@ def index_azure_directory_v2(
             },
             headers={
                 "content-type": "application/json",
-                "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None,
             },
             request_options=request_options,
             omit=OMIT,
@@ -813,7 +779,6 @@ async def index_s3bucket_v2(
         aws_access_key_id: str,
         aws_secret_access_key: str,
         processing_type: IndexS3RequestV2ProcessingType,
-        idempotency_key: typing.Optional[str] = None,
         bucket_region: typing.Optional[str] = OMIT,
         max_files: typing.Optional[int] = OMIT,
         skip_existing: typing.Optional[bool] = OMIT,
@@ -826,7 +791,6 @@ async def index_s3bucket_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the S3 bucket
@@ -840,9 +804,6 @@ async def index_s3bucket_v2(
         processing_type : IndexS3RequestV2ProcessingType
             Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing.
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         bucket_region : typing.Optional[str]
             AWS region where the bucket is located
 
@@ -878,7 +839,6 @@ async def index_s3bucket_v2(
             },
             headers={
                 "content-type": "application/json",
-                "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None,
             },
             request_options=request_options,
             omit=OMIT,
@@ -917,7 +877,6 @@ async def index_s3file_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the S3 bucket
@@ -999,7 +958,6 @@ async def index_gcs_bucket_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the GCS bucket
@@ -1076,7 +1034,6 @@ async def index_gcs_file_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the GCS bucket
@@ -1141,7 +1098,6 @@ async def index_s3directory_v2(
         aws_access_key_id: str,
         aws_secret_access_key: str,
         processing_type: IndexS3DirectoryRequestV2ProcessingType,
-        idempotency_key: typing.Optional[str] = None,
         bucket_region: typing.Optional[str] = OMIT,
         max_files: typing.Optional[int] = OMIT,
         skip_existing: typing.Optional[bool] = OMIT,
@@ -1154,7 +1110,6 @@ async def index_s3directory_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the S3 bucket
@@ -1171,9 +1126,6 @@ async def index_s3directory_v2(
         processing_type : IndexS3DirectoryRequestV2ProcessingType
             Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing.
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         bucket_region : typing.Optional[str]
             AWS region where the bucket is located
 
@@ -1210,7 +1162,6 @@ async def index_s3directory_v2(
             },
             headers={
                 "content-type": "application/json",
-                "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None,
             },
             request_options=request_options,
             omit=OMIT,
@@ -1238,7 +1189,6 @@ async def index_gcs_directory_v2(
         directory_path: str,
         service_account_json: str,
         processing_type: IndexGcsDirectoryRequestV2ProcessingType,
-        idempotency_key: typing.Optional[str] = None,
         max_files: typing.Optional[int] = OMIT,
         skip_existing: typing.Optional[bool] = OMIT,
         custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
@@ -1250,7 +1200,6 @@ async def index_gcs_directory_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         bucket_name : str
             Name of the GCS bucket
@@ -1264,9 +1213,6 @@ async def index_gcs_directory_v2(
         processing_type : IndexGcsDirectoryRequestV2ProcessingType
             Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing.
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         max_files : typing.Optional[int]
             Maximum number of files to index (optional)
 
@@ -1298,7 +1244,6 @@ async def index_gcs_directory_v2(
             },
             headers={
                 "content-type": "application/json",
-                "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None,
             },
             request_options=request_options,
             omit=OMIT,
@@ -1326,7 +1271,6 @@ async def index_azure_container_v2(
         account_name: str,
         account_key: str,
         processing_type: IndexAzureRequestV2ProcessingType,
-        idempotency_key: typing.Optional[str] = None,
         max_files: typing.Optional[int] = OMIT,
         skip_existing: typing.Optional[bool] = OMIT,
         custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
@@ -1338,7 +1282,6 @@ async def index_azure_container_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         container_name : str
             Name of the Azure Blob Storage container
@@ -1352,9 +1295,6 @@ async def index_azure_container_v2(
         processing_type : IndexAzureRequestV2ProcessingType
             Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing.
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         max_files : typing.Optional[int]
             Maximum number of files to index (optional)
 
@@ -1386,7 +1326,6 @@ async def index_azure_container_v2(
             },
             headers={
                 "content-type": "application/json",
-                "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None,
             },
             request_options=request_options,
             omit=OMIT,
@@ -1424,7 +1363,6 @@ async def index_azure_file_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         container_name : str
             Name of the Azure Blob Storage container
@@ -1493,7 +1431,6 @@ async def index_azure_directory_v2(
         account_name: str,
         account_key: str,
         processing_type: IndexAzureDirectoryRequestV2ProcessingType,
-        idempotency_key: typing.Optional[str] = None,
         max_files: typing.Optional[int] = OMIT,
         skip_existing: typing.Optional[bool] = OMIT,
         custom_metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
@@ -1505,7 +1442,6 @@ async def index_azure_directory_v2(
         Parameters
         ----------
         collection_name : str
-            Name of the collection to index into
 
         container_name : str
             Name of the Azure Blob Storage container
@@ -1522,9 +1458,6 @@ async def index_azure_directory_v2(
         processing_type : IndexAzureDirectoryRequestV2ProcessingType
             Document processing type. 'advanced' uses agentic OCR with AI-enhanced extraction for complex layouts, tables, figures, charts, and documents containing images. 'basic' provides reliable OCR optimized for general document indexing and high-volume processing.
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         max_files : typing.Optional[int]
             Maximum number of files to index (optional)
 
@@ -1557,7 +1490,6 @@ async def index_azure_directory_v2(
             },
             headers={
                 "content-type": "application/json",
-                "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None,
             },
             request_options=request_options,
             omit=OMIT,
diff --git a/src/runcaptain/jobs/client.py b/src/runcaptain/jobs/client.py
index cd021dd..10f5a5b 100644
--- a/src/runcaptain/jobs/client.py
+++ b/src/runcaptain/jobs/client.py
@@ -58,7 +58,6 @@ def get_job_status_v2(
         Parameters
         ----------
         job_id : str
-            The job ID returned from an indexing request
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -73,11 +72,11 @@ def get_job_status_v2(
         from runcaptain import Captain
 
         client = Captain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
         client.jobs.get_job_status_v2(
-            job_id="abc123xyz-1234567890",
+            job_id="job_id",
         )
         """
         _response = self._raw_client.get_job_status_v2(job_id, request_options=request_options)
@@ -96,7 +95,6 @@ def cancel_job_v2(
         Parameters
         ----------
         job_id : str
-            The job ID to cancel
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -111,11 +109,11 @@ def cancel_job_v2(
         from runcaptain import Captain
 
         client = Captain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
         client.jobs.cancel_job_v2(
-            job_id="abc123xyz-1234567890",
+            job_id="job_id",
         )
         """
         _response = self._raw_client.cancel_job_v2(job_id, request_options=request_options)
@@ -171,7 +169,6 @@ async def get_job_status_v2(
         Parameters
         ----------
         job_id : str
-            The job ID returned from an indexing request
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -188,14 +185,14 @@ async def get_job_status_v2(
         from runcaptain import AsyncCaptain
 
         client = AsyncCaptain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
 
 
         async def main() -> None:
             await client.jobs.get_job_status_v2(
-                job_id="abc123xyz-1234567890",
+                job_id="job_id",
             )
 
 
@@ -217,7 +214,6 @@ async def cancel_job_v2(
         Parameters
         ----------
         job_id : str
-            The job ID to cancel
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -234,14 +230,14 @@ async def cancel_job_v2(
         from runcaptain import AsyncCaptain
 
         client = AsyncCaptain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
 
 
         async def main() -> None:
             await client.jobs.cancel_job_v2(
-                job_id="abc123xyz-1234567890",
+                job_id="job_id",
             )
 
 
diff --git a/src/runcaptain/jobs/raw_client.py b/src/runcaptain/jobs/raw_client.py
index 8f15a15..74cd595 100644
--- a/src/runcaptain/jobs/raw_client.py
+++ b/src/runcaptain/jobs/raw_client.py
@@ -52,7 +52,6 @@ def get_job_status_v2(
         Parameters
         ----------
         job_id : str
-            The job ID returned from an indexing request
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -106,7 +105,6 @@ def cancel_job_v2(
         Parameters
         ----------
         job_id : str
-            The job ID to cancel
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -175,7 +173,6 @@ async def get_job_status_v2(
         Parameters
         ----------
         job_id : str
-            The job ID returned from an indexing request
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -229,7 +226,6 @@ async def cancel_job_v2(
         Parameters
         ----------
         job_id : str
-            The job ID to cancel
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
diff --git a/src/runcaptain/query/client.py b/src/runcaptain/query/client.py
index a923999..f73849a 100644
--- a/src/runcaptain/query/client.py
+++ b/src/runcaptain/query/client.py
@@ -4,7 +4,7 @@
 
 from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
 from ..core.request_options import RequestOptions
-from ..types.query_response_v2 import QueryResponseV2
+from ..types.query_stream_event import QueryStreamEvent
 from .raw_client import AsyncRawQueryClient, RawQueryClient
 
 # this is used as the default value for optional parameters
@@ -26,20 +26,18 @@ def with_raw_response(self) -> RawQueryClient:
         """
         return self._raw_client
 
-    def collection_v2(
+    def collection_v2stream(
         self,
         collection_name: str,
         *,
         query: str,
-        idempotency_key: typing.Optional[str] = None,
         inference: typing.Optional[bool] = OMIT,
-        stream: typing.Optional[bool] = OMIT,
         top_k: typing.Optional[int] = OMIT,
         rerank: typing.Optional[bool] = OMIT,
         metadata_filter: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
         custom_prompt: typing.Optional[str] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> QueryResponseV2:
+    ) -> typing.Iterator[QueryStreamEvent]:
         """
         Execute a natural language query against a collection.
 
@@ -48,69 +46,159 @@ def collection_v2(
 
         ## Streaming (SSE)
 
-        When `stream: true` and `inference: true`, the JSON response includes a `request_id`. Refer to the sample implementations to best make use of streams.
+        When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator.
 
         ### SSE Event Types
 
-        | Event | Format | Description |
-        |-------|--------|-------------|
-        | Text chunk | `data: <text>\\n\\n` | Incremental text of the AI response. Plain text (not JSON). Newlines within text are escaped as `\\n`. |
-        | Tool start | `event: tool_start\\ndata: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"..."}}\\n\\n` | The AI agent is performing a knowledge base search. The `args.query` field contains the search query. |
-        | Tool end | `event: tool_end\\ndata: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}}\\n\\n` | A search completed. `tool_call_id` correlates with the preceding `tool_start`. `result_summary.resultCount` indicates how many results were found. |
-        | Complete | `event: complete\\ndata: {"type":"stream_complete"}\\n\\n` | Stream finished successfully. Close the connection after receiving this. |
-        | Error | `event: error\\ndata: {"type":"stream_error","error":"..."}\\n\\n` | An error occurred during generation. Close the connection. |
+        | `type` value | Schema | Description |
+        |---|---|---|
+        | `text` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. |
+        | `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. |
+        | `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. |
+        | `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. |
+        | `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. |
 
         ### Example SSE Stream
 
         ```
-        event: tool_start
-        data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"revenue projections Q4"}}
+        data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}}
+
+        data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}}
+
+        data: {"type":"text","content":"Based on the documents"}
+        data: {"type":"text","content":" provided, the revenue"}
+        data: {"type":"text","content":" projections for Q4 show"}
+        data: {"type":"text","content":" a 15% increase over Q3."}
+
+        data: {"type":"stream_complete","metadata":{"totalResults":12},"stats":{"totalTokens":150}}
+        ```
+
+        ### Notes
+
+        - The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair.
+        - Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search.
+        - Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses.
+
+        Parameters
+        ----------
+        collection_name : str
+
+        query : str
+            The natural language query to search for
+
+        inference : typing.Optional[bool]
+            Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results.
+
+        top_k : typing.Optional[int]
+            Number of results to return. Only valid when inference=false. Not supported when inference=true (the agent controls its own search strategy).
+
+        rerank : typing.Optional[bool]
+            Enable Voyage AI rerank-2.5 reranking for improved relevance ordering. Adds ~100-300ms latency.
+
+        metadata_filter : typing.Optional[typing.Dict[str, typing.Any]]
+            Filter expression for vector search. Supports: $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin, $and, $or
+
+        custom_prompt : typing.Optional[str]
+            Custom system prompt to override the default RAG prompt when inference=true. Allows customizing how the LLM processes and responds to the query with the retrieved context.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Yields
+        ------
+        typing.Iterator[QueryStreamEvent]
+
+
+        Examples
+        --------
+        from runcaptain import Captain
+
+        client = Captain(
+            organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
+        )
+        response = client.query.collection_v2stream(
+            collection_name="collection_name",
+            query="query",
+        )
+        for chunk in response:
+            print(chunk)
+        """
+        with self._raw_client.collection_v2stream(
+            collection_name,
+            query=query,
+            inference=inference,
+            top_k=top_k,
+            rerank=rerank,
+            metadata_filter=metadata_filter,
+            custom_prompt=custom_prompt,
+            request_options=request_options,
+        ) as r:
+            yield from r.data
 
-        event: tool_end
-        data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}}
+    def collection_v2(
+        self,
+        collection_name: str,
+        *,
+        query: str,
+        inference: typing.Optional[bool] = OMIT,
+        top_k: typing.Optional[int] = OMIT,
+        rerank: typing.Optional[bool] = OMIT,
+        metadata_filter: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
+        custom_prompt: typing.Optional[str] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> typing.Any:
+        """
+        Execute a natural language query against a collection.
 
-        data: Based on the documents
-        data:  provided, the revenue
-        data:  projections for Q4 show
-        data:  a 15% increase over Q3.
+        When `inference=true`, returns an AI-generated response with relevant documents.
+        When `inference=false`, returns raw search results with content and metadata.
 
-        event: tool_start
-        data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_2","args":{"query":"Q3 comparison metrics"}}
+        ## Streaming (SSE)
 
-        event: tool_end
-        data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_2","ok":true,"result_summary":{"resultCount":8}}
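+        This method takes no `stream` argument; call `collection_v2stream` to consume the event stream described here. When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator.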
 
-        data:  Compared to Q3, the key
-        data:  drivers were operational
-        data:  efficiency gains.
+        ### SSE Event Types
+
+        | `type` value | Schema | Description |
+        |---|---|---|
+        | `text` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. |
+        | `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. |
+        | `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. |
+        | `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. |
+        | `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. |
+
+        ### Example SSE Stream
 
-        event: complete
-        data: {"type":"stream_complete"}
+        ```
+        data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}}
+
+        data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}}
+
+        data: {"type":"text","content":"Based on the documents"}
+        data: {"type":"text","content":" provided, the revenue"}
+        data: {"type":"text","content":" projections for Q4 show"}
+        data: {"type":"text","content":" a 15% increase over Q3."}
+
+        data: {"type":"stream_complete","metadata":{"totalResults":12},"stats":{"totalTokens":150}}
         ```
 
         ### Notes
 
-        - The agent may perform multiple searches per query. Each search produces a `tool_start`/`tool_end` pair.
+        - The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair.
         - Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search.
         - Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses.
 
         Parameters
         ----------
         collection_name : str
-            Name of the collection to query
 
         query : str
             The natural language query to search for
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         inference : typing.Optional[bool]
             Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results.
 
-        stream : typing.Optional[bool]
-            Enable real-time streaming of the response
-
         top_k : typing.Optional[int]
             Number of results to return. Only valid when inference=false. Not supported when inference=true (the agent controls its own search strategy).
 
@@ -128,32 +216,26 @@ def collection_v2(
 
         Returns
         -------
-        QueryResponseV2
-            Successful Response — returns JSON when `stream: false`, or SSE event stream when `stream: true`.
+        typing.Any
+
 
         Examples
         --------
         from runcaptain import Captain
 
         client = Captain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
         client.query.collection_v2(
-            collection_name="my_documents",
-            query="What are the key terms in the contract?",
-            inference=False,
-            stream=False,
-            top_k=10,
-            rerank=True,
+            collection_name="collection_name",
+            query="query",
         )
         """
         _response = self._raw_client.collection_v2(
             collection_name,
             query=query,
-            idempotency_key=idempotency_key,
             inference=inference,
-            stream=stream,
             top_k=top_k,
             rerank=rerank,
             metadata_filter=metadata_filter,
@@ -178,20 +260,18 @@ def with_raw_response(self) -> AsyncRawQueryClient:
         """
         return self._raw_client
 
-    async def collection_v2(
+    async def collection_v2stream(
         self,
         collection_name: str,
         *,
         query: str,
-        idempotency_key: typing.Optional[str] = None,
         inference: typing.Optional[bool] = OMIT,
-        stream: typing.Optional[bool] = OMIT,
         top_k: typing.Optional[int] = OMIT,
         rerank: typing.Optional[bool] = OMIT,
         metadata_filter: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
         custom_prompt: typing.Optional[str] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> QueryResponseV2:
+    ) -> typing.AsyncIterator[QueryStreamEvent]:
         """
         Execute a natural language query against a collection.
 
@@ -200,69 +280,168 @@ async def collection_v2(
 
         ## Streaming (SSE)
 
-        When `stream: true` and `inference: true`, the JSON response includes a `request_id`. Refer to the sample implementations to best make use of streams.
+        When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator.
 
         ### SSE Event Types
 
-        | Event | Format | Description |
-        |-------|--------|-------------|
-        | Text chunk | `data: <text>\\n\\n` | Incremental text of the AI response. Plain text (not JSON). Newlines within text are escaped as `\\n`. |
-        | Tool start | `event: tool_start\\ndata: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"..."}}\\n\\n` | The AI agent is performing a knowledge base search. The `args.query` field contains the search query. |
-        | Tool end | `event: tool_end\\ndata: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}}\\n\\n` | A search completed. `tool_call_id` correlates with the preceding `tool_start`. `result_summary.resultCount` indicates how many results were found. |
-        | Complete | `event: complete\\ndata: {"type":"stream_complete"}\\n\\n` | Stream finished successfully. Close the connection after receiving this. |
-        | Error | `event: error\\ndata: {"type":"stream_error","error":"..."}\\n\\n` | An error occurred during generation. Close the connection. |
+        | `type` value | Schema | Description |
+        |---|---|---|
+        | `text` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. |
+        | `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. |
+        | `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. |
+        | `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. |
+        | `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. |
 
         ### Example SSE Stream
 
         ```
-        event: tool_start
-        data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"revenue projections Q4"}}
+        data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}}
+
+        data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}}
+
+        data: {"type":"text","content":"Based on the documents"}
+        data: {"type":"text","content":" provided, the revenue"}
+        data: {"type":"text","content":" projections for Q4 show"}
+        data: {"type":"text","content":" a 15% increase over Q3."}
+
+        data: {"type":"stream_complete","metadata":{"totalResults":12},"stats":{"totalTokens":150}}
+        ```
+
+        ### Notes
+
+        - The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair.
+        - Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search.
+        - Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses.
+
+        Parameters
+        ----------
+        collection_name : str
+
+        query : str
+            The natural language query to search for
+
+        inference : typing.Optional[bool]
+            Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results.
+
+        top_k : typing.Optional[int]
+            Number of results to return. Only valid when inference=false. Not supported when inference=true (the agent controls its own search strategy).
+
+        rerank : typing.Optional[bool]
+            Enable Voyage AI rerank-2.5 reranking for improved relevance ordering. Adds ~100-300ms latency.
+
+        metadata_filter : typing.Optional[typing.Dict[str, typing.Any]]
+            Filter expression for vector search. Supports: $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin, $and, $or
+
+        custom_prompt : typing.Optional[str]
+            Custom system prompt to override the default RAG prompt when inference=true. Allows customizing how the LLM processes and responds to the query with the retrieved context.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Yields
+        ------
+        typing.AsyncIterator[QueryStreamEvent]
+
+
+        Examples
+        --------
+        import asyncio
+
+        from runcaptain import AsyncCaptain
+
+        client = AsyncCaptain(
+            organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
+        )
 
-        event: tool_end
-        data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}}
 
-        data: Based on the documents
-        data:  provided, the revenue
-        data:  projections for Q4 show
-        data:  a 15% increase over Q3.
+        async def main() -> None:
+            response = client.query.collection_v2stream(
+                collection_name="collection_name",
+                query="query",
+            )
+            async for chunk in response:
+                print(chunk)
 
-        event: tool_start
-        data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_2","args":{"query":"Q3 comparison metrics"}}
 
-        event: tool_end
-        data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_2","ok":true,"result_summary":{"resultCount":8}}
+        asyncio.run(main())
+        """
+        async with self._raw_client.collection_v2stream(
+            collection_name,
+            query=query,
+            inference=inference,
+            top_k=top_k,
+            rerank=rerank,
+            metadata_filter=metadata_filter,
+            custom_prompt=custom_prompt,
+            request_options=request_options,
+        ) as r:
+            async for _chunk in r.data:
+                yield _chunk
 
-        data:  Compared to Q3, the key
-        data:  drivers were operational
-        data:  efficiency gains.
+    async def collection_v2(
+        self,
+        collection_name: str,
+        *,
+        query: str,
+        inference: typing.Optional[bool] = OMIT,
+        top_k: typing.Optional[int] = OMIT,
+        rerank: typing.Optional[bool] = OMIT,
+        metadata_filter: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
+        custom_prompt: typing.Optional[str] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> typing.Any:
+        """
+        Execute a natural language query against a collection.
+
+        When `inference=true`, returns an AI-generated response with relevant documents.
+        When `inference=false`, returns raw search results with content and metadata.
+
+        ## Streaming (SSE)
+
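+        This method takes no `stream` argument; call `collection_v2stream` to consume the event stream described here. When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator.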
+
+        ### SSE Event Types
 
-        event: complete
-        data: {"type":"stream_complete"}
+        | `type` value | Schema | Description |
+        |---|---|---|
+        | `text` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. |
+        | `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. |
+        | `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. |
+        | `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. |
+        | `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. |
+
+        ### Example SSE Stream
+
+        ```
+        data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}}
+
+        data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}}
+
+        data: {"type":"text","content":"Based on the documents"}
+        data: {"type":"text","content":" provided, the revenue"}
+        data: {"type":"text","content":" projections for Q4 show"}
+        data: {"type":"text","content":" a 15% increase over Q3."}
+
+        data: {"type":"stream_complete","metadata":{"totalResults":12},"stats":{"totalTokens":150}}
         ```
 
         ### Notes
 
-        - The agent may perform multiple searches per query. Each search produces a `tool_start`/`tool_end` pair.
+        - The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair.
         - Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search.
         - Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses.
 
         Parameters
         ----------
         collection_name : str
-            Name of the collection to query
 
         query : str
             The natural language query to search for
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         inference : typing.Optional[bool]
             Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results.
 
-        stream : typing.Optional[bool]
-            Enable real-time streaming of the response
-
         top_k : typing.Optional[int]
             Number of results to return. Only valid when inference=false. Not supported when inference=true (the agent controls its own search strategy).
 
@@ -280,8 +459,8 @@ async def collection_v2(
 
         Returns
         -------
-        QueryResponseV2
-            Successful Response — returns JSON when `stream: false`, or SSE event stream when `stream: true`.
+        typing.Any
+
 
         Examples
         --------
@@ -290,19 +469,15 @@ async def collection_v2(
         from runcaptain import AsyncCaptain
 
         client = AsyncCaptain(
-            authorization="YOUR_AUTHORIZATION",
             organization_id="YOUR_ORGANIZATION_ID",
+            key="YOUR_KEY",
         )
 
 
         async def main() -> None:
             await client.query.collection_v2(
-                collection_name="my_documents",
-                query="What are the key terms in the contract?",
-                inference=False,
-                stream=False,
-                top_k=10,
-                rerank=True,
+                collection_name="collection_name",
+                query="query",
             )
 
 
@@ -311,9 +486,7 @@ async def main() -> None:
         _response = await self._raw_client.collection_v2(
             collection_name,
             query=query,
-            idempotency_key=idempotency_key,
             inference=inference,
-            stream=stream,
             top_k=top_k,
             rerank=rerank,
             metadata_filter=metadata_filter,
diff --git a/src/runcaptain/query/raw_client.py b/src/runcaptain/query/raw_client.py
index 1c53391..8b56e1e 100644
--- a/src/runcaptain/query/raw_client.py
+++ b/src/runcaptain/query/raw_client.py
@@ -1,15 +1,18 @@
 # This file was auto-generated by Fern from our API Definition.
 
+import contextlib
 import typing
 from json.decoder import JSONDecodeError
+from logging import error, warning
 
 from ..core.api_error import ApiError
 from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
 from ..core.http_response import AsyncHttpResponse, HttpResponse
+from ..core.http_sse._api import EventSource
 from ..core.jsonable_encoder import jsonable_encoder
-from ..core.pydantic_utilities import parse_obj_as
+from ..core.pydantic_utilities import parse_obj_as, parse_sse_obj
 from ..core.request_options import RequestOptions
-from ..types.query_response_v2 import QueryResponseV2
+from ..types.query_stream_event import QueryStreamEvent
 
 # this is used as the default value for optional parameters
 OMIT = typing.cast(typing.Any, ...)
@@ -19,20 +22,19 @@ class RawQueryClient:
     def __init__(self, *, client_wrapper: SyncClientWrapper):
         self._client_wrapper = client_wrapper
 
-    def collection_v2(
+    @contextlib.contextmanager
+    def collection_v2stream(
         self,
         collection_name: str,
         *,
         query: str,
-        idempotency_key: typing.Optional[str] = None,
         inference: typing.Optional[bool] = OMIT,
-        stream: typing.Optional[bool] = OMIT,
         top_k: typing.Optional[int] = OMIT,
         rerank: typing.Optional[bool] = OMIT,
         metadata_filter: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
         custom_prompt: typing.Optional[str] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> HttpResponse[QueryResponseV2]:
+    ) -> typing.Iterator[HttpResponse[typing.Iterator[QueryStreamEvent]]]:
         """
         Execute a natural language query against a collection.
 
@@ -41,69 +43,191 @@ def collection_v2(
 
         ## Streaming (SSE)
 
-        When `stream: true` and `inference: true`, the JSON response includes a `request_id`. Refer to the sample implementations to best make use of streams.
+        When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator.
 
         ### SSE Event Types
 
-        | Event | Format | Description |
-        |-------|--------|-------------|
-        | Text chunk | `data: <text>\\n\\n` | Incremental text of the AI response. Plain text (not JSON). Newlines within text are escaped as `\\n`. |
-        | Tool start | `event: tool_start\\ndata: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"..."}}\\n\\n` | The AI agent is performing a knowledge base search. The `args.query` field contains the search query. |
-        | Tool end | `event: tool_end\\ndata: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}}\\n\\n` | A search completed. `tool_call_id` correlates with the preceding `tool_start`. `result_summary.resultCount` indicates how many results were found. |
-        | Complete | `event: complete\\ndata: {"type":"stream_complete"}\\n\\n` | Stream finished successfully. Close the connection after receiving this. |
-        | Error | `event: error\\ndata: {"type":"stream_error","error":"..."}\\n\\n` | An error occurred during generation. Close the connection. |
+        | `type` value | Schema | Description |
+        |---|---|---|
+        | `text` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. |
+        | `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. |
+        | `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. |
+        | `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. |
+        | `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. |
 
         ### Example SSE Stream
 
         ```
-        event: tool_start
-        data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"revenue projections Q4"}}
+        data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}}
+
+        data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}}
+
+        data: {"type":"text","content":"Based on the documents"}
+        data: {"type":"text","content":" provided, the revenue"}
+        data: {"type":"text","content":" projections for Q4 show"}
+        data: {"type":"text","content":" a 15% increase over Q3."}
+
+        data: {"type":"stream_complete","metadata":{"totalResults":12},"stats":{"totalTokens":150}}
+        ```
+
+        ### Notes
+
+        - The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair.
+        - Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search.
+        - Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses.
+
+        Parameters
+        ----------
+        collection_name : str
+
+        query : str
+            The natural language query to search for
+
+        inference : typing.Optional[bool]
+            Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results.
+
+        top_k : typing.Optional[int]
+            Number of results to return. Only valid when inference=false. Not supported when inference=true (the agent controls its own search strategy).
+
+        rerank : typing.Optional[bool]
+            Enable Voyage AI rerank-2.5 reranking for improved relevance ordering. Adds ~100-300ms latency.
+
+        metadata_filter : typing.Optional[typing.Dict[str, typing.Any]]
+            Filter expression for vector search. Supports: $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin, $and, $or
+
+        custom_prompt : typing.Optional[str]
+            Custom system prompt to override the default RAG prompt when inference=true. Allows customizing how the LLM processes and responds to the query with the retrieved context.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Yields
+        ------
+        typing.Iterator[HttpResponse[typing.Iterator[QueryStreamEvent]]]
+
+        """
+        with self._client_wrapper.httpx_client.stream(
+            f"v2/collections/{jsonable_encoder(collection_name)}/query",
+            method="POST",
+            json={
+                "query": query,
+                "inference": inference,
+                "top_k": top_k,
+                "rerank": rerank,
+                "metadata_filter": metadata_filter,
+                "custom_prompt": custom_prompt,
+                "stream": True,
+            },
+            headers={
+                "content-type": "application/json",
+            },
+            request_options=request_options,
+            omit=OMIT,
+        ) as _response:
+
+            def _stream() -> HttpResponse[typing.Iterator[QueryStreamEvent]]:
+                """Wrap a 2xx response as a lazily parsed event stream; raise ApiError for any other status."""
+                try:
+                    if 200 <= _response.status_code < 300:
+
+                        def _iter():
+                            _event_source = EventSource(_response)
+                            for _sse in _event_source.iter_sse():
+                                if _sse.data is None:  # identity test is the idiomatic None check
+                                    return
+                                try:
+                                    yield typing.cast(
+                                        QueryStreamEvent,
+                                        parse_sse_obj(
+                                            sse=_sse,
+                                            type_=QueryStreamEvent,  # type: ignore
+                                        ),
+                                    )
+                                except JSONDecodeError as e:
+                                    warning(f"Skipping SSE event with invalid JSON: {e}, sse: {_sse!r}")
+                                except (TypeError, ValueError, KeyError, AttributeError) as e:
+                                    warning(
+                                        f"Skipping SSE event due to model construction error: {type(e).__name__}: {e}, sse: {_sse!r}"
+                                    )
+                                except Exception as e:  # best-effort: log and move on to the next event
+                                    error(
+                                        f"Unexpected error processing SSE event: {type(e).__name__}: {e}, sse: {_sse!r}"
+                                    )
+
+                        return HttpResponse(response=_response, data=_iter())
+                    _response.read()  # load the streamed error body before decoding it
+                    _response_json = _response.json()
+                except JSONDecodeError:
+                    raise ApiError(
+                        status_code=_response.status_code, headers=dict(_response.headers), body=_response.text
+                    )
+                raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
+
+            yield _stream()
+
+    def collection_v2(
+        self,
+        collection_name: str,
+        *,
+        query: str,
+        inference: typing.Optional[bool] = OMIT,
+        top_k: typing.Optional[int] = OMIT,
+        rerank: typing.Optional[bool] = OMIT,
+        metadata_filter: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
+        custom_prompt: typing.Optional[str] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> HttpResponse[typing.Any]:
+        """
+        Execute a natural language query against a collection.
+
+        When `inference=true`, returns an AI-generated response with relevant documents.
+        When `inference=false`, returns raw search results with content and metadata.
 
-        event: tool_end
-        data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}}
+        ## Streaming (SSE)
+
+        When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator.
+
+        ### SSE Event Types
+
+        | `type` value | Schema | Description |
+        |---|---|---|
+        | `text` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. |
+        | `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. |
+        | `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. |
+        | `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. |
+        | `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. |
 
-        data: Based on the documents
-        data:  provided, the revenue
-        data:  projections for Q4 show
-        data:  a 15% increase over Q3.
+        ### Example SSE Stream
 
-        event: tool_start
-        data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_2","args":{"query":"Q3 comparison metrics"}}
+        ```
+        data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}}
 
-        event: tool_end
-        data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_2","ok":true,"result_summary":{"resultCount":8}}
+        data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}}
 
-        data:  Compared to Q3, the key
-        data:  drivers were operational
-        data:  efficiency gains.
+        data: {"type":"text","content":"Based on the documents"}
+        data: {"type":"text","content":" provided, the revenue"}
+        data: {"type":"text","content":" projections for Q4 show"}
+        data: {"type":"text","content":" a 15% increase over Q3."}
 
-        event: complete
-        data: {"type":"stream_complete"}
+        data: {"type":"stream_complete","metadata":{"totalResults":12},"stats":{"totalTokens":150}}
         ```
 
         ### Notes
 
-        - The agent may perform multiple searches per query. Each search produces a `tool_start`/`tool_end` pair.
+        - The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair.
         - Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search.
         - Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses.
 
         Parameters
         ----------
         collection_name : str
-            Name of the collection to query
 
         query : str
             The natural language query to search for
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         inference : typing.Optional[bool]
             Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results.
 
-        stream : typing.Optional[bool]
-            Enable real-time streaming of the response
-
         top_k : typing.Optional[int]
             Number of results to return. Only valid when inference=false. Not supported when inference=true (the agent controls its own search strategy).
 
@@ -121,8 +245,8 @@ def collection_v2(
 
         Returns
         -------
-        HttpResponse[QueryResponseV2]
-            Successful Response — returns JSON when `stream: false`, or SSE event stream when `stream: true`.
+        HttpResponse[typing.Any]
+
         """
         _response = self._client_wrapper.httpx_client.request(
             f"v2/collections/{jsonable_encoder(collection_name)}/query",
@@ -130,25 +254,26 @@ def collection_v2(
             json={
                 "query": query,
                 "inference": inference,
-                "stream": stream,
                 "top_k": top_k,
                 "rerank": rerank,
                 "metadata_filter": metadata_filter,
                 "custom_prompt": custom_prompt,
+                "stream": False,
             },
             headers={
                 "content-type": "application/json",
-                "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None,
             },
             request_options=request_options,
             omit=OMIT,
         )
         try:
+            if _response is None or not _response.text.strip():
+                return HttpResponse(response=_response, data=None)
             if 200 <= _response.status_code < 300:
                 _data = typing.cast(
-                    QueryResponseV2,
+                    typing.Any,
                     parse_obj_as(
-                        type_=QueryResponseV2,  # type: ignore
+                        type_=typing.Any,  # type: ignore
                         object_=_response.json(),
                     ),
                 )
@@ -163,20 +288,19 @@ class AsyncRawQueryClient:
     def __init__(self, *, client_wrapper: AsyncClientWrapper):
         self._client_wrapper = client_wrapper
 
-    async def collection_v2(
+    @contextlib.asynccontextmanager
+    async def collection_v2stream(
         self,
         collection_name: str,
         *,
         query: str,
-        idempotency_key: typing.Optional[str] = None,
         inference: typing.Optional[bool] = OMIT,
-        stream: typing.Optional[bool] = OMIT,
         top_k: typing.Optional[int] = OMIT,
         rerank: typing.Optional[bool] = OMIT,
         metadata_filter: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
         custom_prompt: typing.Optional[str] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> AsyncHttpResponse[QueryResponseV2]:
+    ) -> typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[QueryStreamEvent]]]:
         """
         Execute a natural language query against a collection.
 
@@ -185,69 +309,191 @@ async def collection_v2(
 
         ## Streaming (SSE)
 
-        When `stream: true` and `inference: true`, the JSON response includes a `request_id`. Refer to the sample implementations to best make use of streams.
+        When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator.
 
         ### SSE Event Types
 
-        | Event | Format | Description |
-        |-------|--------|-------------|
-        | Text chunk | `data: <text>\\n\\n` | Incremental text of the AI response. Plain text (not JSON). Newlines within text are escaped as `\\n`. |
-        | Tool start | `event: tool_start\\ndata: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"..."}}\\n\\n` | The AI agent is performing a knowledge base search. The `args.query` field contains the search query. |
-        | Tool end | `event: tool_end\\ndata: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}}\\n\\n` | A search completed. `tool_call_id` correlates with the preceding `tool_start`. `result_summary.resultCount` indicates how many results were found. |
-        | Complete | `event: complete\\ndata: {"type":"stream_complete"}\\n\\n` | Stream finished successfully. Close the connection after receiving this. |
-        | Error | `event: error\\ndata: {"type":"stream_error","error":"..."}\\n\\n` | An error occurred during generation. Close the connection. |
+        | `type` value | Schema | Description |
+        |---|---|---|
+        | `text` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. |
+        | `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. |
+        | `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. |
+        | `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. |
+        | `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. |
 
         ### Example SSE Stream
 
         ```
-        event: tool_start
-        data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_1","args":{"query":"revenue projections Q4"}}
+        data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}}
+
+        data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}}
+
+        data: {"type":"text","content":"Based on the documents"}
+        data: {"type":"text","content":" provided, the revenue"}
+        data: {"type":"text","content":" projections for Q4 show"}
+        data: {"type":"text","content":" a 15% increase over Q3."}
+
+        data: {"type":"stream_complete","metadata":{"totalResults":12},"stats":{"totalTokens":150}}
+        ```
+
+        ### Notes
+
+        - The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair.
+        - Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search.
+        - Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses.
+
+        Parameters
+        ----------
+        collection_name : str
+
+        query : str
+            The natural language query to search for
+
+        inference : typing.Optional[bool]
+            Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results.
+
+        top_k : typing.Optional[int]
+            Number of results to return. Only valid when inference=false. Not supported when inference=true (the agent controls its own search strategy).
+
+        rerank : typing.Optional[bool]
+            Enable Voyage AI rerank-2.5 reranking for improved relevance ordering. Adds ~100-300ms latency.
+
+        metadata_filter : typing.Optional[typing.Dict[str, typing.Any]]
+            Filter expression for vector search. Supports: $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin, $and, $or
+
+        custom_prompt : typing.Optional[str]
+            Custom system prompt to override the default RAG prompt when inference=true. Allows customizing how the LLM processes and responds to the query with the retrieved context.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Yields
+        ------
+        typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[QueryStreamEvent]]]
+
+        """
+        async with self._client_wrapper.httpx_client.stream(
+            f"v2/collections/{jsonable_encoder(collection_name)}/query",
+            method="POST",
+            json={
+                "query": query,
+                "inference": inference,
+                "top_k": top_k,
+                "rerank": rerank,
+                "metadata_filter": metadata_filter,
+                "custom_prompt": custom_prompt,
+                "stream": True,
+            },
+            headers={
+                "content-type": "application/json",
+            },
+            request_options=request_options,
+            omit=OMIT,
+        ) as _response:
+
+            async def _stream() -> AsyncHttpResponse[typing.AsyncIterator[QueryStreamEvent]]:
+                """Wrap a 2xx response as a lazily parsed event stream; raise ApiError for any other status."""
+                try:
+                    if 200 <= _response.status_code < 300:
+
+                        async def _iter():
+                            _event_source = EventSource(_response)
+                            async for _sse in _event_source.aiter_sse():
+                                if _sse.data is None:  # identity test is the idiomatic None check
+                                    return
+                                try:
+                                    yield typing.cast(
+                                        QueryStreamEvent,
+                                        parse_sse_obj(
+                                            sse=_sse,
+                                            type_=QueryStreamEvent,  # type: ignore
+                                        ),
+                                    )
+                                except JSONDecodeError as e:
+                                    warning(f"Skipping SSE event with invalid JSON: {e}, sse: {_sse!r}")
+                                except (TypeError, ValueError, KeyError, AttributeError) as e:
+                                    warning(
+                                        f"Skipping SSE event due to model construction error: {type(e).__name__}: {e}, sse: {_sse!r}"
+                                    )
+                                except Exception as e:  # best-effort: log and move on to the next event
+                                    error(
+                                        f"Unexpected error processing SSE event: {type(e).__name__}: {e}, sse: {_sse!r}"
+                                    )
+
+                        return AsyncHttpResponse(response=_response, data=_iter())
+                    await _response.aread()  # load the streamed error body before decoding it
+                    _response_json = _response.json()
+                except JSONDecodeError:
+                    raise ApiError(
+                        status_code=_response.status_code, headers=dict(_response.headers), body=_response.text
+                    )
+                raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
+
+            yield await _stream()
+
+    async def collection_v2(
+        self,
+        collection_name: str,
+        *,
+        query: str,
+        inference: typing.Optional[bool] = OMIT,
+        top_k: typing.Optional[int] = OMIT,
+        rerank: typing.Optional[bool] = OMIT,
+        metadata_filter: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
+        custom_prompt: typing.Optional[str] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> AsyncHttpResponse[typing.Any]:
+        """
+        Execute a natural language query against a collection.
+
+        When `inference=true`, returns an AI-generated response with relevant documents.
+        When `inference=false`, returns raw search results with content and metadata.
 
-        event: tool_end
-        data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_1","ok":true,"result_summary":{"resultCount":12}}
+        ## Streaming (SSE)
+
+        When `stream: true` and `inference: true`, the response is a Server-Sent Events stream. Every `data:` field is a JSON object with a `type` discriminator.
+
+        ### SSE Event Types
+
+        | `type` value | Schema | Description |
+        |---|---|---|
+        | `text` | `QueryStreamTextEvent` | Incremental text chunk of the AI response. |
+        | `tool.start` | `QueryStreamToolStartEvent` | The agent is performing a knowledge-base search. |
+        | `tool.end` | `QueryStreamToolEndEvent` | A tool call completed. `tool_call_id` correlates with the preceding `tool.start`. |
+        | `stream_complete` | `QueryStreamCompleteEvent` | Stream finished successfully. Close the connection. |
+        | `stream_error` | `QueryStreamErrorEvent` | An error occurred. Close the connection. |
 
-        data: Based on the documents
-        data:  provided, the revenue
-        data:  projections for Q4 show
-        data:  a 15% increase over Q3.
+        ### Example SSE Stream
 
-        event: tool_start
-        data: {"type":"tool.start","name":"searchKnowledgeBase","tool_call_id":"tc_2","args":{"query":"Q3 comparison metrics"}}
+        ```
+        data: {"type":"tool.start","seq":1,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","args":{"query":"revenue projections Q4"}}
 
-        event: tool_end
-        data: {"type":"tool.end","name":"searchKnowledgeBase","tool_call_id":"tc_2","ok":true,"result_summary":{"resultCount":8}}
+        data: {"type":"tool.end","seq":2,"run_id":"run_abc","tool_call_id":"tc_1","name":"searchKnowledgeBase","ok":true,"result_summary":{"resultCount":12}}
 
-        data:  Compared to Q3, the key
-        data:  drivers were operational
-        data:  efficiency gains.
+        data: {"type":"text","content":"Based on the documents"}
+        data: {"type":"text","content":" provided, the revenue"}
+        data: {"type":"text","content":" projections for Q4 show"}
+        data: {"type":"text","content":" a 15% increase over Q3."}
 
-        event: complete
-        data: {"type":"stream_complete"}
+        data: {"type":"stream_complete","metadata":{"totalResults":12},"stats":{"totalTokens":150}}
         ```
 
         ### Notes
 
-        - The agent may perform multiple searches per query. Each search produces a `tool_start`/`tool_end` pair.
+        - The agent may perform multiple searches per query. Each search produces a `tool.start` / `tool.end` pair.
         - Text chunks are interleaved between tool events — text arrives after the agent has gathered results from a search.
         - Connect with `Accept: text/event-stream` and set a generous timeout (120s+) for long responses.
 
         Parameters
         ----------
         collection_name : str
-            Name of the collection to query
 
         query : str
             The natural language query to search for
 
-        idempotency_key : typing.Optional[str]
-            UUID for request deduplication
-
         inference : typing.Optional[bool]
             Enable LLM-generated answers based on the relevant sections retrieved. When false, returns raw search results.
 
-        stream : typing.Optional[bool]
-            Enable real-time streaming of the response
-
         top_k : typing.Optional[int]
             Number of results to return. Only valid when inference=false. Not supported when inference=true (the agent controls its own search strategy).
 
@@ -265,8 +511,8 @@ async def collection_v2(
 
         Returns
         -------
-        AsyncHttpResponse[QueryResponseV2]
-            Successful Response — returns JSON when `stream: false`, or SSE event stream when `stream: true`.
+        AsyncHttpResponse[typing.Any]
+
         """
         _response = await self._client_wrapper.httpx_client.request(
             f"v2/collections/{jsonable_encoder(collection_name)}/query",
@@ -274,25 +520,26 @@ async def collection_v2(
             json={
                 "query": query,
                 "inference": inference,
-                "stream": stream,
                 "top_k": top_k,
                 "rerank": rerank,
                 "metadata_filter": metadata_filter,
                 "custom_prompt": custom_prompt,
+                "stream": False,
             },
             headers={
                 "content-type": "application/json",
-                "Idempotency-Key": str(idempotency_key) if idempotency_key is not None else None,
             },
             request_options=request_options,
             omit=OMIT,
         )
         try:
+            if _response is None or not _response.text.strip():
+                return AsyncHttpResponse(response=_response, data=None)
             if 200 <= _response.status_code < 300:
                 _data = typing.cast(
-                    QueryResponseV2,
+                    typing.Any,
                     parse_obj_as(
-                        type_=QueryResponseV2,  # type: ignore
+                        type_=typing.Any,  # type: ignore
                         object_=_response.json(),
                     ),
                 )
diff --git a/src/runcaptain/raw_client.py b/src/runcaptain/raw_client.py
new file mode 100644
index 0000000..b59019a
--- /dev/null
+++ b/src/runcaptain/raw_client.py
@@ -0,0 +1,126 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+from json.decoder import JSONDecodeError
+
+from .core.api_error import ApiError
+from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
+from .core.http_response import AsyncHttpResponse, HttpResponse
+from .core.jsonable_encoder import jsonable_encoder
+from .core.request_options import RequestOptions
+
+
+class RawCaptain:
+    def __init__(self, *, client_wrapper: SyncClientWrapper):
+        self._client_wrapper = client_wrapper
+
+    def post_v2collections_collection_name_documents_wipe(
+        self, collection_name: str, *, request_options: typing.Optional[RequestOptions] = None
+    ) -> HttpResponse[None]:
+        """
+        POST to the documents/wipe endpoint of the named collection.
+
+        Parameters
+        ----------
+        collection_name : str
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        HttpResponse[None]
+        """
+        path = f"v2/collections/{jsonable_encoder(collection_name)}/documents/wipe"
+        reply = self._client_wrapper.httpx_client.request(path, method="POST", request_options=request_options)
+        if 200 <= reply.status_code < 300:
+            return HttpResponse(response=reply, data=None)
+        try:
+            error_body = reply.json()
+        except JSONDecodeError:
+            # Error payload is not JSON: report the raw text instead.
+            raise ApiError(status_code=reply.status_code, headers=dict(reply.headers), body=reply.text)
+        raise ApiError(status_code=reply.status_code, headers=dict(reply.headers), body=error_body)
+
+    def post_v2datasets_search(self, *, request_options: typing.Optional[RequestOptions] = None) -> HttpResponse[None]:
+        """
+        POST to the v2/datasets/search endpoint.
+
+        Parameters
+        ----------
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        HttpResponse[None]
+        """
+        reply = self._client_wrapper.httpx_client.request(
+            "v2/datasets/search", method="POST", request_options=request_options
+        )
+        if 200 <= reply.status_code < 300:
+            return HttpResponse(response=reply, data=None)
+        try:
+            error_body = reply.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=reply.status_code, headers=dict(reply.headers), body=reply.text)
+        raise ApiError(status_code=reply.status_code, headers=dict(reply.headers), body=error_body)
+
+
+class AsyncRawCaptain:
+    def __init__(self, *, client_wrapper: AsyncClientWrapper):
+        self._client_wrapper = client_wrapper
+
+    async def post_v2collections_collection_name_documents_wipe(
+        self, collection_name: str, *, request_options: typing.Optional[RequestOptions] = None
+    ) -> AsyncHttpResponse[None]:
+        """
+        POST to the documents/wipe endpoint of the named collection.
+
+        Parameters
+        ----------
+        collection_name : str
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        AsyncHttpResponse[None]
+        """
+        path = f"v2/collections/{jsonable_encoder(collection_name)}/documents/wipe"
+        reply = await self._client_wrapper.httpx_client.request(path, method="POST", request_options=request_options)
+        if 200 <= reply.status_code < 300:
+            return AsyncHttpResponse(response=reply, data=None)
+        try:
+            error_body = reply.json()
+        except JSONDecodeError:
+            # Error payload is not JSON: report the raw text instead.
+            raise ApiError(status_code=reply.status_code, headers=dict(reply.headers), body=reply.text)
+        raise ApiError(status_code=reply.status_code, headers=dict(reply.headers), body=error_body)
+
+    async def post_v2datasets_search(
+        self, *, request_options: typing.Optional[RequestOptions] = None
+    ) -> AsyncHttpResponse[None]:
+        """
+        POST to the v2/datasets/search endpoint.
+
+        Parameters
+        ----------
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        AsyncHttpResponse[None]
+        """
+        reply = await self._client_wrapper.httpx_client.request(
+            "v2/datasets/search", method="POST", request_options=request_options
+        )
+        if 200 <= reply.status_code < 300:
+            return AsyncHttpResponse(response=reply, data=None)
+        try:
+            error_body = reply.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=reply.status_code, headers=dict(reply.headers), body=reply.text)
+        raise ApiError(status_code=reply.status_code, headers=dict(reply.headers), body=error_body)
diff --git a/src/runcaptain/types/__init__.py b/src/runcaptain/types/__init__.py
index c362b46..35c7fb3 100644
--- a/src/runcaptain/types/__init__.py
+++ b/src/runcaptain/types/__init__.py
@@ -37,6 +37,19 @@
     from .job_status_response_v2job_type import JobStatusResponseV2JobType
     from .job_status_response_v2status import JobStatusResponseV2Status
     from .query_response_v2 import QueryResponseV2
+    from .query_stream_complete_event import QueryStreamCompleteEvent
+    from .query_stream_error_event import QueryStreamErrorEvent
+    from .query_stream_event import (
+        QueryStreamEvent,
+        QueryStreamEvent_StreamComplete,
+        QueryStreamEvent_StreamError,
+        QueryStreamEvent_Text,
+        QueryStreamEvent_ToolEnd,
+        QueryStreamEvent_ToolStart,
+    )
+    from .query_stream_text_event import QueryStreamTextEvent
+    from .query_stream_tool_end_event import QueryStreamToolEndEvent
+    from .query_stream_tool_start_event import QueryStreamToolStartEvent
     from .relevant_document_v2 import RelevantDocumentV2
     from .search_result import SearchResult
     from .standard_response_v2 import StandardResponseV2
@@ -75,6 +88,17 @@
     "JobStatusResponseV2JobType": ".job_status_response_v2job_type",
     "JobStatusResponseV2Status": ".job_status_response_v2status",
     "QueryResponseV2": ".query_response_v2",
+    "QueryStreamCompleteEvent": ".query_stream_complete_event",
+    "QueryStreamErrorEvent": ".query_stream_error_event",
+    "QueryStreamEvent": ".query_stream_event",
+    "QueryStreamEvent_StreamComplete": ".query_stream_event",
+    "QueryStreamEvent_StreamError": ".query_stream_event",
+    "QueryStreamEvent_Text": ".query_stream_event",
+    "QueryStreamEvent_ToolEnd": ".query_stream_event",
+    "QueryStreamEvent_ToolStart": ".query_stream_event",
+    "QueryStreamTextEvent": ".query_stream_text_event",
+    "QueryStreamToolEndEvent": ".query_stream_tool_end_event",
+    "QueryStreamToolStartEvent": ".query_stream_tool_start_event",
     "RelevantDocumentV2": ".relevant_document_v2",
     "SearchResult": ".search_result",
     "StandardResponseV2": ".standard_response_v2",
@@ -137,6 +161,17 @@ def __dir__():
     "JobStatusResponseV2JobType",
     "JobStatusResponseV2Status",
     "QueryResponseV2",
+    "QueryStreamCompleteEvent",
+    "QueryStreamErrorEvent",
+    "QueryStreamEvent",
+    "QueryStreamEvent_StreamComplete",
+    "QueryStreamEvent_StreamError",
+    "QueryStreamEvent_Text",
+    "QueryStreamEvent_ToolEnd",
+    "QueryStreamEvent_ToolStart",
+    "QueryStreamTextEvent",
+    "QueryStreamToolEndEvent",
+    "QueryStreamToolStartEvent",
     "RelevantDocumentV2",
     "SearchResult",
     "StandardResponseV2",
diff --git a/src/runcaptain/types/collection_item_v2.py b/src/runcaptain/types/collection_item_v2.py
index d7c9be3..15b105d 100644
--- a/src/runcaptain/types/collection_item_v2.py
+++ b/src/runcaptain/types/collection_item_v2.py
@@ -7,19 +7,9 @@
 
 
 class CollectionItemV2(UniversalBaseModel):
-    collection_id: str = pydantic.Field()
-    """
-    Unique identifier for the collection
-    """
-
-    collection_name: str = pydantic.Field()
-    """
-    Name of the collection
-    """
-
     environment: typing.Optional[str] = pydantic.Field(default=None)
     """
-    Environment the collection belongs to (e.g. production, staging, development)
+    Environment the collection belongs to
     """
 
     is_active: typing.Optional[bool] = pydantic.Field(default=None)
@@ -37,9 +27,14 @@ class CollectionItemV2(UniversalBaseModel):
     Total number of API requests made against this collection
     """
 
-    document_count: typing.Optional[int] = pydantic.Field(default=None)
+    database_name: str = pydantic.Field()
+    """
+    Name of the collection database
+    """
+
+    file_count: int = pydantic.Field()
     """
-    Total number of documents indexed in this collection
+    Total number of files indexed in this collection
     """
 
     if IS_PYDANTIC_V2:
diff --git a/src/runcaptain/types/document_item_v2.py b/src/runcaptain/types/document_item_v2.py
index 716b59a..3951731 100644
--- a/src/runcaptain/types/document_item_v2.py
+++ b/src/runcaptain/types/document_item_v2.py
@@ -7,7 +7,7 @@
 
 
 class DocumentItemV2(UniversalBaseModel):
-    document_id: str = pydantic.Field()
+    file_id: str = pydantic.Field()
     """
     Unique identifier for the document
     """
diff --git a/src/runcaptain/types/query_stream_complete_event.py b/src/runcaptain/types/query_stream_complete_event.py
new file mode 100644
index 0000000..40829d0
--- /dev/null
+++ b/src/runcaptain/types/query_stream_complete_event.py
@@ -0,0 +1,31 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
+
+
+class QueryStreamCompleteEvent(UniversalBaseModel):
+    """
+    Emitted when the stream finishes successfully. Close the connection after receiving this.
+    """
+
+    metadata: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(default=None)
+    """
+    Stream metadata (e.g. totalResults, totalSearches)
+    """
+
+    stats: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(default=None)
+    """
+    Token and tool-call statistics
+    """
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:
+
+        class Config:
+            frozen = True
+            smart_union = True
+            extra = pydantic.Extra.allow
diff --git a/src/runcaptain/types/query_stream_error_event.py b/src/runcaptain/types/query_stream_error_event.py
new file mode 100644
index 0000000..c7091ee
--- /dev/null
+++ b/src/runcaptain/types/query_stream_error_event.py
@@ -0,0 +1,26 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
+
+
+class QueryStreamErrorEvent(UniversalBaseModel):
+    """
+    Emitted when an error occurs during generation. Close the connection after receiving this.
+    """
+
+    error: str = pydantic.Field()
+    """
+    Human-readable error message
+    """
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:
+
+        class Config:
+            frozen = True
+            smart_union = True
+            extra = pydantic.Extra.allow
diff --git a/src/runcaptain/types/query_stream_event.py b/src/runcaptain/types/query_stream_event.py
new file mode 100644
index 0000000..37737dd
--- /dev/null
+++ b/src/runcaptain/types/query_stream_event.py
@@ -0,0 +1,101 @@
+# This file was auto-generated by Fern from our API Definition.
+
+from __future__ import annotations
+
+import typing
+
+import pydantic
+import typing_extensions
+from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
+
+
+class QueryStreamEvent_Text(UniversalBaseModel):
+    type: typing.Literal["text"] = "text"
+    content: str
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:
+
+        class Config:
+            frozen = True
+            smart_union = True
+            extra = pydantic.Extra.allow
+
+
+class QueryStreamEvent_ToolStart(UniversalBaseModel):
+    type: typing.Literal["tool.start"] = "tool.start"
+    seq: typing.Optional[int] = None
+    run_id: typing.Optional[str] = None
+    tool_call_id: str
+    name: str
+    args: typing.Optional[typing.Dict[str, typing.Any]] = None
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:
+
+        class Config:
+            frozen = True
+            smart_union = True
+            extra = pydantic.Extra.allow
+
+
+class QueryStreamEvent_ToolEnd(UniversalBaseModel):
+    type: typing.Literal["tool.end"] = "tool.end"
+    seq: typing.Optional[int] = None
+    run_id: typing.Optional[str] = None
+    tool_call_id: str
+    name: str
+    ok: bool
+    result_summary: typing.Optional[typing.Dict[str, typing.Any]] = None
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:
+
+        class Config:
+            frozen = True
+            smart_union = True
+            extra = pydantic.Extra.allow
+
+
+class QueryStreamEvent_StreamComplete(UniversalBaseModel):
+    type: typing.Literal["stream_complete"] = "stream_complete"
+    metadata: typing.Optional[typing.Dict[str, typing.Any]] = None
+    stats: typing.Optional[typing.Dict[str, typing.Any]] = None
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:
+
+        class Config:
+            frozen = True
+            smart_union = True
+            extra = pydantic.Extra.allow
+
+
+class QueryStreamEvent_StreamError(UniversalBaseModel):
+    type: typing.Literal["stream_error"] = "stream_error"
+    error: str
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:
+
+        class Config:
+            frozen = True
+            smart_union = True
+            extra = pydantic.Extra.allow
+
+
+QueryStreamEvent = typing_extensions.Annotated[
+    typing.Union[
+        QueryStreamEvent_Text,
+        QueryStreamEvent_ToolStart,
+        QueryStreamEvent_ToolEnd,
+        QueryStreamEvent_StreamComplete,
+        QueryStreamEvent_StreamError,
+    ],
+    pydantic.Field(discriminator="type"),
+]
diff --git a/src/runcaptain/types/query_stream_text_event.py b/src/runcaptain/types/query_stream_text_event.py
new file mode 100644
index 0000000..b2d54a8
--- /dev/null
+++ b/src/runcaptain/types/query_stream_text_event.py
@@ -0,0 +1,26 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
+
+
+class QueryStreamTextEvent(UniversalBaseModel):
+    """
+    Incremental text chunk of the AI response.
+    """
+
+    content: str = pydantic.Field()
+    """
+    Text fragment of the AI-generated response
+    """
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:
+
+        class Config:
+            frozen = True
+            smart_union = True
+            extra = pydantic.Extra.allow
diff --git a/src/runcaptain/types/query_stream_tool_end_event.py b/src/runcaptain/types/query_stream_tool_end_event.py
new file mode 100644
index 0000000..58bd3bb
--- /dev/null
+++ b/src/runcaptain/types/query_stream_tool_end_event.py
@@ -0,0 +1,51 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
+
+
+class QueryStreamToolEndEvent(UniversalBaseModel):
+    """
+    Emitted when a tool call completes.
+    """
+
+    seq: typing.Optional[int] = pydantic.Field(default=None)
+    """
+    Monotonically increasing sequence number within the stream
+    """
+
+    run_id: typing.Optional[str] = pydantic.Field(default=None)
+    """
+    Identifier for the current agent run
+    """
+
+    tool_call_id: str = pydantic.Field()
+    """
+    Correlates with the preceding tool.start event
+    """
+
+    name: str = pydantic.Field()
+    """
+    Tool name
+    """
+
+    ok: bool = pydantic.Field()
+    """
+    Whether the tool call succeeded
+    """
+
+    result_summary: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(default=None)
+    """
+    Summary of the tool results (e.g. {"resultCount": 12, "hasResults": true})
+    """
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:
+
+        class Config:
+            frozen = True
+            smart_union = True
+            extra = pydantic.Extra.allow
diff --git a/src/runcaptain/types/query_stream_tool_start_event.py b/src/runcaptain/types/query_stream_tool_start_event.py
new file mode 100644
index 0000000..860bd03
--- /dev/null
+++ b/src/runcaptain/types/query_stream_tool_start_event.py
@@ -0,0 +1,46 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
+
+
+class QueryStreamToolStartEvent(UniversalBaseModel):
+    """
+    Emitted when the AI agent begins a tool call (e.g. a knowledge-base search).
+    """
+
+    seq: typing.Optional[int] = pydantic.Field(default=None)
+    """
+    Monotonically increasing sequence number within the stream
+    """
+
+    run_id: typing.Optional[str] = pydantic.Field(default=None)
+    """
+    Identifier for the current agent run
+    """
+
+    tool_call_id: str = pydantic.Field()
+    """
+    Correlates this start with the corresponding tool.end event
+    """
+
+    name: str = pydantic.Field()
+    """
+    Tool name, e.g. searchKnowledgeBase
+    """
+
+    args: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(default=None)
+    """
+    Arguments passed to the tool (e.g. {"query": "...", "topK": 10})
+    """
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:
+
+        class Config:
+            frozen = True
+            smart_union = True
+            extra = pydantic.Extra.allow