Merge pull request #57 from zeroentropy-ai/release-please--branches--main--changes--next

npip99 · web-flow · commit af5259da1e25 · 2026-03-03T00:56:30.000-08:00
release: 0.1.0-alpha.11
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "0.1.0-alpha.10"
+  ".": "0.1.0-alpha.11"
 }
diff --git a/.stats.yml b/.stats.yml
@@ -1,4 +1,4 @@
 configured_endpoints: 15
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/zeroentropy%2Fzeroentropy-9cd927800fd253f2116ab12aa496b086605bd31d295cb600b65d793203e1e9e7.yml
-openapi_spec_hash: cd7f6d9db9ae338091bc6da83e27f4a6
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/zeroentropy%2Fzeroentropy-5ff1b24060b908d169910a7366213eeb423b52d14b03cc16902923eb0ffac71a.yml
+openapi_spec_hash: fc50a1765bfbfe9a3da7ea9001e479d4
 config_hash: e56152e1ee1a9273241d925702077e49
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,13 @@
 # Changelog
 
+## 0.1.0-alpha.11 (2026-03-03)
+
+Full Changelog: [v0.1.0-alpha.10...v0.1.0-alpha.11](https://github.com/zeroentropy-ai/zeroentropy-python/compare/v0.1.0-alpha.10...v0.1.0-alpha.11)
+
+### Features
+
+* **api:** manual updates ([6654f01](https://github.com/zeroentropy-ai/zeroentropy-python/commit/6654f01491feb8233c146e2bc6bd678d42f7f810))
+
 ## 0.1.0-alpha.10 (2026-03-03)
 
 Full Changelog: [v0.1.0-alpha.9...v0.1.0-alpha.10](https://github.com/zeroentropy-ai/zeroentropy-python/compare/v0.1.0-alpha.9...v0.1.0-alpha.10)
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "zeroentropy"
-version = "0.1.0-alpha.10"
+version = "0.1.0-alpha.11"
 description = "The official Python library for the ZeroEntropy API"
 dynamic = ["readme"]
 license = "Apache-2.0"
diff --git a/src/zeroentropy/_version.py b/src/zeroentropy/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "zeroentropy"
-__version__ = "0.1.0-alpha.10"  # x-release-please-version
+__version__ = "0.1.0-alpha.11"  # x-release-please-version
diff --git a/src/zeroentropy/resources/models.py b/src/zeroentropy/resources/models.py
@@ -51,9 +51,9 @@ def embed(
         input: Union[str, SequenceNotStr[str]],
         input_type: Literal["query", "document"],
         model: str,
+        dimensions: Optional[int] | Omit = omit,
+        encoding_format: Literal["float", "base64"] | Omit = omit,
         latency: Optional[Literal["fast", "slow"]] | Omit = omit,
-        output_dimensions: Optional[int] | Omit = omit,
-        output_format: Literal["float", "base64"] | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -78,24 +78,27 @@ def embed(
         [Slack](https://go.zeroentropy.dev/slack)!
 
         Args:
-          input: The string, or list of strings, to embed
+          input: The string, or list of strings, to embed.
 
           input_type: The input type. For retrieval tasks, either `query` or `document`.
 
           model: The model ID to use for embedding. Options are: ["zembed-1"]
 
+          dimensions: The output dimensionality of the embedding model. For `zembed-1`, the available
+              options are: [2560, 1280, 640, 320, 160, 80, 40].
+
+          encoding_format: The output format of the embedding. If `float`, an array of floats will be
+              returned for each embedding. If `base64`, an f32 little endian byte array will
+              be returned, encoded as a base64 string. `base64` is significantly more
+              efficient than `float`. The default is `float`.
+
           latency: Whether the call will be inferenced "fast" or "slow". RateLimits for slow API
-              calls are orders of magnitude higher, but you can expect >10 second latency.
+              calls are orders of magnitude higher, but you can expect 2-20 second latency.
               Fast inferences are guaranteed subsecond, but rate limits are lower. If not
               specified, first a "fast" call will be attempted, but if you have exceeded your
               fast rate limit, then a slow call will be executed. If explicitly set to "fast",
               then 429 will be returned if it cannot be executed fast.
 
-          output_dimensions: The output dimensionality of the embedding model.
-
-          output_format: The output format of the embedding. `base64` is significantly more efficient
-              than `float`. The default is `float`.
-
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -111,9 +114,9 @@ def embed(
                     "input": input,
                     "input_type": input_type,
                     "model": model,
+                    "dimensions": dimensions,
+                    "encoding_format": encoding_format,
                     "latency": latency,
-                    "output_dimensions": output_dimensions,
-                    "output_format": output_format,
                 },
                 model_embed_params.ModelEmbedParams,
             ),
@@ -227,9 +230,9 @@ async def embed(
         input: Union[str, SequenceNotStr[str]],
         input_type: Literal["query", "document"],
         model: str,
+        dimensions: Optional[int] | Omit = omit,
+        encoding_format: Literal["float", "base64"] | Omit = omit,
         latency: Optional[Literal["fast", "slow"]] | Omit = omit,
-        output_dimensions: Optional[int] | Omit = omit,
-        output_format: Literal["float", "base64"] | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -254,24 +257,27 @@ async def embed(
         [Slack](https://go.zeroentropy.dev/slack)!
 
         Args:
-          input: The string, or list of strings, to embed
+          input: The string, or list of strings, to embed.
 
           input_type: The input type. For retrieval tasks, either `query` or `document`.
 
           model: The model ID to use for embedding. Options are: ["zembed-1"]
 
+          dimensions: The output dimensionality of the embedding model. For `zembed-1`, the available
+              options are: [2560, 1280, 640, 320, 160, 80, 40].
+
+          encoding_format: The output format of the embedding. If `float`, an array of floats will be
+              returned for each embedding. If `base64`, an f32 little endian byte array will
+              be returned, encoded as a base64 string. `base64` is significantly more
+              efficient than `float`. The default is `float`.
+
           latency: Whether the call will be inferenced "fast" or "slow". RateLimits for slow API
-              calls are orders of magnitude higher, but you can expect >10 second latency.
+              calls are orders of magnitude higher, but you can expect 2-20 second latency.
               Fast inferences are guaranteed subsecond, but rate limits are lower. If not
               specified, first a "fast" call will be attempted, but if you have exceeded your
               fast rate limit, then a slow call will be executed. If explicitly set to "fast",
               then 429 will be returned if it cannot be executed fast.
 
-          output_dimensions: The output dimensionality of the embedding model.
-
-          output_format: The output format of the embedding. `base64` is significantly more efficient
-              than `float`. The default is `float`.
-
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -287,9 +293,9 @@ async def embed(
                     "input": input,
                     "input_type": input_type,
                     "model": model,
+                    "dimensions": dimensions,
+                    "encoding_format": encoding_format,
                     "latency": latency,
-                    "output_dimensions": output_dimensions,
-                    "output_format": output_format,
                 },
                 model_embed_params.ModelEmbedParams,
             ),
diff --git a/src/zeroentropy/types/model_embed_params.py b/src/zeroentropy/types/model_embed_params.py
@@ -12,30 +12,36 @@
 
 class ModelEmbedParams(TypedDict, total=False):
     input: Required[Union[str, SequenceNotStr[str]]]
-    """The string, or list of strings, to embed"""
+    """The string, or list of strings, to embed."""
 
     input_type: Required[Literal["query", "document"]]
     """The input type. For retrieval tasks, either `query` or `document`."""
 
     model: Required[str]
     """The model ID to use for embedding. Options are: ["zembed-1"]"""
 
-    latency: Optional[Literal["fast", "slow"]]
-    """Whether the call will be inferenced "fast" or "slow".
+    dimensions: Optional[int]
+    """The output dimensionality of the embedding model.
 
-    RateLimits for slow API calls are orders of magnitude higher, but you can
-    expect >10 second latency. Fast inferences are guaranteed subsecond, but rate
-    limits are lower. If not specified, first a "fast" call will be attempted, but
-    if you have exceeded your fast rate limit, then a slow call will be executed. If
-    explicitly set to "fast", then 429 will be returned if it cannot be executed
-    fast.
+    For `zembed-1`, the available options are: [2560, 1280, 640, 320, 160, 80, 40].
     """
 
-    output_dimensions: Optional[int]
-    """The output dimensionality of the embedding model."""
-
-    output_format: Literal["float", "base64"]
+    encoding_format: Literal["float", "base64"]
     """The output format of the embedding.
 
-    `base64` is significantly more efficient than `float`. The default is `float`.
+    If `float`, an array of floats will be returned for each embedding. If
+    `base64`, an f32 little endian byte array will be returned, encoded as a base64
+    string. `base64` is significantly more efficient than `float`. The default is
+    `float`.
+    """
+
+    latency: Optional[Literal["fast", "slow"]]
+    """Whether the call will be inferenced "fast" or "slow".
+
+    RateLimits for slow API calls are orders of magnitude higher, but you can expect
+    2-20 second latency. Fast inferences are guaranteed subsecond, but rate limits
+    are lower. If not specified, first a "fast" call will be attempted, but if you
+    have exceeded your fast rate limit, then a slow call will be executed. If
+    explicitly set to "fast", then 429 will be returned if it cannot be executed
+    fast.
     """
diff --git a/src/zeroentropy/types/model_embed_response.py b/src/zeroentropy/types/model_embed_response.py
@@ -9,10 +9,10 @@
 
 class Result(BaseModel):
     embedding: Union[List[float], str]
-    """The embedding of the input text.
+    """The embedding of the input text, as an array of floats.
 
     If `base64` format is requested, the response will be an fp32 little endian byte
-    array, encoded as base64.
+    array, encoded as a base64 string.
     """
 
 
diff --git a/tests/api_resources/test_models.py b/tests/api_resources/test_models.py
@@ -32,9 +32,9 @@ def test_method_embed_with_all_params(self, client: ZeroEntropy) -> None:
             input="string",
             input_type="query",
             model="model",
+            dimensions=0,
+            encoding_format="float",
             latency="fast",
-            output_dimensions=0,
-            output_format="float",
         )
         assert_matches_type(ModelEmbedResponse, model, path=["response"])
 
@@ -135,9 +135,9 @@ async def test_method_embed_with_all_params(self, async_client: AsyncZeroEntropy
             input="string",
             input_type="query",
             model="model",
+            dimensions=0,
+            encoding_format="float",
             latency="fast",
-            output_dimensions=0,
-            output_format="float",
         )
         assert_matches_type(ModelEmbedResponse, model, path=["response"])
 

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,3 @@`
`1`	`1`	`{`
`2`		`- ".": "0.1.0-alpha.10"`
	`2`	`+ ".": "0.1.0-alpha.11"`
`3`	`3`	`}`