@@ -51,9 +51,9 @@ def embed(
5151 input : Union [str , SequenceNotStr [str ]],
5252 input_type : Literal ["query" , "document" ],
5353 model : str ,
54+ dimensions : Optional [int ] | Omit = omit ,
55+ encoding_format : Literal ["float" , "base64" ] | Omit = omit ,
5456 latency : Optional [Literal ["fast" , "slow" ]] | Omit = omit ,
55- output_dimensions : Optional [int ] | Omit = omit ,
56- output_format : Literal ["float" , "base64" ] | Omit = omit ,
5757 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
5858 # The extra values given here take precedence over values defined on the client or passed to this method.
5959 extra_headers : Headers | None = None ,
@@ -78,24 +78,27 @@ def embed(
7878 [Slack](https://go.zeroentropy.dev/slack)!
7979
8080 Args:
81- input: The string, or list of strings, to embed
81+ input: The string, or list of strings, to embed.
8282
8383 input_type: The input type. For retrieval tasks, either `query` or `document`.
8484
8585 model: The model ID to use for embedding. Options are: ["zembed-1"]
8686
87+ dimensions: The output dimensionality of the embedding model. For `zembed-1`, the available
88+ options are: [2560, 1280, 640, 320, 160, 80, 40].
89+
90+ encoding_format: The output format of the embedding. If `float`, an array of floats will be
91+ returned for each embedding. If `base64`, an f32 little-endian byte array will
92+ be returned, encoded as a base64 string. `base64` is significantly more
93+ efficient than `float`. The default is `float`.
94+
8795 latency: Whether the call will be inferenced "fast" or "slow". Rate limits for slow API
88- calls are orders of magnitude higher, but you can expect >10 second latency.
96+ calls are orders of magnitude higher, but you can expect 2-20 second latency.
8997 Fast inferences are guaranteed subsecond, but rate limits are lower. If not
9098 specified, first a "fast" call will be attempted, but if you have exceeded your
9199 fast rate limit, then a slow call will be executed. If explicitly set to "fast",
92100 then 429 will be returned if it cannot be executed fast.
93101
94- output_dimensions: The output dimensionality of the embedding model.
95-
96- output_format: The output format of the embedding. `base64` is significantly more efficient
97- than `float`. The default is `float`.
98-
99102 extra_headers: Send extra headers
100103
101104 extra_query: Add additional query parameters to the request
@@ -111,9 +114,9 @@ def embed(
111114 "input" : input ,
112115 "input_type" : input_type ,
113116 "model" : model ,
117+ "dimensions" : dimensions ,
118+ "encoding_format" : encoding_format ,
114119 "latency" : latency ,
115- "output_dimensions" : output_dimensions ,
116- "output_format" : output_format ,
117120 },
118121 model_embed_params .ModelEmbedParams ,
119122 ),
@@ -227,9 +230,9 @@ async def embed(
227230 input : Union [str , SequenceNotStr [str ]],
228231 input_type : Literal ["query" , "document" ],
229232 model : str ,
233+ dimensions : Optional [int ] | Omit = omit ,
234+ encoding_format : Literal ["float" , "base64" ] | Omit = omit ,
230235 latency : Optional [Literal ["fast" , "slow" ]] | Omit = omit ,
231- output_dimensions : Optional [int ] | Omit = omit ,
232- output_format : Literal ["float" , "base64" ] | Omit = omit ,
233236 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
234237 # The extra values given here take precedence over values defined on the client or passed to this method.
235238 extra_headers : Headers | None = None ,
@@ -254,24 +257,27 @@ async def embed(
254257 [Slack](https://go.zeroentropy.dev/slack)!
255258
256259 Args:
257- input: The string, or list of strings, to embed
260+ input: The string, or list of strings, to embed.
258261
259262 input_type: The input type. For retrieval tasks, either `query` or `document`.
260263
261264 model: The model ID to use for embedding. Options are: ["zembed-1"]
262265
266+ dimensions: The output dimensionality of the embedding model. For `zembed-1`, the available
267+ options are: [2560, 1280, 640, 320, 160, 80, 40].
268+
269+ encoding_format: The output format of the embedding. If `float`, an array of floats will be
270+ returned for each embedding. If `base64`, an f32 little-endian byte array will
271+ be returned, encoded as a base64 string. `base64` is significantly more
272+ efficient than `float`. The default is `float`.
273+
263274 latency: Whether the call will be inferenced "fast" or "slow". Rate limits for slow API
264- calls are orders of magnitude higher, but you can expect >10 second latency.
275+ calls are orders of magnitude higher, but you can expect 2-20 second latency.
265276 Fast inferences are guaranteed subsecond, but rate limits are lower. If not
266277 specified, first a "fast" call will be attempted, but if you have exceeded your
267278 fast rate limit, then a slow call will be executed. If explicitly set to "fast",
268279 then 429 will be returned if it cannot be executed fast.
269280
270- output_dimensions: The output dimensionality of the embedding model.
271-
272- output_format: The output format of the embedding. `base64` is significantly more efficient
273- than `float`. The default is `float`.
274-
275281 extra_headers: Send extra headers
276282
277283 extra_query: Add additional query parameters to the request
@@ -287,9 +293,9 @@ async def embed(
287293 "input" : input ,
288294 "input_type" : input_type ,
289295 "model" : model ,
296+ "dimensions" : dimensions ,
297+ "encoding_format" : encoding_format ,
290298 "latency" : latency ,
291- "output_dimensions" : output_dimensions ,
292- "output_format" : output_format ,
293299 },
294300 model_embed_params .ModelEmbedParams ,
295301 ),
0 commit comments