Skip to content

Commit bc6b6fd

Browse files
feat: add optional compression fields to completion requests (#7)
Co-authored-by: Claude <noreply@anthropic.com>
1 parent 46a8c41 commit bc6b6fd

5 files changed

Lines changed: 226 additions & 2 deletions

File tree

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,15 @@ response = edgee.send(
4242
print(response.text) # Text content
4343
print(response.finish_reason) # Finish reason
4444
print(response.tool_calls) # Tool calls (if any)
45+
46+
# Access usage and compression info
47+
if response.usage:
48+
print(f"Tokens used: {response.usage.total_tokens}")
49+
50+
if response.compression:
51+
print(f"Input tokens: {response.compression.input_tokens}")
52+
print(f"Saved tokens: {response.compression.saved_tokens}")
53+
print(f"Compression rate: {response.compression.rate}")
4554
```
4655

4756
## Stream Method
@@ -64,6 +73,7 @@ for chunk in edgee.stream("gpt-4o", "Tell me a story"):
6473
- **Streaming** - Real-time response streaming with generators
6574
- **Tool calling** - Full support for function calling
6675
- **Flexible input** - Accept strings, dicts, or InputObject
76+
- **Compression info** - Access token compression metrics in responses
6777
- **Zero dependencies** - Uses only Python standard library
6878

6979
## Documentation

edgee/__init__.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ class InputObject:
4646
tools: list[dict] | None = None
4747
tool_choice: str | dict | None = None
4848
tags: list[str] | None = None
49+
enable_compression: bool | None = (
50+
None # Enable token compression (gateway-internal, not sent to providers)
51+
)
52+
compression_rate: float | None = (
53+
None # Compression rate 0.0-1.0 (gateway-internal, not sent to providers)
54+
)
4955

5056

5157
@dataclass
@@ -62,10 +68,18 @@ class Usage:
6268
total_tokens: int
6369

6470

71+
@dataclass
72+
class Compression:
73+
input_tokens: int
74+
saved_tokens: int
75+
rate: float
76+
77+
6578
@dataclass
6679
class SendResponse:
6780
choices: list[Choice]
6881
usage: Usage | None = None
82+
compression: Compression | None = None
6983

7084
@property
7185
def text(self) -> str | None:
@@ -190,16 +204,22 @@ def send(
190204
tools = None
191205
tool_choice = None
192206
tags = None
207+
enable_compression = None
208+
compression_rate = None
193209
elif isinstance(input, InputObject):
194210
messages = input.messages
195211
tools = input.tools
196212
tool_choice = input.tool_choice
197213
tags = input.tags
214+
enable_compression = input.enable_compression
215+
compression_rate = input.compression_rate
198216
else:
199217
messages = input.get("messages", [])
200218
tools = input.get("tools")
201219
tool_choice = input.get("tool_choice")
202220
tags = input.get("tags")
221+
enable_compression = input.get("enable_compression")
222+
compression_rate = input.get("compression_rate")
203223

204224
body: dict = {"model": model, "messages": messages}
205225
if stream:
@@ -210,6 +230,10 @@ def send(
210230
body["tool_choice"] = tool_choice
211231
if tags:
212232
body["tags"] = tags
233+
if enable_compression is not None:
234+
body["enable_compression"] = enable_compression
235+
if compression_rate is not None:
236+
body["compression_rate"] = compression_rate
213237

214238
request = Request(
215239
f"{self.base_url}{API_ENDPOINT}",
@@ -252,7 +276,15 @@ def _handle_non_streaming_response(self, request: Request) -> SendResponse:
252276
total_tokens=data["usage"]["total_tokens"],
253277
)
254278

255-
return SendResponse(choices=choices, usage=usage)
279+
compression = None
280+
if "compression" in data:
281+
compression = Compression(
282+
input_tokens=data["compression"]["input_tokens"],
283+
saved_tokens=data["compression"]["saved_tokens"],
284+
rate=data["compression"]["rate"],
285+
)
286+
287+
return SendResponse(choices=choices, usage=usage, compression=compression)
256288

257289
def _handle_streaming_response(self, request: Request):
258290
"""Handle streaming response, yielding StreamChunk objects."""

example/compression.py

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
"""Example: Token compression with Edgee Gateway SDK
2+
3+
This example demonstrates how to:
4+
1. Enable compression for a request with a large input context
5+
2. Set a custom compression rate
6+
3. Access compression metrics from the response
7+
8+
IMPORTANT: Only USER messages are compressed. System messages are not compressed.
9+
This example includes a large context in the user message to demonstrate meaningful
10+
compression savings.
11+
"""
12+
13+
import os
14+
import sys
15+
16+
# Add parent directory to path for local testing
17+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
18+
19+
from edgee import Edgee
20+
21+
# Initialize the client
22+
edgee = Edgee(os.environ.get("EDGEE_API_KEY"))
23+
24+
# Large context document to demonstrate input compression
25+
LARGE_CONTEXT = """
26+
The History and Impact of Artificial Intelligence
27+
28+
Artificial intelligence (AI) has evolved from a theoretical concept to a
29+
transformative technology that influences nearly every aspect of modern life.
30+
The field began in earnest in the 1950s when pioneers like Alan Turing and
31+
John McCarthy laid the groundwork for machine intelligence.
32+
33+
Early developments focused on symbolic reasoning and expert systems. These
34+
rule-based approaches dominated the field through the 1970s and 1980s, with
35+
systems like MYCIN demonstrating practical applications in medical diagnosis.
36+
However, these early systems were limited by their inability to learn from data
37+
and adapt to new situations.
38+
39+
The resurgence of neural networks in the 1980s and 1990s, particularly with
40+
backpropagation algorithms, opened new possibilities. Yet it wasn't until the
41+
2010s, with the advent of deep learning and the availability of massive datasets
42+
and computational power, that AI truly began to revolutionize industries.
43+
44+
Modern AI applications span numerous domains:
45+
- Natural language processing enables machines to understand and generate human language
46+
- Computer vision allows machines to interpret visual information from the world
47+
- Robotics combines AI with mechanical systems for autonomous operation
48+
- Healthcare uses AI for diagnosis, drug discovery, and personalized treatment
49+
- Finance leverages AI for fraud detection, algorithmic trading, and risk assessment
50+
- Transportation is being transformed by autonomous vehicles and traffic optimization
51+
52+
The development of large language models like GPT, BERT, and others has
53+
particularly accelerated progress in natural language understanding and generation.
54+
These models, trained on vast amounts of text data, can perform a wide range of
55+
language tasks with remarkable proficiency.
56+
57+
Despite remarkable progress, significant challenges remain. Issues of bias,
58+
interpretability, safety, and ethical considerations continue to be areas of
59+
active research and debate. The AI community is working to ensure that these
60+
powerful technologies are developed and deployed responsibly, with consideration
61+
for their societal impact.
62+
63+
Looking forward, AI is expected to continue advancing rapidly, with potential
64+
breakthroughs in areas like artificial general intelligence, quantum machine
65+
learning, and brain-computer interfaces. The integration of AI into daily life
66+
will likely deepen, raising important questions about human-AI collaboration,
67+
workforce transformation, and the future of human cognition itself.
68+
"""
69+
70+
print("=" * 70)
71+
print("Edgee Token Compression Example")
72+
print("=" * 70)
73+
print()
74+
75+
# Example: Request with compression enabled and large input
76+
print("Example: Large user message with compression enabled")
77+
print("-" * 70)
78+
print(f"Input context length: {len(LARGE_CONTEXT)} characters")
79+
print()
80+
81+
# NOTE: Only USER messages are compressed
82+
# Put the large context in the user message to demonstrate compression
83+
user_message = f"""Here is some context about AI:
84+
85+
{LARGE_CONTEXT}
86+
87+
Based on this context, summarize the key milestones in AI development in 3 bullet points."""
88+
89+
response = edgee.send(
90+
model="gpt-4o",
91+
input={
92+
"messages": [
93+
{"role": "user", "content": user_message},
94+
],
95+
"enable_compression": True,
96+
"compression_rate": 0.5,
97+
},
98+
)
99+
100+
print(f"Response: {response.text}")
101+
print()
102+
103+
# Display usage information
104+
if response.usage:
105+
print("Token Usage:")
106+
print(f" Prompt tokens: {response.usage.prompt_tokens}")
107+
print(f" Completion tokens: {response.usage.completion_tokens}")
108+
print(f" Total tokens: {response.usage.total_tokens}")
109+
print()
110+
111+
# Display compression information
112+
if response.compression:
113+
print("Compression Metrics:")
114+
print(f" Input tokens: {response.compression.input_tokens}")
115+
print(f" Saved tokens: {response.compression.saved_tokens}")
116+
print(f" Compression rate: {response.compression.rate:.2%}")
117+
savings_pct = (
118+
(response.compression.saved_tokens / response.compression.input_tokens * 100)
119+
if response.compression.input_tokens > 0
120+
else 0
121+
)
122+
print(f" Savings: {savings_pct:.1f}% of input tokens saved!")
123+
print()
124+
print(" 💡 Without compression, this request would have used")
125+
print(f" {response.compression.input_tokens} input tokens.")
126+
print(
127+
f" With compression, only {response.compression.input_tokens - response.compression.saved_tokens} tokens were processed!"
128+
)
129+
else:
130+
print("No compression data available in response.")
131+
print("Note: Compression data is only returned when compression is enabled")
132+
print(" and supported by your API key configuration.")
133+
134+
print()
135+
print("=" * 70)

tests/test_edgee.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,3 +306,50 @@ def test_config_base_url_overrides_env(self, mock_urlopen):
306306

307307
call_args = mock_urlopen.call_args[0][0]
308308
assert call_args.full_url == f"{config_base_url}/v1/chat/completions"
309+
310+
@patch("edgee.urlopen")
311+
def test_send_with_compression_response(self, mock_urlopen):
312+
"""Should handle response with compression field"""
313+
mock_response_data = {
314+
"choices": [
315+
{
316+
"index": 0,
317+
"message": {"role": "assistant", "content": "Response"},
318+
"finish_reason": "stop",
319+
}
320+
],
321+
"usage": {"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150},
322+
"compression": {
323+
"input_tokens": 100,
324+
"saved_tokens": 42,
325+
"rate": 0.6102003642987249,
326+
},
327+
}
328+
mock_urlopen.return_value = self._mock_response(mock_response_data)
329+
330+
client = Edgee("test-api-key")
331+
result = client.send(model="gpt-4", input="Test")
332+
333+
assert result.compression is not None
334+
assert result.compression.input_tokens == 100
335+
assert result.compression.saved_tokens == 42
336+
assert result.compression.rate == 0.6102003642987249
337+
338+
@patch("edgee.urlopen")
339+
def test_send_without_compression_response(self, mock_urlopen):
340+
"""Should handle response without compression field"""
341+
mock_response_data = {
342+
"choices": [
343+
{
344+
"index": 0,
345+
"message": {"role": "assistant", "content": "Response"},
346+
"finish_reason": "stop",
347+
}
348+
],
349+
}
350+
mock_urlopen.return_value = self._mock_response(mock_response_data)
351+
352+
client = Edgee("test-api-key")
353+
result = client.send(model="gpt-4", input="Test")
354+
355+
assert result.compression is None

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)