Skip to content

Commit 34a47c1

Browse files
truncate large spans
1 parent 8c51c49 commit 34a47c1

3 files changed

Lines changed: 151 additions & 1 deletion

File tree

libraries/microsoft-agents-a365-observability-core/microsoft_agents_a365/observability/core/exporters/agent365_exporter.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
kind_name,
2424
partition_by_identity,
2525
status_name,
26+
truncate_span_if_needed,
2627
)
2728

2829
# ---- Exporter ---------------------------------------------------------------
@@ -295,7 +296,7 @@ def _map_span(self, sp: ReadableSpan) -> dict[str, Any]:
295296
start_ns = sp.start_time
296297
end_ns = sp.end_time
297298

298-
return {
299+
span_dict = {
299300
"traceId": hex_trace_id(ctx.trace_id),
300301
"spanId": hex_span_id(ctx.span_id),
301302
"parentSpanId": parent_span_id,
@@ -308,3 +309,6 @@ def _map_span(self, sp: ReadableSpan) -> dict[str, Any]:
308309
"links": links,
309310
"status": status,
310311
}
312+
313+
# Apply truncation if needed
314+
return truncate_span_if_needed(span_dict)

libraries/microsoft-agents-a365-observability-core/microsoft_agents_a365/observability/core/exporters/utils.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# Copyright (c) Microsoft. All rights reserved.
22

3+
import json
4+
import logging
35
import os
46
from collections.abc import Sequence
57
from typing import Any
@@ -13,6 +15,11 @@
1315
TENANT_ID_KEY,
1416
)
1517

18+
logger = logging.getLogger(__name__)
19+
20+
# Maximum allowed span size in bytes (250 KiB = 256,000 bytes)
21+
MAX_SPAN_SIZE_BYTES = 250 * 1024
22+
1623

1724
def hex_trace_id(value: int) -> str:
1825
# 128-bit -> 32 hex chars
@@ -46,6 +53,77 @@ def status_name(code: StatusCode) -> str:
4653
return str(code)
4754

4855

56+
def _json_size(value: Any) -> int:
    """Return the UTF-8 byte length of *value* serialized as compact JSON."""
    return len(json.dumps(value, separators=(",", ":")).encode("utf-8"))


def truncate_span_if_needed(span_dict: dict[str, Any]) -> dict[str, Any]:
    """
    Truncate span attributes if the serialized span exceeds MAX_SPAN_SIZE_BYTES.
    This mirrors the .NET ExportFormatter behavior.

    Attribute values are replaced with the string "TRUNCATED", largest first,
    until the compact-JSON size of the span fits within the limit or no
    remaining attribute is larger than the marker itself. Only the top-level
    "attributes" mapping is considered; other fields (for example "events")
    are never modified, so the result may still exceed the limit.

    The input dictionary is never mutated: when truncation is required, a
    shallow copy of the span with a copied "attributes" mapping is returned.

    Args:
        span_dict: The span dictionary to potentially truncate

    Returns:
        The original dictionary when it already fits (or when truncation
        fails), otherwise the truncated copy
    """
    placeholder = "TRUNCATED"
    try:
        current_size = _json_size(span_dict)
        if current_size <= MAX_SPAN_SIZE_BYTES:
            return span_dict

        logger.warning(
            "Span size (%d bytes) exceeds limit (%d bytes). "
            "Truncating large payload attributes.",
            current_size,
            MAX_SPAN_SIZE_BYTES,
        )

        # Shallow copy of the span; the attributes mapping is copied separately
        # below so that replacing values does not touch the caller's data.
        truncated_span = span_dict.copy()
        original_attributes = span_dict.get("attributes")
        if not original_attributes:
            return truncated_span

        attributes = dict(original_attributes)
        truncated_span["attributes"] = attributes

        placeholder_size = _json_size(placeholder)

        # Largest serialized value first. The sort is stable, so attributes of
        # equal size keep their original relative order.
        sized_keys = sorted(
            ((key, _json_size(value)) for key, value in attributes.items()),
            key=lambda item: item[1],
            reverse=True,
        )

        truncated_keys = []
        for key, value_size in sized_keys:
            if current_size <= MAX_SPAN_SIZE_BYTES:
                break
            if value_size <= placeholder_size:
                # Every remaining value is no larger than the marker, so
                # replacing it could not shrink the span any further.
                break
            attributes[key] = placeholder
            truncated_keys.append(key)
            # In compact JSON, swapping one value changes the total length by
            # exactly the difference of the two serialized values, so the span
            # does not need to be re-serialized after every replacement.
            current_size -= value_size - placeholder_size

        if truncated_keys:
            # Keys are stringified so a non-string key cannot make the join
            # fail and discard the truncation work.
            logger.info("Truncated attributes: %s", ", ".join(map(str, truncated_keys)))

        return truncated_span

    except Exception as e:
        # Best effort: exporting an oversized span is preferable to failing
        # the export because the size check itself went wrong.
        logger.exception("Error during span truncation: %s", e)
        return span_dict
125+
126+
49127
def partition_by_identity(
50128
spans: Sequence[ReadableSpan],
51129
) -> dict[tuple[str, str], list[ReadableSpan]]:
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# Copyright (c) Microsoft Corporation.
2+
# Licensed under the MIT License.
3+
4+
import unittest
5+
6+
from microsoft_agents_a365.observability.core.exporters.utils import (
7+
truncate_span_if_needed,
8+
)
9+
10+
11+
class TestUtils(unittest.TestCase):
    """Unit tests for utility functions.

    Each scenario of truncate_span_if_needed lives in its own test method so
    that a failure in one case does not hide the results of the others.
    """

    def test_truncate_span_if_needed_small_span_unchanged(self):
        """A span under the size limit is returned with its content intact."""
        small_span = {
            "traceId": "abc123",
            "spanId": "def456",
            "name": "small_span",
            "attributes": {"key1": "value1", "key2": "value2"},
        }
        result = truncate_span_if_needed(small_span)
        self.assertIsNotNone(result)
        self.assertEqual(result["name"], "small_span")
        self.assertEqual(result["attributes"]["key1"], "value1")
        self.assertEqual(result, small_span)

    def test_truncate_span_if_needed_large_attributes_truncated(self):
        """The largest attributes are replaced first; small ones survive."""
        large_span = {
            "traceId": "abc123",
            "spanId": "def456",
            "name": "large_span",
            "attributes": {
                "gen_ai.system": "openai",
                "gen_ai.request.model": "gpt-4",
                "gen_ai.response.model": "gpt-4",
                "gen_ai.input.messages": "x" * 150000,  # Large payload
                "gen_ai.output.messages": "y" * 150000,  # Large payload
                "gen_ai.sample.attribute": "x" * 250000,  # Large payload
                "small_attr": "small_value",
            },
        }
        result = truncate_span_if_needed(large_span)
        self.assertIsNotNone(result)
        # The largest attributes should be truncated first
        self.assertEqual(result["attributes"]["gen_ai.input.messages"], "TRUNCATED")
        self.assertEqual(result["attributes"]["gen_ai.sample.attribute"], "TRUNCATED")
        # Small attributes and non-attribute fields are left alone
        self.assertEqual(result["attributes"]["small_attr"], "small_value")
        self.assertEqual(result["attributes"]["gen_ai.system"], "openai")
        self.assertEqual(result["name"], "large_span")
        # The caller's dictionary must not be modified by truncation
        self.assertEqual(large_span["attributes"]["gen_ai.input.messages"], "x" * 150000)
        self.assertEqual(large_span["attributes"]["gen_ai.sample.attribute"], "x" * 250000)

    def test_truncate_span_if_needed_extreme_span_still_returned(self):
        """A span that stays oversized after truncation is still returned."""
        extreme_span = {
            "traceId": "abc123",
            "spanId": "def456",
            "name": "extreme_span",
            "attributes": {f"attr_{i}": "x" * 10000 for i in range(100)},  # Many large attributes
            "events": [
                {"name": f"event_{i}", "attributes": {"data": "y" * 10000}} for i in range(50)
            ],
        }
        result = truncate_span_if_needed(extreme_span)
        self.assertIsNotNone(result)  # Should always return a span, even if still large
        # All attributes should be truncated due to size
        self.assertEqual(len(result["attributes"]), 100)
        for key in result["attributes"]:
            self.assertEqual(result["attributes"][key], "TRUNCATED")
        # Events are outside the scope of truncation and stay as they were
        self.assertEqual(len(result["events"]), 50)
        # The caller's dictionary must not be modified by truncation
        self.assertEqual(extreme_span["attributes"]["attr_0"], "x" * 10000)


if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)