# Source: src/nat/observability/exporter/span_exporter.py
# `_to_dict` is a method of the span exporter class in that module (the class header is
# not part of this excerpt, hence the `self` parameter on a column-0 definition).
import json
import logging
import typing


def _to_dict(self, data: typing.Any) -> str:
    """Transform a payload into a string that is safe to use as a span attribute.

    Raw dicts and nested structures can contain values (``None``, custom objects)
    that OTLP span attribute encoding cannot handle, so structured payloads are
    serialized to JSON text instead of being passed through as objects.

    Args:
        data: Payload to convert. Objects exposing ``model_dump`` (e.g. Pydantic
            models), dicts and lists are serialized as JSON; anything else is
            converted with ``str()``.

    Returns:
        Always a ``str``:

        * object with ``model_dump``: JSON of ``model_dump(exclude_none=True)``.
        * dict: JSON of the dict with its top-level ``None`` entries removed
          (``None`` values nested deeper are kept and serialized as ``null``).
        * in both cases above, if the resulting mapping has a non-``None``
          ``'value'`` entry, only that entry is serialized. Note that a string
          extracted this way is JSON-quoted, unlike a plain string input.
        * list: JSON array; items (and any nested values) exposing ``model_dump``
          are dumped structurally, other non-JSON values are stringified.
        * anything else: ``str(data)``, not JSON-quoted.
        * if serialization fails: ``str(data)``, or a ``<unserializable TypeName>``
          placeholder when ``str(data)`` itself raises.
    """

    def _json_default(obj: typing.Any) -> typing.Any:
        # json.dumps calls this for every value it cannot encode natively, at any
        # nesting depth. Dump models structurally so that e.g. {"messages": [model]}
        # does not degrade to a repr string; stringify everything else.
        dump = getattr(obj, 'model_dump', None)
        if callable(dump):
            try:
                return dump(exclude_none=True)
            except Exception:
                # Best-effort: one bad nested object should not discard the whole payload.
                return str(obj)
        return str(obj)

    try:
        if hasattr(data, 'model_dump'):
            result = data.model_dump(exclude_none=True)
        elif isinstance(data, dict):
            result = {k: v for k, v in data.items() if v is not None}
        elif isinstance(data, list):
            # Items are handled by _json_default during encoding.
            result = data
        else:
            return str(data)

        # Unwrap payloads of the form {"value": ...} to just the wrapped value.
        if isinstance(result, dict) and result.get('value') is not None:
            result = result['value']

        return json.dumps(result, default=_json_default)
    except Exception:
        # Deliberate best-effort: building a span attribute must never raise.
        # Record why the structured path failed instead of swallowing it silently.
        logging.getLogger(__name__).debug("Could not serialize span payload of type %s; falling back to str()",
                                          type(data).__name__,
                                          exc_info=True)
        try:
            return str(data)
        except Exception:
            # str() itself failed (e.g. a broken __str__); still honour the "always a str" contract.
            return f"<unserializable {type(data).__name__}>"
# Source: tests/nat/observability/exporter/test_span_exporter.py
# Imports below are the ones this test class uses. `ConcreteSpanExporter` is not
# defined in this excerpt; presumably it is declared earlier in the same test module.
import json

import pytest
from pydantic import BaseModel


class TestToDictSerialization:
    """Tests for _to_dict ensuring OTLP-compatible serialization.

    The invariant under test is that ``_to_dict`` returns a ``str`` for every input:
    JSON text for models, dicts and lists, and ``str(data)`` for everything else.
    """

    @pytest.fixture(name="exporter")
    def fixture_exporter(self):
        """Provide a fresh exporter instance for each test."""
        return ConcreteSpanExporter()

    def test_string_input(self, exporter):
        """String input is returned as-is via str() (no JSON quoting)."""
        result = exporter._to_dict("hello")
        assert result == "hello"
        assert isinstance(result, str)

    def test_dict_input(self, exporter):
        """Dict input is JSON-serialized."""
        data = {"key": "value", "number": 42}
        result = exporter._to_dict(data)
        assert isinstance(result, str)
        assert json.loads(result) == {"key": "value", "number": 42}

    def test_dict_filters_none_values(self, exporter):
        """Dict input has top-level None values filtered out before serialization."""
        data = {"key": "value", "empty": None, "number": 0}
        result = exporter._to_dict(data)
        parsed = json.loads(result)
        assert "empty" not in parsed
        # Falsy-but-not-None values such as 0 must survive the filter.
        assert parsed == {"key": "value", "number": 0}

    def test_dict_with_value_key(self, exporter):
        """Dict with a 'value' key extracts and serializes just the value."""
        data = {"value": "extracted", "other": "ignored"}
        result = exporter._to_dict(data)
        # The extracted string goes through json.dumps, so it is JSON-quoted.
        assert result == '"extracted"'
        assert json.loads(result) == "extracted"

    def test_dict_with_none_value_key(self, exporter):
        """Dict with value=None does not extract the value field."""
        data = {"value": None, "other": "kept"}
        result = exporter._to_dict(data)
        parsed = json.loads(result)
        assert parsed == {"other": "kept"}

    def test_pydantic_model(self, exporter):
        """Pydantic model is serialized via model_dump then JSON."""

        class SampleModel(BaseModel):
            content: str
            score: float
            optional_field: str | None = None

        model = SampleModel(content="test message", score=0.95)
        result = exporter._to_dict(model)
        assert isinstance(result, str)
        parsed = json.loads(result)
        # optional_field is None and therefore dropped by exclude_none=True.
        assert parsed == {"content": "test message", "score": 0.95}

    def test_pydantic_model_with_value_key(self, exporter):
        """Pydantic model with a 'value' field extracts just that field."""

        class WrappedModel(BaseModel):
            value: str
            metadata: str | None = None

        model = WrappedModel(value="unwrapped content")
        result = exporter._to_dict(model)
        parsed = json.loads(result)
        assert parsed == "unwrapped content"

    def test_list_of_pydantic_models(self, exporter):
        """List of Pydantic models is serialized — the original HumanMessage scenario."""

        class MockMessage(BaseModel):
            content: str
            role: str
            extra: str | None = None

        messages = [MockMessage(content="Hello", role="human"), MockMessage(content="Hi there", role="assistant")]
        result = exporter._to_dict(messages)
        assert isinstance(result, str)
        parsed = json.loads(result)
        assert len(parsed) == 2
        assert parsed[0] == {"content": "Hello", "role": "human"}
        assert parsed[1] == {"content": "Hi there", "role": "assistant"}

    def test_list_of_mixed_types(self, exporter):
        """List with mixed types (models and primitives) is serialized."""

        class Item(BaseModel):
            name: str

        data = [Item(name="first"), "plain string", 42]
        result = exporter._to_dict(data)
        assert isinstance(result, str)
        parsed = json.loads(result)
        assert parsed == [{"name": "first"}, "plain string", 42]

    def test_dict_with_nested_none_values(self, exporter):
        """Dict with deeply nested None values still serializes to valid JSON."""
        data = {
            "level1": {
                "level2": [
                    {"key": "value", "empty": None},
                    {"nested_none": None, "data": "present"},
                ]
            },
            "top_none": None,
        }
        result = exporter._to_dict(data)
        assert isinstance(result, str)
        # Should be valid JSON regardless of nested Nones
        parsed = json.loads(result)
        assert "top_none" not in parsed
        assert parsed["level1"]["level2"][0]["key"] == "value"
        # Only top-level None entries are filtered; nested ones are kept as JSON null.
        assert parsed["level1"]["level2"][0]["empty"] is None
        assert parsed["level1"]["level2"][1] == {"nested_none": None, "data": "present"}

    def test_arbitrary_object_falls_back_to_str(self, exporter):
        """Objects that are not models, dicts or lists fall back to str() representation."""

        class CustomObj:

            def __str__(self):
                return "custom_string_repr"

        result = exporter._to_dict(CustomObj())
        assert result == "custom_string_repr"
        assert isinstance(result, str)

    def test_exception_during_serialization_falls_back_to_str(self, exporter):
        """If model_dump raises during serialization, falls back to str()."""

        class BrokenModel:
            """Object whose model_dump raises instead of returning data."""

            def model_dump(self, **kwargs):
                raise RuntimeError("serialization broken")

            def __str__(self):
                return "broken_model_str"

        result = exporter._to_dict(BrokenModel())
        assert result == "broken_model_str"
        assert isinstance(result, str)

    def test_integer_input(self, exporter):
        """Integer input is converted to its str() form."""
        result = exporter._to_dict(42)
        assert isinstance(result, str)
        assert result == "42"

    def test_none_input(self, exporter):
        """None input is converted to its str() form."""
        result = exporter._to_dict(None)
        assert isinstance(result, str)
        assert result == "None"

    @pytest.mark.parametrize(
        "data",
        [
            "text",
            42,
            3.14,
            True,
            None,
            {"a": 1},
            [1, 2, 3],
            {"value": "extracted"},
        ],
    )
    def test_result_is_always_a_string(self, exporter, data):
        """Every code path returns a string — the key invariant for OTLP safety."""
        # Parametrized so that each input is reported separately instead of the
        # first failing case hiding the rest.
        result = exporter._to_dict(data)
        assert isinstance(result, str), f"_to_dict({data!r}) returned {type(result).__name__}, expected str"