-
Notifications
You must be signed in to change notification settings - Fork 498
Refactor _to_dict for OTLP-compatible JSON serialization
#1506
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
dnandakumar-nv
wants to merge
1
commit into
NVIDIA:develop
Choose a base branch
from
dnandakumar-nv:otel-serialization-fix
base: develop
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,12 +13,14 @@ | |
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| import json | ||
| import os | ||
| import uuid | ||
| from datetime import datetime | ||
| from unittest.mock import patch | ||
|
|
||
| import pytest | ||
| from pydantic import BaseModel | ||
|
|
||
| from nat.builder.framework_enum import LLMFrameworkEnum | ||
| from nat.data_models.intermediate_step import IntermediateStep | ||
|
|
@@ -573,3 +575,177 @@ async def test_usage_info_without_token_usage(self, span_exporter): | |
| # Check that span was processed and attributes set correctly | ||
| assert len(span_exporter._outstanding_spans) == 0 | ||
| assert len(span_exporter.exported_spans) == 1 | ||
|
|
||
|
|
||
| class TestToDictSerialization: | ||
| """Tests for _to_dict ensuring OTLP-compatible serialization.""" | ||
|
|
||
| @pytest.fixture(name="exporter") | ||
| def fixture_exporter(self): | ||
| return ConcreteSpanExporter() | ||
|
|
||
| def test_string_input(self, exporter): | ||
| """String input is returned as-is via str().""" | ||
| result = exporter._to_dict("hello") | ||
| assert result == "hello" | ||
| assert isinstance(result, str) | ||
|
|
||
| def test_dict_input(self, exporter): | ||
| """Dict input is JSON-serialized.""" | ||
| data = {"key": "value", "number": 42} | ||
| result = exporter._to_dict(data) | ||
| assert isinstance(result, str) | ||
| assert json.loads(result) == {"key": "value", "number": 42} | ||
|
|
||
| def test_dict_filters_none_values(self, exporter): | ||
| """Dict input has None values filtered out before serialization.""" | ||
| data = {"key": "value", "empty": None, "number": 0} | ||
| result = exporter._to_dict(data) | ||
| parsed = json.loads(result) | ||
| assert "empty" not in parsed | ||
| assert parsed == {"key": "value", "number": 0} | ||
|
|
||
| def test_dict_with_value_key(self, exporter): | ||
| """Dict with a 'value' key extracts and serializes just the value.""" | ||
| data = {"value": "extracted", "other": "ignored"} | ||
| result = exporter._to_dict(data) | ||
| parsed = json.loads(result) | ||
| assert parsed == "extracted" | ||
|
|
||
| def test_dict_with_none_value_key(self, exporter): | ||
| """Dict with value=None does not extract the value field.""" | ||
| data = {"value": None, "other": "kept"} | ||
| result = exporter._to_dict(data) | ||
| parsed = json.loads(result) | ||
| assert parsed == {"other": "kept"} | ||
|
|
||
| def test_pydantic_model(self, exporter): | ||
| """Pydantic model is serialized via model_dump then JSON.""" | ||
|
|
||
| class SampleModel(BaseModel): | ||
| content: str | ||
| score: float | ||
| optional_field: str | None = None | ||
|
|
||
| model = SampleModel(content="test message", score=0.95) | ||
| result = exporter._to_dict(model) | ||
| assert isinstance(result, str) | ||
| parsed = json.loads(result) | ||
| assert parsed == {"content": "test message", "score": 0.95} | ||
|
|
||
| def test_pydantic_model_with_value_key(self, exporter): | ||
| """Pydantic model with a 'value' field extracts just that field.""" | ||
|
|
||
| class WrappedModel(BaseModel): | ||
| value: str | ||
| metadata: str | None = None | ||
|
|
||
| model = WrappedModel(value="unwrapped content") | ||
| result = exporter._to_dict(model) | ||
| parsed = json.loads(result) | ||
| assert parsed == "unwrapped content" | ||
|
|
||
| def test_list_of_pydantic_models(self, exporter): | ||
| """List of Pydantic models is serialized — the original HumanMessage scenario.""" | ||
|
|
||
| class MockMessage(BaseModel): | ||
| content: str | ||
| role: str | ||
| extra: str | None = None | ||
|
|
||
| messages = [MockMessage(content="Hello", role="human"), MockMessage(content="Hi there", role="assistant")] | ||
| result = exporter._to_dict(messages) | ||
| assert isinstance(result, str) | ||
| parsed = json.loads(result) | ||
| assert len(parsed) == 2 | ||
| assert parsed[0] == {"content": "Hello", "role": "human"} | ||
| assert parsed[1] == {"content": "Hi there", "role": "assistant"} | ||
|
|
||
| def test_list_of_mixed_types(self, exporter): | ||
| """List with mixed types (models and primitives) is serialized.""" | ||
|
|
||
| class Item(BaseModel): | ||
| name: str | ||
|
|
||
| data = [Item(name="first"), "plain string", 42] | ||
| result = exporter._to_dict(data) | ||
| assert isinstance(result, str) | ||
| parsed = json.loads(result) | ||
| assert parsed == [{"name": "first"}, "plain string", 42] | ||
|
|
||
| def test_dict_with_nested_none_values(self, exporter): | ||
| """Dict with deeply nested None values does not cause OTLP encoding errors.""" | ||
| data = { | ||
| "level1": { | ||
| "level2": [{ | ||
| "key": "value", "empty": None | ||
| }, { | ||
| "nested_none": None, "data": "present" | ||
| }] | ||
| }, | ||
| "top_none": None, | ||
| } | ||
| result = exporter._to_dict(data) | ||
| assert isinstance(result, str) | ||
| # Should be valid JSON regardless of nested Nones | ||
| parsed = json.loads(result) | ||
| assert "top_none" not in parsed | ||
| assert parsed["level1"]["level2"][0]["key"] == "value" | ||
|
|
||
| def test_arbitrary_object_falls_back_to_str(self, exporter): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Would falling back to json.dumps be more reliable? |
||
| """Non-serializable objects fall back to str() representation.""" | ||
|
|
||
| class CustomObj: | ||
|
|
||
| def __str__(self): | ||
| return "custom_string_repr" | ||
|
|
||
| result = exporter._to_dict(CustomObj()) | ||
| assert result == "custom_string_repr" | ||
| assert isinstance(result, str) | ||
|
|
||
| def test_exception_during_serialization_falls_back_to_str(self, exporter): | ||
| """If JSON serialization fails, falls back to str().""" | ||
|
|
||
| class BrokenModel: | ||
| """Object with model_dump that returns non-serializable data.""" | ||
|
|
||
| def model_dump(self, **kwargs): | ||
| raise RuntimeError("serialization broken") | ||
|
|
||
dnandakumar-nv marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| def __str__(self): | ||
| return "broken_model_str" | ||
|
|
||
| result = exporter._to_dict(BrokenModel()) | ||
| assert result == "broken_model_str" | ||
| assert isinstance(result, str) | ||
|
|
||
| def test_integer_input(self, exporter): | ||
| """Integer input is converted to string.""" | ||
| result = exporter._to_dict(42) | ||
| assert isinstance(result, str) | ||
|
|
||
| def test_none_input(self, exporter): | ||
| """None input is converted to string.""" | ||
| result = exporter._to_dict(None) | ||
| assert isinstance(result, str) | ||
|
|
||
| def test_result_is_always_a_string(self, exporter): | ||
| """Every code path returns a string — the key invariant for OTLP safety.""" | ||
| test_cases = [ | ||
| "text", | ||
| 42, | ||
| 3.14, | ||
| True, | ||
| None, | ||
| { | ||
| "a": 1 | ||
| }, | ||
| [1, 2, 3], | ||
| { | ||
| "value": "extracted" | ||
| }, | ||
| ] | ||
| for data in test_cases: | ||
| result = exporter._to_dict(data) | ||
| assert isinstance(result, str), f"_to_dict({data!r}) returned {type(result).__name__}, expected str" | ||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm worried that the current implementation could be unreliable in the following ways:
Silent failure: str(data) could return a string that is not valid JSON.
Inconsistent normalization for different inputs, such as lists and nested dicts.
Something like this could harden it a bit:
'''
def _to_json_string(self, typing.Any) -> str:
'''