tracebility-ai · gaurav0107 · Jun 7, 2026 · Jun 7, 2026 · Jun 7, 2026 · Jun 7, 2026
diff --git a/schemas/postgres/migrations/0027_playground_session_rendered_messages.sql b/schemas/postgres/migrations/0027_playground_session_rendered_messages.sql
@@ -0,0 +1,33 @@
+-- 0027_playground_session_rendered_messages.sql
+-- Add a structured form of the rendered prompt to playground_session.
+-- The existing rendered_prompt text stays as a human-readable
+-- newline-joined view (used by the trace UI today); the new
+-- rendered_messages jsonb is what replay / re-dispatch will read so
+-- the message structure round-trips exactly.
+
+begin;
+
+alter table playground_session
+    add column rendered_messages jsonb;
+
+-- Backfill: wrap each existing rendered_prompt (one concatenated
+-- string) as a single human message so the column can be made not
+-- null below; coalesce keeps content a string if the prompt is null.
+update playground_session
+   set rendered_messages = jsonb_build_array(
+           jsonb_build_object('role', 'human', 'content', coalesce(rendered_prompt, ''))
+       )
+ where rendered_messages is null;
+
+alter table playground_session
+    alter column rendered_messages set not null;
+
+alter table playground_session
+    add constraint playground_session_rendered_messages_nonempty
+    check (jsonb_typeof(rendered_messages) = 'array'
+           and jsonb_array_length(rendered_messages) > 0);
+
+insert into schema_migrations (version) values ('0027_playground_session_rendered_messages')
+on conflict (version) do nothing;
+
+commit;
diff --git a/services/api/tests/unit/test_playground_create_validation.py b/services/api/tests/unit/test_playground_create_validation.py
@@ -0,0 +1,134 @@
+"""PlaygroundCreate xor: exactly one of prompt_version_id /
+raw_template / raw_messages is required per request. Zero or more than
+one is a 422 (pydantic ValidationError)."""
+
+from __future__ import annotations
+
+from uuid import uuid4
+
+import pytest
+from pydantic import ValidationError
+from tracebility_api.routers.playground import Message, PlaygroundCreate
+
+_MODEL = "anthropic/claude-sonnet-4-6"
+
+
+def test_accepts_raw_messages():
+    """raw_messages on its own is accepted; the other two sources stay None."""
+    payload = PlaygroundCreate(
+        project_id=uuid4(),
+        raw_messages=[Message(role="human", content="hi {{ x }}")],
+        variables={"x": "y"},
+        model=_MODEL,
+    )
+    assert payload.raw_messages is not None
+    assert payload.raw_template is None and payload.prompt_version_id is None
+
+
+def test_accepts_raw_template():
+    """raw_template on its own is accepted and stored unchanged."""
+    template = "hi {{ x }}"
+    payload = PlaygroundCreate(project_id=uuid4(), raw_template=template, model=_MODEL)
+    # The template string is stored verbatim and raw_messages is left
+    # unset on the request model.
+    assert payload.raw_template == template
+    assert payload.raw_messages is None
+
+
+def test_accepts_prompt_version_id():
+    """prompt_version_id on its own is accepted; both raw sources stay None."""
+    payload = PlaygroundCreate(
+        project_id=uuid4(),
+        prompt_version_id=uuid4(),
+        model=_MODEL,
+    )
+    assert payload.prompt_version_id is not None
+    assert payload.raw_template is None and payload.raw_messages is None
+
+
+def test_rejects_zero_template_sources():
+    # No template source at all: the error text must mention "required".
+    with pytest.raises(ValidationError, match=r"(?i)required"):
+        PlaygroundCreate(project_id=uuid4(), model=_MODEL)
+
+
+def test_rejects_template_and_messages_together():
+    """raw_template and raw_messages in one request must be rejected."""
+    project = uuid4()
+    with pytest.raises(ValidationError):
+        greeting = Message(role="human", content="hi")
+        PlaygroundCreate(
+            project_id=project, raw_template="hi", raw_messages=[greeting], model=_MODEL
+        )
+
+
+def test_rejects_prompt_id_and_raw_template_together():
+    """prompt_version_id and raw_template in one request must be rejected."""
+    project, version = uuid4(), uuid4()
+    # Two sources at once: the request model must refuse to validate.
+    with pytest.raises(ValidationError):
+        PlaygroundCreate(
+            project_id=project, prompt_version_id=version, raw_template="hi", model=_MODEL
+        )
+
+
+def test_rejects_prompt_id_and_raw_messages_together():
+    """prompt_version_id and raw_messages in one request must be rejected."""
+    project, version = uuid4(), uuid4()
+    with pytest.raises(ValidationError):
+        msg = Message(role="human", content="hi")
+        PlaygroundCreate(
+            project_id=project, prompt_version_id=version, raw_messages=[msg], model=_MODEL
+        )
+
+
+def test_rejects_all_three_together():
+    """All three sources at once fail with a 'mutually exclusive' message."""
+    with pytest.raises(ValidationError, match=r"(?i)mutually exclusive"):
+        PlaygroundCreate(
+            project_id=uuid4(),
+            prompt_version_id=uuid4(),
+            raw_template="hi",
+            raw_messages=[Message(role="human", content="hi")],
+            model=_MODEL,
+        )
+
+
+def test_rejects_empty_raw_messages_list():
+    """An empty raw_messages list is not a valid template source.
+
+    At least one message is required. The request-side validator stops
+    the request before a render is attempted with zero messages; the
+    storage-side check constraint on the prompt_version table that
+    backs the same rule is defined outside this file.
+    """
+    # An explicitly empty list, as opposed to an omitted field.
+    no_messages: list[Message] = []
+    with pytest.raises(ValidationError):
+        PlaygroundCreate(project_id=uuid4(), raw_messages=no_messages, model=_MODEL)
+
+
+def test_rejects_all_three_explicit_none():
+    """Explicit None for every source fails like the zero-source case
+    (e.g. a JSON client that sends nulls instead of omitting fields);
+    None and omitted are expected to be treated identically."""
+    with pytest.raises(ValidationError):
+        PlaygroundCreate(
+            project_id=uuid4(),
+            prompt_version_id=None,
+            raw_template=None,
+            raw_messages=None,
+            model=_MODEL,
+        )
+
+
+def test_rejects_raw_messages_with_invalid_role():
+    """A role outside {system, human} fails validation.
+
+    The Message Literal is expected to reject the value inside
+    pydantic, before the module's own validator runs.
+    """
+    # Deliberately a plain dict rather than a Message instance.
+    bad_message = {"role": "assistant", "content": "hi"}
+    with pytest.raises(ValidationError):
+        PlaygroundCreate(project_id=uuid4(), raw_messages=[bad_message], model=_MODEL)
diff --git a/services/api/tests/unit/test_playground_render_messages.py b/services/api/tests/unit/test_playground_render_messages.py
@@ -0,0 +1,91 @@
+"""Per-message {{ var }} substitution: each message's content is rendered
+against the same variable dict; roles are preserved verbatim.
+
+Spec decision 9: missing variables render as empty string.
+"""
+
+from __future__ import annotations
+
+from tracebility_api.routers.playground import (
+    Message,
+    _render_messages,
+)
+
+
+def test_renders_variables_per_message():
+    """Each message gets its own placeholders filled from the shared dict."""
+    system = Message(role="system", content="You are a {{ tone }} assistant.")
+    human = Message(role="human", content="Summarize: {{ doc }}")
+    values = {"tone": "terse", "doc": "lorem ipsum"}
+    rendered = _render_messages([system, human], values)
+    assert rendered == [
+        Message(role="system", content="You are a terse assistant."),
+        Message(role="human", content="Summarize: lorem ipsum"),
+    ]
+
+
+def test_missing_variable_renders_empty():
+    """Spec decision 9: a placeholder with no matching key in the
+    variables dict is replaced by the empty string."""
+    template = Message(role="human", content="Echo: {{ x }}")
+    expected = Message(role="human", content="Echo: ")
+    assert _render_messages([template], {}) == [expected]
+
+
+def test_no_variables_passes_through():
+    """Content without placeholders comes back equal to the input."""
+    system = Message(role="system", content="static prompt")
+    human = Message(role="human", content="hi")
+    static = [system, human]
+    assert _render_messages(static, {"unused": "value"}) == static
+
+
+def test_returns_new_list_does_not_mutate_input():
+    """The result is a new list and the input message keeps its template."""
+    source = [Message(role="human", content="{{ x }}")]
+    assert _render_messages(source, {"x": "y"}) is not source
+    assert source[0].content == "{{ x }}"
+
+
+def test_non_string_value_serializes_via_json():
+    """Non-string values are rendered as JSON text (json.dumps form), so
+    the dict below appears as {"a": 1} in the content."""
+    template = Message(role="human", content="ctx={{ ctx }}")
+    expected = Message(role="human", content='ctx={"a": 1}')
+    assert _render_messages([template], {"ctx": {"a": 1}}) == [expected]
+
+
+def test_repeated_variable_in_one_content():
+    """Every occurrence of a placeholder is replaced, not only the first."""
+    dup = Message(role="human", content="{{ x }} and {{ x }}")
+    assert _render_messages([dup], {"x": "hi"}) == [Message(role="human", content="hi and hi")]
+
+
+def test_whitespace_around_placeholder():
+    """Padding inside the braces is ignored: {{x}} and {{   x   }} both
+    resolve to the same value."""
+    spaced = Message(role="human", content="a={{x}} b={{   x   }}")
+    expected = Message(role="human", content="a=1 b=1")
+    assert _render_messages([spaced], {"x": "1"}) == [expected]
+
+
+def test_same_var_across_multiple_messages():
+    """One variables dict feeds every message, and order is preserved."""
+    system = Message(role="system", content="tone: {{ tone }}")
+    human = Message(role="human", content="again, tone: {{ tone }}")
+    rendered = _render_messages([system, human], {"tone": "terse"})
+    # Only the content strings are compared here, not the roles.
+    contents = [message.content for message in rendered]
+    assert contents == [
+        "tone: terse",
+        "again, tone: terse",
+    ]
+
+
+def test_returns_fresh_message_objects():
+    """A no-op render still returns a new Message object.
+
+    Equality is value-based, so identity is checked as well to catch a
+    shortcut that hands the input message back unchanged."""
+    original = Message(role="human", content="static")
+    assert _render_messages([original], {})[0] is not original
diff --git a/services/api/tests/unit/test_playground_resolve_messages.py b/services/api/tests/unit/test_playground_resolve_messages.py
@@ -0,0 +1,135 @@
+"""_resolve_messages picks the right source per the xor validator and
+returns the canonical message list shape. The asyncpg pool is mocked
+because we're testing routing logic, not SQL — the SQL path itself is
+covered by the prompt_version integration test."""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock
+from uuid import uuid4
+
+import pytest
+from fastapi import HTTPException
+from tracebility_api.routers.playground import (
+    Message,
+    PlaygroundCreate,
+    _resolve_messages,
+)
+
+_MODEL = "anthropic/claude-sonnet-4-6"
+
+
+@pytest.mark.asyncio
+async def test_raw_messages_used_verbatim():
+    """raw_messages come back as-is, with no version row and no DB read."""
+    system = Message(role="system", content="be terse")
+    human = Message(role="human", content="echo {{ x }}")
+    payload = PlaygroundCreate(
+        project_id=uuid4(),
+        raw_messages=[system, human],
+        variables={},
+        model=_MODEL,
+    )
+    db_pool = AsyncMock()
+    messages, version = await _resolve_messages(db_pool, payload)
+
+    assert version is None
+    assert messages == payload.raw_messages
+    db_pool.fetchrow.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_raw_template_wrapped_as_single_human_message():
+    """A raw_template becomes one human message; no DB read happens."""
+    template = "echo {{ x }}"
+    payload = PlaygroundCreate(project_id=uuid4(), raw_template=template, model=_MODEL)
+    db_pool = AsyncMock()
+
+    messages, version = await _resolve_messages(db_pool, payload)
+
+    # No stored version is involved, so there is no row and no query.
+    assert version is None
+    assert messages == [Message(role="human", content=template)]
+    db_pool.fetchrow.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_prompt_version_id_reads_template_messages():
+    """With prompt_version_id set, the row's template_messages entries
+    come back as Message objects and the pool is queried exactly once."""
+    version_id = uuid4()
+    payload = PlaygroundCreate(project_id=uuid4(), prompt_version_id=version_id, model=_MODEL)
+    # Row the mocked pool returns for the version lookup.
+    stored_messages = [
+        {"role": "system", "content": "be terse"},
+        {"role": "human", "content": "echo {{ x }}"},
+    ]
+    stored_row = {
+        "id": version_id,
+        "prompt_id": uuid4(),
+        "template": "ignored legacy field",
+        "template_messages": stored_messages,
+    }
+    db_pool = AsyncMock()
+    db_pool.fetchrow.return_value = stored_row
+
+    messages, version = await _resolve_messages(db_pool, payload)
+
+    # A version row is returned alongside the validated messages.
+    assert version is not None
+    assert messages == [
+        Message(role="system", content="be terse"),
+        Message(role="human", content="echo {{ x }}"),
+    ]
+    db_pool.fetchrow.assert_awaited_once()
+
+
+@pytest.mark.asyncio
+async def test_prompt_version_id_handles_jsonb_string_form():
+    """template_messages delivered as a JSON string is still decoded.
+
+    Some asyncpg/codec configurations reportedly return jsonb as text
+    rather than as parsed Python objects.
+    """
+    version_id = uuid4()
+    payload = PlaygroundCreate(project_id=uuid4(), prompt_version_id=version_id, model=_MODEL)
+    encoded_messages = '[{"role": "human", "content": "x"}]'
+    db_pool = AsyncMock()
+    db_pool.fetchrow.return_value = {
+        "id": version_id,
+        "prompt_id": uuid4(),
+        "template": "x",
+        "template_messages": encoded_messages,
+    }
+
+    messages, _version = await _resolve_messages(db_pool, payload)
+    assert messages == [Message(role="human", content="x")]
+
+
+@pytest.mark.asyncio
+async def test_prompt_version_id_missing_returns_404():
+    """When the pool returns no row for the id, a 404 HTTPException is raised."""
+    payload = PlaygroundCreate(
+        project_id=uuid4(),
+        prompt_version_id=uuid4(),
+        model=_MODEL,
+    )
+    db_pool = AsyncMock()
+    db_pool.fetchrow.return_value = None  # no such version row
+    with pytest.raises(HTTPException) as raised:
+        await _resolve_messages(db_pool, payload)
+    assert raised.value.status_code == 404
+
+
+def test_rendered_prompt_join_format():
+    """Document the expected rendered_prompt format: message contents
+    joined by a blank line ('\\n\\n'), as create_session is said to do.
+    NOTE(review): the join is re-implemented here instead of calling
+    create_session, so this will not fail if create_session's own join
+    changes - consider extracting a shared helper and testing that."""
+    msgs = [
+        Message(role="system", content="be terse"),
+        Message(role="human", content="echo hello"),
+    ]
+    rendered_prompt = "\n\n".join(m.content for m in msgs)
+    assert rendered_prompt == "be terse\n\necho hello"