Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
-- 0027_playground_session_rendered_messages.sql
-- Add a structured form of the rendered prompt to playground_session.
-- The existing rendered_prompt text stays as a human-readable
-- newline-joined view (used by the trace UI today); the new
-- rendered_messages jsonb is what replay / re-dispatch will read so
-- the message structure round-trips exactly.
--
-- Steps, all inside one transaction so a failure leaves the table untouched:
--   1. add the column as nullable,
--   2. backfill every existing row,
--   3. tighten to NOT NULL and add the non-empty-array check,
--   4. record the migration version.

begin;

-- Step 1: nullable first, so the backfill below can run before NOT NULL is set.
alter table playground_session
    add column rendered_messages jsonb;

-- Step 2: backfill. Existing sessions are wrapped as a single human message
-- so the column is never null going forward. This preserves the meaning of
-- old sessions whose rendered_prompt was a single concatenated string.
--
-- coalesce: jsonb_build_object stores a SQL NULL value as JSON null, which
-- would yield {"role": "human", "content": null}. That row would still pass
-- the NOT NULL and non-empty-array checks below while carrying a non-string
-- message body. Substituting '' keeps "content" a string in every row.
-- NOTE(review): if rendered_prompt is already declared NOT NULL the coalesce
-- is a no-op; confirm against the table definition.
update playground_session
set rendered_messages = jsonb_build_array(
    jsonb_build_object(
        'role', 'human',
        'content', coalesce(rendered_prompt, '')
    )
)
where rendered_messages is null;

-- Step 3a: every row now has a value, so the column can be made mandatory.
alter table playground_session
    alter column rendered_messages set not null;

-- Step 3b: the value must be a JSON array with at least one element.
alter table playground_session
    add constraint playground_session_rendered_messages_nonempty
    check (jsonb_typeof(rendered_messages) = 'array'
           and jsonb_array_length(rendered_messages) > 0);

-- Step 4: record the version; "do nothing" keeps a re-recorded version
-- from raising a unique-violation.
insert into schema_migrations (version) values ('0027_playground_session_rendered_messages')
on conflict (version) do nothing;

commit;
134 changes: 134 additions & 0 deletions services/api/tests/unit/test_playground_create_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
"""PlaygroundCreate xor: exactly one of prompt_version_id /
raw_template / raw_messages is required per request. Zero or more than
one is a 422 (pydantic ValidationError)."""

from __future__ import annotations

from uuid import uuid4

import pytest
from pydantic import ValidationError
from tracebility_api.routers.playground import Message, PlaygroundCreate

# Model identifier passed as ``model`` to every PlaygroundCreate built in this module.
_MODEL = "anthropic/claude-sonnet-4-6"


def test_accepts_raw_messages():
    """A body whose only template source is raw_messages validates.

    The two other sources (raw_template, prompt_version_id) must stay None.
    """
    messages = [Message(role="human", content="hi {{ x }}")]
    request = PlaygroundCreate(
        project_id=uuid4(),
        raw_messages=messages,
        variables={"x": "y"},
        model=_MODEL,
    )

    assert request.raw_messages is not None
    assert request.raw_template is None
    assert request.prompt_version_id is None


def test_accepts_raw_template():
    """A body whose only template source is raw_template validates.

    The template string is stored unchanged and raw_messages stays None.
    """
    fields = {
        "project_id": uuid4(),
        "raw_template": "hi {{ x }}",
        "model": _MODEL,
    }
    request = PlaygroundCreate(**fields)

    assert request.raw_template == "hi {{ x }}"
    assert request.raw_messages is None


def test_accepts_prompt_version_id():
    """A body whose only template source is prompt_version_id validates.

    Both raw sources (raw_template, raw_messages) must stay None.
    """
    project, version = uuid4(), uuid4()
    request = PlaygroundCreate(
        project_id=project,
        prompt_version_id=version,
        model=_MODEL,
    )

    assert request.prompt_version_id is not None
    assert request.raw_template is None
    assert request.raw_messages is None


def test_rejects_zero_template_sources():
    """With no template source at all, validation fails and says "required"."""
    with pytest.raises(ValidationError) as excinfo:
        PlaygroundCreate(project_id=uuid4(), model=_MODEL)

    # Compare case-insensitively so capitalization of the message may vary.
    error_text = str(excinfo.value).lower()
    assert "required" in error_text


def test_rejects_template_and_messages_together():
    """Supplying raw_template and raw_messages in one body is a ValidationError."""
    conflicting = {
        "project_id": uuid4(),
        "raw_template": "hi",
        "raw_messages": [Message(role="human", content="hi")],
        "model": _MODEL,
    }
    with pytest.raises(ValidationError):
        PlaygroundCreate(**conflicting)


def test_rejects_prompt_id_and_raw_template_together():
    """Supplying prompt_version_id and raw_template together is a ValidationError."""
    conflicting = {
        "project_id": uuid4(),
        "prompt_version_id": uuid4(),
        "raw_template": "hi",
        "model": _MODEL,
    }
    with pytest.raises(ValidationError):
        PlaygroundCreate(**conflicting)


def test_rejects_prompt_id_and_raw_messages_together():
    """Supplying prompt_version_id and raw_messages together is a ValidationError."""
    conflicting = {
        "project_id": uuid4(),
        "prompt_version_id": uuid4(),
        "raw_messages": [Message(role="human", content="hi")],
        "model": _MODEL,
    }
    with pytest.raises(ValidationError):
        PlaygroundCreate(**conflicting)


def test_rejects_all_three_together():
    """All three sources at once fail validation with "mutually exclusive"."""
    everything = {
        "project_id": uuid4(),
        "prompt_version_id": uuid4(),
        "raw_template": "hi",
        "raw_messages": [Message(role="human", content="hi")],
        "model": _MODEL,
    }
    with pytest.raises(ValidationError) as excinfo:
        PlaygroundCreate(**everything)

    # Compare case-insensitively so capitalization of the message may vary.
    error_text = str(excinfo.value).lower()
    assert "mutually exclusive" in error_text


def test_rejects_empty_raw_messages_list():
    """raw_messages=[] does not count as a template source.

    At least one message is required, so an empty list must be rejected by
    request validation instead of reaching a render with zero messages.
    """
    no_messages = []
    with pytest.raises(ValidationError):
        PlaygroundCreate(
            project_id=uuid4(),
            raw_messages=no_messages,
            model=_MODEL,
        )


def test_rejects_all_three_explicit_none():
    """Explicit None for every source is rejected like the zero-source case.

    Covers clients that send JSON nulls instead of omitting the fields; an
    explicit None is expected to be handled the same as an absent field.
    """
    all_null_sources = dict.fromkeys(
        ("prompt_version_id", "raw_template", "raw_messages")
    )
    with pytest.raises(ValidationError):
        PlaygroundCreate(project_id=uuid4(), model=_MODEL, **all_null_sources)


def test_rejects_raw_messages_with_invalid_role():
    """A message whose role is outside {system, human} fails validation.

    The message is passed as a plain dict so that parsing it into a Message
    happens inside PlaygroundCreate, where the role restriction is applied.
    """
    bad_message = {"role": "assistant", "content": "hi"}
    with pytest.raises(ValidationError):
        PlaygroundCreate(
            project_id=uuid4(),
            raw_messages=[bad_message],
            model=_MODEL,
        )
91 changes: 91 additions & 0 deletions services/api/tests/unit/test_playground_render_messages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""Per-message {{ var }} substitution: each message's content is rendered
against the same variable dict; roles are preserved verbatim.

Spec decision 9: missing variables render as empty string.
"""

from __future__ import annotations

from tracebility_api.routers.playground import (
Message,
_render_messages,
)


def test_renders_variables_per_message():
    """Each message's placeholders are filled from the shared variables dict.

    Roles and message order must come back unchanged.
    """
    variables = {"tone": "terse", "doc": "lorem ipsum"}
    templates = [
        Message(role="system", content="You are a {{ tone }} assistant."),
        Message(role="human", content="Summarize: {{ doc }}"),
    ]
    expected = [
        Message(role="system", content="You are a terse assistant."),
        Message(role="human", content="Summarize: lorem ipsum"),
    ]

    assert _render_messages(templates, variables) == expected


def test_missing_variable_renders_empty():
    """Spec decision 9: a placeholder with no matching key renders as ''."""
    template = Message(role="human", content="Echo: {{ x }}")

    rendered = _render_messages([template], {})

    assert rendered == [Message(role="human", content="Echo: ")]


def test_no_variables_passes_through():
    """Messages without placeholders render equal to the input.

    This holds even when the variables dict carries an unused key.
    """
    static_messages = [
        Message(role="system", content="static prompt"),
        Message(role="human", content="hi"),
    ]

    rendered = _render_messages(static_messages, {"unused": "value"})

    assert rendered == static_messages


def test_returns_new_list_does_not_mutate_input():
    """Rendering returns a different list and leaves the input message alone."""
    original = [Message(role="human", content="{{ x }}")]

    rendered = _render_messages(original, {"x": "y"})

    assert rendered is not original
    # The template text on the caller's message must still be the placeholder.
    assert original[0].content == "{{ x }}"


def test_non_string_value_serializes_via_json():
    """A dict-valued variable is expected to appear as readable JSON text."""
    template = Message(role="human", content="ctx={{ ctx }}")
    variables = {"ctx": {"a": 1}}

    rendered = _render_messages([template], variables)

    assert rendered == [Message(role="human", content='ctx={"a": 1}')]


def test_repeated_variable_in_one_content():
    """Every occurrence of a placeholder in one content string is replaced."""
    template = Message(role="human", content="{{ x }} and {{ x }}")

    rendered = _render_messages([template], {"x": "hi"})

    assert rendered == [Message(role="human", content="hi and hi")]


def test_whitespace_around_placeholder():
    """{{x}} and {{ x }} resolve identically.

    Whitespace between the braces and the variable name is optional.
    """
    template = Message(role="human", content="a={{x}} b={{ x }}")

    rendered = _render_messages([template], {"x": "1"})

    assert rendered == [Message(role="human", content="a=1 b=1")]


def test_same_var_across_multiple_messages():
    """One variables dict applies to every message, with order preserved."""
    templates = [
        Message(role="system", content="tone: {{ tone }}"),
        Message(role="human", content="again, tone: {{ tone }}"),
    ]

    rendered = _render_messages(templates, {"tone": "terse"})

    # Only the content strings are compared here, not the roles.
    rendered_contents = [message.content for message in rendered]
    assert rendered_contents == [
        "tone: terse",
        "again, tone: terse",
    ]


def test_returns_fresh_message_objects():
    """A no-op render still returns a new Message object, not the input one.

    Message equality is value-based, so identity is checked explicitly. A
    shortcut that returned the input message unchanged would break the
    no-mutation contract without failing an equality check.
    """
    original = Message(role="human", content="static")

    rendered = _render_messages([original], {})

    assert rendered[0] is not original
135 changes: 135 additions & 0 deletions services/api/tests/unit/test_playground_resolve_messages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
"""_resolve_messages picks the right source per the xor validator and
returns the canonical message list shape. The asyncpg pool is mocked
because we're testing routing logic, not SQL — the SQL path itself is
covered by the prompt_version integration test."""

from __future__ import annotations

from unittest.mock import AsyncMock
from uuid import uuid4

import pytest
from fastapi import HTTPException
from tracebility_api.routers.playground import (
Message,
PlaygroundCreate,
_resolve_messages,
)

# Model identifier passed as ``model`` to every PlaygroundCreate built in this module.
_MODEL = "anthropic/claude-sonnet-4-6"


@pytest.mark.asyncio
async def test_raw_messages_used_verbatim():
    """raw_messages come back as given, with no version row and no DB read."""
    supplied = [
        Message(role="system", content="be terse"),
        Message(role="human", content="echo {{ x }}"),
    ]
    request = PlaygroundCreate(
        project_id=uuid4(),
        raw_messages=supplied,
        variables={},
        model=_MODEL,
    )
    mock_pool = AsyncMock()

    resolved, version_row = await _resolve_messages(mock_pool, request)

    assert version_row is None
    assert resolved == request.raw_messages
    mock_pool.fetchrow.assert_not_called()


@pytest.mark.asyncio
async def test_raw_template_wrapped_as_single_human_message():
    """A raw_template resolves to one human message holding the template text.

    No version row is returned and the pool is never queried.
    """
    request = PlaygroundCreate(
        project_id=uuid4(),
        raw_template="echo {{ x }}",
        model=_MODEL,
    )
    mock_pool = AsyncMock()

    resolved, version_row = await _resolve_messages(mock_pool, request)

    expected = [Message(role="human", content="echo {{ x }}")]
    assert version_row is None
    assert resolved == expected
    mock_pool.fetchrow.assert_not_called()


@pytest.mark.asyncio
async def test_prompt_version_id_reads_template_messages():
    """With prompt_version_id set, messages come from template_messages.

    The mocked row supplies template_messages as a list of dicts. Each entry
    is expected back as a Message, together with a non-None version row,
    after exactly one awaited fetchrow call.
    """
    version_id = uuid4()
    request = PlaygroundCreate(
        project_id=uuid4(),
        prompt_version_id=version_id,
        model=_MODEL,
    )
    stored_row = {
        "id": version_id,
        "prompt_id": uuid4(),
        "template": "ignored legacy field",
        "template_messages": [
            {"role": "system", "content": "be terse"},
            {"role": "human", "content": "echo {{ x }}"},
        ],
    }
    mock_pool = AsyncMock()
    mock_pool.fetchrow.return_value = stored_row

    resolved, version_row = await _resolve_messages(mock_pool, request)

    expected = [
        Message(role="system", content="be terse"),
        Message(role="human", content="echo {{ x }}"),
    ]
    assert version_row is not None
    assert resolved == expected
    mock_pool.fetchrow.assert_awaited_once()


@pytest.mark.asyncio
async def test_prompt_version_id_handles_jsonb_string_form():
    """template_messages delivered as a JSON string is still decoded.

    Some asyncpg/codec configurations hand jsonb back as text instead of a
    parsed list; the resolver is expected to handle that form too.
    """
    version_id = uuid4()
    request = PlaygroundCreate(
        project_id=uuid4(),
        prompt_version_id=version_id,
        model=_MODEL,
    )
    stored_row = {
        "id": version_id,
        "prompt_id": uuid4(),
        "template": "x",
        "template_messages": '[{"role": "human", "content": "x"}]',
    }
    mock_pool = AsyncMock()
    mock_pool.fetchrow.return_value = stored_row

    resolved, _version_row = await _resolve_messages(mock_pool, request)

    assert resolved == [Message(role="human", content="x")]


@pytest.mark.asyncio
async def test_prompt_version_id_missing_returns_404():
    """When fetchrow finds no row, an HTTPException with status 404 is raised."""
    request = PlaygroundCreate(
        project_id=uuid4(),
        prompt_version_id=uuid4(),
        model=_MODEL,
    )
    mock_pool = AsyncMock()
    # Simulate "no such prompt version" at the database layer.
    mock_pool.fetchrow.return_value = None

    with pytest.raises(HTTPException) as excinfo:
        await _resolve_messages(mock_pool, request)

    assert excinfo.value.status_code == 404


def test_rendered_prompt_join_format():
    """Pin the display-string format: message contents joined by a blank line.

    This mirrors the join that create_session is described as using to build
    rendered_prompt from rendered_messages. create_session is not run here;
    the one-line join expression is asserted directly, so that a refactor
    which changes the separator shows up as a deliberate edit to this test.
    """
    rendered_messages = [
        Message(role="system", content="be terse"),
        Message(role="human", content="echo hello"),
    ]

    contents = (message.content for message in rendered_messages)
    display_text = "\n\n".join(contents)

    assert display_text == "be terse\n\necho hello"
Loading
Loading