Skip to content

Commit c3203f5

Browse files
committed
lilac adapter
1 parent 4971888 commit c3203f5

File tree

1 file changed

+265
-0
lines changed

1 file changed

+265
-0
lines changed

eval_protocol/adapters/lilac.py

Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
"""
2+
Lilac ML integration for Eval Protocol.
3+
4+
This adapter provides utilities for converting between EvaluationRow format
5+
and Lilac dataset format, enabling powerful data curation features like:
6+
- Clustering and deduplication
7+
- Semantic search and filtering
8+
- Quality scoring with embeddings
9+
- Interactive data exploration
10+
11+
Prerequisites:
12+
pip install 'lilac[all]'
13+
14+
Example usage:
15+
>>> from eval_protocol.adapters.lilac import (
16+
... evaluation_rows_to_lilac_dataset,
17+
... lilac_dataset_to_evaluation_rows,
18+
... )
19+
>>>
20+
>>> # Convert EvaluationRows to Lilac dataset
21+
>>> dataset = evaluation_rows_to_lilac_dataset(rows, name='my-traces')
22+
>>>
23+
>>> # Do Lilac operations (cluster, filter, etc.)
24+
>>> dataset.cluster('messages_json') # or create your own text column
25+
>>>
26+
>>> # Convert back to EvaluationRows
27+
>>> processed_rows = lilac_dataset_to_evaluation_rows(dataset)
28+
"""
29+
30+
from __future__ import annotations
31+
32+
import json
33+
import logging
34+
from typing import Any, TYPE_CHECKING
35+
36+
import pandas as pd
37+
38+
from eval_protocol.models import (
39+
EvaluateResult,
40+
EvaluationRow,
41+
ExecutionMetadata,
42+
InputMetadata,
43+
Message,
44+
)
45+
46+
if TYPE_CHECKING:
47+
import lilac as ll
48+
49+
logger = logging.getLogger(__name__)
50+
51+
# Check if lilac is available
52+
try:
53+
import lilac as ll
54+
55+
LILAC_AVAILABLE = True
56+
except ImportError:
57+
LILAC_AVAILABLE = False
58+
ll = None # type: ignore
59+
60+
61+
def _ensure_lilac_available() -> None:
    """Guard for code paths that need the optional ``lilac`` package.

    Returns silently when the module-level ``LILAC_AVAILABLE`` flag is true.

    Raises:
        ImportError: If the flag is false, with an install hint in the message.
    """
    if LILAC_AVAILABLE:
        return
    raise ImportError("Lilac is not installed. Install it with: pip install 'lilac[all]'")
65+
66+
67+
# =============================================================================
68+
# Core Conversion Functions
69+
# =============================================================================
70+
71+
72+
def _serialize_message(msg: Message) -> dict[str, Any]:
    """Dump a Message into a plain dictionary, leaving out ``None`` fields.

    Args:
        msg: The message to dump; its ``model_dump`` method is used.

    Returns:
        The dictionary produced by ``msg.model_dump(exclude_none=True)``.
    """
    payload = msg.model_dump(exclude_none=True)
    return payload
75+
76+
77+
def _deserialize_messages(messages_json: str | None) -> list[Message]:
    """Deserialize a JSON array of messages back into Message objects.

    This is a best-effort parser: any failure is logged as a warning and an
    empty list is returned instead of raising.

    Args:
        messages_json: JSON text holding a list of serialized messages.
            ``None`` and the empty string mean "no messages".

    Returns:
        The validated Message objects, or ``[]`` when the input is missing,
        is not JSON text, is not a JSON array, or contains an entry that
        fails validation.
    """
    if not messages_json:
        return []
    try:
        messages_data = json.loads(messages_json)
        # A JSON scalar or object is not a message list; reject it explicitly
        # rather than iterating over something that is not a list of messages.
        if not isinstance(messages_data, list):
            raise ValueError(f"expected a JSON array, got {type(messages_data).__name__}")
        return [Message.model_validate(m) for m in messages_data]
    except (TypeError, ValueError) as e:
        # json.JSONDecodeError is a ValueError subclass. TypeError is what
        # json.loads raises for non-text input (e.g. a float placeholder for
        # a missing cell), which previously escaped this handler.
        logger.warning(f"Failed to deserialize messages: {e}")
        return []
87+
88+
89+
def evaluation_row_to_dict(row: EvaluationRow) -> dict[str, Any]:
    """Convert a single EvaluationRow to a flat dictionary for Lilac.

    The ``*_json`` entries hold JSON text from which the row can be rebuilt;
    the remaining entries are scalars intended for filtering. Users can add
    their own text columns for clustering.

    Args:
        row: The evaluation row to flatten.

    Returns:
        A dictionary with the keys ``row_id``, ``messages_json``,
        ``tools_json``, ``ground_truth_json``, ``input_metadata_json``,
        ``execution_metadata_json``, ``evaluation_result_json``, ``score``,
        ``message_count`` and ``has_tools``. Optional parts that are absent
        on the row are stored as ``None``.
    """
    input_metadata = row.input_metadata
    execution_metadata = row.execution_metadata
    evaluation_result = row.evaluation_result

    return {
        # Identifiers
        "row_id": input_metadata.row_id if input_metadata else None,
        # Full data as JSON (for reconstruction)
        "messages_json": json.dumps([_serialize_message(m) for m in row.messages]),
        # Compare against None rather than relying on truthiness: an empty
        # tool list or a falsy ground truth (0, False, "") is real data and
        # must survive the round trip instead of being stored as missing.
        "tools_json": json.dumps(row.tools) if row.tools is not None else None,
        "ground_truth_json": json.dumps(row.ground_truth) if row.ground_truth is not None else None,
        "input_metadata_json": input_metadata.model_dump_json() if input_metadata else None,
        "execution_metadata_json": execution_metadata.model_dump_json() if execution_metadata else None,
        "evaluation_result_json": evaluation_result.model_dump_json() if evaluation_result else None,
        # Scalar fields for filtering
        "score": evaluation_result.score if evaluation_result else None,
        "message_count": len(row.messages),
        "has_tools": bool(row.tools),
    }
112+
113+
114+
def _usable_json_text(value: Any) -> str | bytes | bytearray | None:
    """Return ``value`` if it is non-empty JSON text, otherwise ``None``.

    Anything that is not ``str``/``bytes``/``bytearray`` (for example a float
    placeholder standing in for a missing cell) is treated as "no value" so
    that it never reaches a JSON parser.
    """
    if isinstance(value, (str, bytes, bytearray)) and value:
        return value
    return None


def _parse_json_column(data: dict[str, Any], column: str) -> Any:
    """Best-effort ``json.loads`` of ``data[column]``; ``None`` on absence or failure."""
    raw = _usable_json_text(data.get(column))
    if raw is None:
        return None
    try:
        return json.loads(raw)
    except ValueError as e:  # json.JSONDecodeError subclasses ValueError
        logger.warning("Failed to parse %s: %s", column, e)
        return None


def dict_to_evaluation_row(data: dict[str, Any]) -> EvaluationRow:
    """Convert a Lilac row dictionary back to an EvaluationRow.

    Every ``*_json`` column is parsed on a best-effort basis: a missing,
    non-text, or malformed value falls back to that field's default and a
    warning is logged, so one bad column does not discard the whole row.

    Args:
        data: Mapping with the keys produced by ``evaluation_row_to_dict``
            (``messages_json``, ``tools_json``, ``ground_truth_json``,
            ``input_metadata_json``, ``execution_metadata_json``,
            ``evaluation_result_json`` and ``row_id``). Extra keys are ignored.

    Returns:
        The reconstructed EvaluationRow.
    """
    # Pass only real JSON text on, so a non-string cell counts as "no messages".
    messages = _deserialize_messages(_usable_json_text(data.get("messages_json")))

    tools = _parse_json_column(data, "tools_json")
    ground_truth = _parse_json_column(data, "ground_truth_json")

    # Input metadata: default instance unless the stored JSON validates.
    input_metadata = InputMetadata()
    input_metadata_text = _usable_json_text(data.get("input_metadata_json"))
    if input_metadata_text is not None:
        try:
            input_metadata = InputMetadata.model_validate_json(input_metadata_text)
        except ValueError as e:
            logger.warning("Failed to parse input_metadata_json: %s", e)
            # Keep at least the identifier from the scalar column, but only
            # when it is an actual string; otherwise pass None as before.
            row_id = data.get("row_id")
            input_metadata = InputMetadata(row_id=row_id if isinstance(row_id, str) else None)

    # Execution metadata: default instance unless the stored JSON validates.
    execution_metadata = ExecutionMetadata()
    execution_metadata_text = _usable_json_text(data.get("execution_metadata_json"))
    if execution_metadata_text is not None:
        try:
            execution_metadata = ExecutionMetadata.model_validate_json(execution_metadata_text)
        except ValueError as e:
            logger.warning("Failed to parse execution_metadata_json: %s", e)

    # Evaluation result: stays None unless the stored JSON validates.
    evaluation_result = None
    evaluation_result_text = _usable_json_text(data.get("evaluation_result_json"))
    if evaluation_result_text is not None:
        try:
            evaluation_result = EvaluateResult.model_validate_json(evaluation_result_text)
        except ValueError as e:
            logger.warning("Failed to parse evaluation_result_json: %s", e)

    return EvaluationRow(
        messages=messages,
        tools=tools,
        ground_truth=ground_truth,
        input_metadata=input_metadata,
        execution_metadata=execution_metadata,
        evaluation_result=evaluation_result,
    )
167+
168+
169+
# =============================================================================
170+
# Main Conversion Functions
171+
# =============================================================================
172+
173+
174+
def evaluation_rows_to_lilac_dataset(
    rows: list[EvaluationRow],
    namespace: str = "local",
    name: str = "eval-data",
    project_dir: str | None = None,
) -> Any:
    """Build a Lilac dataset from a list of EvaluationRows.

    Each row is flattened with ``evaluation_row_to_dict``, the records are
    collected into a pandas DataFrame, and that frame is handed to Lilac as
    a ``PandasSource``.

    Args:
        rows: List of EvaluationRow objects
        namespace: Lilac namespace (default: 'local')
        name: Dataset name
        project_dir: Lilac project directory; when given it is applied via
            ``set_project_dir`` before the dataset is created

    Returns:
        Whatever ``lilac.create_dataset`` returns for the built config.

    Raises:
        ImportError: If lilac is not installed.

    Example:
        >>> dataset = evaluation_rows_to_lilac_dataset(rows, name='my-traces')
        >>>
        >>> # Add your own text column for clustering
        >>> df = dataset.to_pandas()
        >>> df['user_query'] = df['messages_json'].apply(extract_user_query)
        >>> # Re-create dataset with new column, then cluster
    """
    _ensure_lilac_available()
    import lilac as ll_module  # Imported here, after the availability check

    if project_dir:
        ll_module.set_project_dir(project_dir)

    # One flattened record per evaluation row.
    frame = pd.DataFrame([evaluation_row_to_dict(item) for item in rows])

    source = ll_module.PandasSource(frame)
    dataset_config = ll_module.DatasetConfig(
        namespace=namespace,
        name=name,
        source=source,
    )
    return ll_module.create_dataset(dataset_config)
216+
217+
218+
def lilac_dataset_to_evaluation_rows(
    dataset: Any,
    filters: list[tuple[str, str, Any]] | None = None,
    limit: int | None = None,
) -> list[EvaluationRow]:
    """Convert a Lilac dataset back to EvaluationRows.

    Args:
        dataset: Lilac Dataset object
        filters: Optional Lilac filter tuples, e.g. [('score', 'greater', 0.5)]
        limit: Maximum number of rows to return. ``None`` means no limit;
            ``0`` returns an empty list.

    Returns:
        List of EvaluationRow objects. Rows that fail conversion are left
        out by ``dataframe_to_evaluation_rows``.

    Raises:
        ImportError: If lilac is not installed.
    """
    _ensure_lilac_available()

    # Zero is a real limit. Testing truthiness would make it look like
    # "no limit" and return the whole dataset, so answer it directly.
    if limit is not None and limit == 0:
        return []

    # Only forward the options the caller actually supplied.
    kwargs: dict[str, Any] = {}
    if filters:
        kwargs["filters"] = filters
    if limit is not None:
        kwargs["limit"] = limit

    df = dataset.select_rows(**kwargs).df()
    return dataframe_to_evaluation_rows(df)
244+
245+
246+
def evaluation_rows_to_dataframe(rows: list[EvaluationRow]) -> pd.DataFrame:
    """Flatten EvaluationRows into a pandas DataFrame.

    Handy when you would rather work with the tabular data directly.

    Args:
        rows: The evaluation rows to flatten, one DataFrame row each.

    Returns:
        A DataFrame built from the ``evaluation_row_to_dict`` output of
        every input row.
    """
    return pd.DataFrame(data=[evaluation_row_to_dict(entry) for entry in rows])
253+
254+
255+
def dataframe_to_evaluation_rows(df: pd.DataFrame) -> list[EvaluationRow]:
    """Rebuild EvaluationRows from the rows of a pandas DataFrame.

    Each DataFrame row is turned into a dictionary and passed to
    ``dict_to_evaluation_row``. A row whose conversion raises is logged as
    a warning and left out of the result.

    Args:
        df: DataFrame whose rows are to be converted.

    Returns:
        The successfully converted rows, in DataFrame order.
    """
    converted: list[EvaluationRow] = []
    for _index, series in df.iterrows():
        try:
            evaluation_row = dict_to_evaluation_row(series.to_dict())
        except Exception as e:
            # Best effort: report the failing row and move on to the next.
            logger.warning(f"Failed to convert row: {e}")
        else:
            converted.append(evaluation_row)
    return converted

0 commit comments

Comments
 (0)