From 854cb5c90bc5d3708069f3c2a59466796c1a2212 Mon Sep 17 00:00:00 2001 From: benjibc Date: Tue, 9 Sep 2025 20:36:46 +0000 Subject: [PATCH 1/5] LangGraph simple example --- eval_protocol/adapters/bigquery.py | 12 +- eval_protocol/adapters/langchain.py | 102 +++++-------- .../default_langchain_rollout_processor.py | 74 +++------ eval_protocol/pytest/handle_persist_flow.py | 7 + eval_protocol/pytest/langgraph_processor.py | 144 ++++++++++++++++++ examples/langgraph/data/simple_prompts.jsonl | 3 + examples/langgraph/simple_graph.py | 43 ++++++ examples/langgraph/test_langgraph_rollout.py | 67 ++++++++ requirements-dev.txt | 3 + tests/chinook/langgraph/graph.py | 59 +++++++ .../langgraph/test_langgraph_chinook.py | 78 ++++++++++ tests/pytest/test_langgraph_processor.py | 142 +++++++++++++++++ 12 files changed, 609 insertions(+), 125 deletions(-) create mode 100644 eval_protocol/pytest/langgraph_processor.py create mode 100644 examples/langgraph/data/simple_prompts.jsonl create mode 100644 examples/langgraph/simple_graph.py create mode 100644 examples/langgraph/test_langgraph_rollout.py create mode 100644 requirements-dev.txt create mode 100644 tests/chinook/langgraph/graph.py create mode 100644 tests/chinook/langgraph/test_langgraph_chinook.py create mode 100644 tests/pytest/test_langgraph_processor.py diff --git a/eval_protocol/adapters/bigquery.py b/eval_protocol/adapters/bigquery.py index db4cbda0..9831e748 100644 --- a/eval_protocol/adapters/bigquery.py +++ b/eval_protocol/adapters/bigquery.py @@ -7,7 +7,7 @@ from __future__ import annotations import logging -from typing import TYPE_CHECKING, Any, Callable, Dict, Iterator, List, Optional, Union, cast, TypeAlias +from typing import Any, Callable, Dict, Iterator, List, Optional, TypeAlias from eval_protocol.models import CompletionParams, EvaluationRow, InputMetadata, Message @@ -108,10 +108,7 @@ def __init__( # Avoid strict typing on optional dependency self.client = _bigquery_runtime.Client(**client_args) # type: ignore[no-untyped-call, assignment] - except DefaultCredentialsError as e: - logger.error("Failed to authenticate with BigQuery: %s", e) - raise - except Exception as e: + except (DefaultCredentialsError, ImportError, ValueError, TypeError) as e: logger.error("Failed to initialize BigQuery client: %s", e) raise @@ -191,10 +188,7 @@ def get_evaluation_rows( row_count += 1 - except (NotFound, Forbidden) as e: - logger.error("BigQuery access error: %s", e) - raise - except Exception as e: + except (NotFound, Forbidden, RuntimeError, ValueError, TypeError, AttributeError) as e: logger.error("Error executing BigQuery query: %s", e) raise diff --git a/eval_protocol/adapters/langchain.py b/eval_protocol/adapters/langchain.py index df6818a5..3f6f0fb5 100644 --- a/eval_protocol/adapters/langchain.py +++ b/eval_protocol/adapters/langchain.py @@ -3,7 +3,7 @@ import os from typing import Any, Dict, List, Optional -from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, ToolMessage +from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage, ToolMessage from eval_protocol.models import Message @@ -49,75 +49,12 @@ def serialize_lc_message_to_ep(msg: BaseMessage) -> Message: parts.append(item) content = "\n".join(parts) - tool_calls_payload: Optional[List[Dict[str, Any]]] = None - - def _normalize_tool_calls(tc_list: List[Any]) -> List[Dict[str, Any]]: - mapped: List[Dict[str, Any]] = [] - for call in tc_list: - if not isinstance(call, dict): - continue - try: - call_id = call.get("id") or "toolcall_0" - if isinstance(call.get("function"), dict): - fn = call["function"] - fn_name = fn.get("name") or call.get("name") or "tool" - fn_args = fn.get("arguments") - else: - fn_name = call.get("name") or "tool" - fn_args = call.get("arguments") if call.get("arguments") is not None else call.get("args") - if not isinstance(fn_args, str): - import json as _json - - fn_args = _json.dumps(fn_args or {}, ensure_ascii=False) - mapped.append( - { - "id": call_id, - "type": "function", - "function": {"name": fn_name, "arguments": fn_args}, - } - ) - except Exception: - continue - return mapped - - ak = getattr(msg, "additional_kwargs", None) - if isinstance(ak, dict): - tc = ak.get("tool_calls") - if isinstance(tc, list) and tc: - mapped = _normalize_tool_calls(tc) - if mapped: - tool_calls_payload = mapped - - if tool_calls_payload is None: - raw_attr_tc = getattr(msg, "tool_calls", None) - if isinstance(raw_attr_tc, list) and raw_attr_tc: - mapped = _normalize_tool_calls(raw_attr_tc) - if mapped: - tool_calls_payload = mapped - - # Extract reasoning/thinking parts into reasoning_content - reasoning_content = None - if isinstance(msg.content, list): - collected = [ - it.get("thinking", "") for it in msg.content if isinstance(it, dict) and it.get("type") == "thinking" - ] - if collected: - reasoning_content = "\n\n".join([s for s in collected if s]) or None - - # Message.tool_calls expects List[ChatCompletionMessageToolCall] | None. - # We pass through Dicts at runtime but avoid type error by casting. - ep_msg = Message( - role="assistant", - content=content, - tool_calls=tool_calls_payload, # type: ignore[arg-type] - reasoning_content=reasoning_content, - ) + ep_msg = Message(role="assistant", content=content) _dbg_print( "[EP-Ser] -> EP Message:", { "role": ep_msg.role, "content_len": len(ep_msg.content or ""), - "tool_calls": len(ep_msg.tool_calls or []) if isinstance(ep_msg.tool_calls, list) else 0, }, ) return ep_msg @@ -141,3 +78,38 @@ def _normalize_tool_calls(tc_list: List[Any]) -> List[Dict[str, Any]]: ep_msg = Message(role=getattr(msg, "type", "assistant"), content=str(getattr(msg, "content", ""))) _dbg_print("[EP-Ser] -> EP Message (fallback):", {"role": ep_msg.role, "len": len(ep_msg.content or "")}) return ep_msg + + +def serialize_ep_messages_to_lc(messages: List[Message]) -> List[BaseMessage]: + """Convert eval_protocol Message objects to LangChain BaseMessage list. + + - Flattens content parts into strings when content is a list + - Maps EP roles to LC message classes + """ + lc_messages: List[BaseMessage] = [] + for m in messages or []: + content = m.content + if isinstance(content, list): + text_parts: List[str] = [] + for part in content: + try: + text_parts.append(getattr(part, "text", "")) + except AttributeError: + pass + content = "\n".join([t for t in text_parts if t]) + if content is None: + content = "" + text = str(content) + + role = (m.role or "").lower() + if role == "user": + lc_messages.append(HumanMessage(content=text)) + elif role == "assistant": + lc_messages.append(AIMessage(content=text)) + elif role == "system": + from langchain_core.messages import SystemMessage # local import to avoid unused import + + lc_messages.append(SystemMessage(content=text)) + else: + lc_messages.append(HumanMessage(content=text)) + return lc_messages diff --git a/eval_protocol/pytest/default_langchain_rollout_processor.py b/eval_protocol/pytest/default_langchain_rollout_processor.py index 3169987f..bf2131fa 100644 --- a/eval_protocol/pytest/default_langchain_rollout_processor.py +++ b/eval_protocol/pytest/default_langchain_rollout_processor.py @@ -1,17 +1,25 @@ import asyncio import time -from typing import List +from typing import List, Any, cast try: - from langchain_core.messages import BaseMessage -except Exception: # pragma: no cover - optional dependency path - # Minimal fallback base type to satisfy typing when langchain is not present - class BaseMessage: # type: ignore - pass + from langchain_core.messages import BaseMessage as LCBaseMessage, HumanMessage # type: ignore +except ImportError: # pragma: no cover - optional dependency path + # Minimal fallbacks to satisfy typing when langchain is not present + class LCBaseMessage: # type: ignore + content: str + type: str + + def __init__(self, content: str = "", msg_type: str = "assistant"): + self.content = content + self.type = msg_type + + class HumanMessage(LCBaseMessage): # type: ignore + def __init__(self, content: str): + super().__init__(content=content, msg_type="human") from eval_protocol.models import EvaluationRow, Message -from openai.types import CompletionUsage from eval_protocol.pytest.rollout_processor import RolloutProcessor from eval_protocol.pytest.types import RolloutProcessorConfig @@ -34,27 +42,17 @@ def __call__(self, rows: List[EvaluationRow], config: RolloutProcessorConfig): async def _process_row(row: EvaluationRow) -> EvaluationRow: start_time = time.perf_counter() - # Build LC messages from EP row - try: - from langchain_core.messages import HumanMessage - except Exception: - # Fallback minimal message if langchain_core is unavailable - class HumanMessage(BaseMessage): # type: ignore - def __init__(self, content: str): - self.content = content - self.type = "human" - - lm_messages: List[BaseMessage] = [] + # Build LC messages from EP row (minimal: last user to HumanMessage) + lm_messages: List[LCBaseMessage] = [] if row.messages: last_user = [m for m in row.messages if m.role == "user"] if last_user: content = last_user[-1].content or "" if isinstance(content, list): - # Flatten our SDK content parts into a single string for LangChain content = "".join([getattr(p, "text", str(p)) for p in content]) lm_messages.append(HumanMessage(content=str(content))) if not lm_messages: - lm_messages = [HumanMessage(content="")] # minimal + lm_messages = [HumanMessage(content="")] target = await self.get_invoke_target(config) @@ -72,7 +70,7 @@ async def _invoke_direct(payload): invoke_fn = _invoke_direct elif callable(target): - # If target is a normal callable, call it directly; if it returns an awaitable, await it + async def _invoke_wrapper(payload): result = target(payload) if asyncio.iscoroutine(result): @@ -84,44 +82,18 @@ async def _invoke_wrapper(payload): raise TypeError("Unsupported invoke target for LangGraphRolloutProcessor") result_obj = await invoke_fn({"messages": lm_messages}) - # Accept both dicts and objects with .get/.messages if isinstance(result_obj, dict): - result_messages: List[BaseMessage] = result_obj.get("messages", []) + result_messages: List[LCBaseMessage] = result_obj.get("messages", []) else: result_messages = getattr(result_obj, "messages", []) - # TODO: i didn't see a langgraph example so couldn't fully test this. should uncomment and test when we have example ready. - # total_input_tokens = 0 - # total_output_tokens = 0 - # total_tokens = 0 - - # for msg in result_messages: - # if isinstance(msg, BaseMessage): - # usage = getattr(msg, 'response_metadata', {}) - # else: - # usage = msg.get("response_metadata", {}) - - # if usage: - # total_input_tokens += usage.get("prompt_tokens", 0) - # total_output_tokens += usage.get("completion_tokens", 0) - # total_tokens += usage.get("total_tokens", 0) - - # row.execution_metadata.usage = CompletionUsage( - # prompt_tokens=total_input_tokens, - # completion_tokens=total_output_tokens, - # total_tokens=total_tokens, - # ) - - def _serialize_message(msg: BaseMessage) -> Message: - # Prefer SDK-level serializer + def _serialize_message(msg: LCBaseMessage) -> Message: try: from eval_protocol.adapters.langchain import serialize_lc_message_to_ep as _ser - - return _ser(msg) - except Exception: - # Minimal fallback: best-effort string content only + except ImportError: content = getattr(msg, "content", "") return Message(role=getattr(msg, "type", "assistant"), content=str(content)) + return _ser(cast(Any, msg)) row.messages = [_serialize_message(m) for m in result_messages] diff --git a/eval_protocol/pytest/handle_persist_flow.py b/eval_protocol/pytest/handle_persist_flow.py index 58d989f1..63f865ee 100644 --- a/eval_protocol/pytest/handle_persist_flow.py +++ b/eval_protocol/pytest/handle_persist_flow.py @@ -42,6 +42,13 @@ def handle_persist_flow(all_results: list[list[EvaluationRow]], test_func_name: if len(dataset_name) > 63: dataset_name = dataset_name[:63] + # Fireworks requires: last character of id must not be '-' + dataset_name = dataset_name.rstrip("-") + + # Ensure non-empty after stripping; fallback to safe_test_func_name + if not dataset_name: + dataset_name = safe_test_func_name[:63].rstrip("-") or "dataset" + exp_file = exp_dir / f"{experiment_id}.jsonl" with open(exp_file, "w", encoding="utf-8") as f: for row in exp_rows: diff --git a/eval_protocol/pytest/langgraph_processor.py b/eval_protocol/pytest/langgraph_processor.py new file mode 100644 index 00000000..7c63fde4 --- /dev/null +++ b/eval_protocol/pytest/langgraph_processor.py @@ -0,0 +1,144 @@ +import asyncio +from typing import Any, Callable, Dict, List, Optional + +from eval_protocol.models import EvaluationRow, Status, Message +from eval_protocol.pytest.rollout_processor import RolloutProcessor +from eval_protocol.pytest.types import CompletionParams, RolloutProcessorConfig + + +class LangGraphRolloutProcessor(RolloutProcessor): + """ + Generic rollout processor for LangGraph graphs. + + Configure with: + - to_input(row): build the input payload for graph.ainvoke (default: {"messages": row.messages}) + - apply_result(row, result): write graph outputs back onto the row (default: row.messages = result["messages"]) + - build_graph_kwargs(cp): map completion_params to graph kwargs (default: {}) + + Compatible with eval_protocol.pytest.evaluation_test. + """ + + def __init__( + self, + *, + graph_factory: Callable[[Dict[str, Any]], Any], + to_input: Optional[Callable[[EvaluationRow], Dict[str, Any]]] = None, + apply_result: Optional[Callable[[EvaluationRow, Any], EvaluationRow]] = None, + build_graph_kwargs: Optional[Callable[[CompletionParams], Dict[str, Any]]] = None, + input_key: str = "messages", + output_key: str = "messages", + ) -> None: + # Build the graph per-call using completion_params + self._graph_factory = graph_factory + self._to_input = to_input + self._apply_result = apply_result + self._build_graph_kwargs = build_graph_kwargs + self._input_key = input_key + self._output_key = output_key + + def _default_to_input(self, row: EvaluationRow) -> Dict[str, Any]: + messages = row.messages or [] + from eval_protocol.adapters.langchain import serialize_ep_messages_to_lc as _to_lc + + return {self._input_key: _to_lc(messages)} + + def _default_apply_result(self, row: EvaluationRow, result: Any) -> EvaluationRow: + # Expect dict with output_key → list of messages; coerce to EP messages + maybe_msgs = None + if isinstance(result, dict): + maybe_msgs = result.get(self._output_key) + + if maybe_msgs is None: + return row + + # If already EP messages, assign directly + if isinstance(maybe_msgs, list) and all(isinstance(m, Message) for m in maybe_msgs): + row.messages = maybe_msgs + return row + + # Try to convert from LangChain messages; preserve EP Message items as-is + try: + from langchain_core.messages import BaseMessage as _LCBase + from eval_protocol.adapters.langchain import serialize_lc_message_to_ep as _to_ep + + if isinstance(maybe_msgs, list) and any(isinstance(m, _LCBase) for m in maybe_msgs): + converted: List[Message] = [] + for m in maybe_msgs: + if isinstance(m, Message): + converted.append(m) + elif isinstance(m, _LCBase): + converted.append(_to_ep(m)) + elif isinstance(m, dict): + role = m.get("role") or "assistant" + content = m.get("content") + converted.append(Message(role=role, content=content)) + else: + # Best-effort for LC-like objects without importing LC types + role_like = getattr(m, "type", None) + content_like = getattr(m, "content", None) + if content_like is not None: + role_value = "assistant" + if isinstance(role_like, str): + rl = role_like.lower() + if rl in ("human", "user"): + role_value = "user" + elif rl in ("ai", "assistant"): + role_value = "assistant" + elif rl in ("system",): + role_value = "system" + converted.append(Message(role=role_value, content=str(content_like))) + else: + converted.append(Message(role="assistant", content=str(m))) + row.messages = converted + return row + except ImportError: + # If LC is not available, fall back to best-effort below + pass + + # Generic best-effort fallback: stringify to assistant messages + if isinstance(maybe_msgs, list): + row.messages = [Message(role="assistant", content=str(m)) for m in maybe_msgs] + else: + row.messages = [Message(role="assistant", content=str(maybe_msgs))] + return row + + def _default_build_graph_kwargs(self, _: CompletionParams) -> Dict[str, Any]: + # Keep generic: callers can override to map to their graph’s expected kwargs + return {} + + def __call__(self, rows: List[EvaluationRow], config: RolloutProcessorConfig) -> List[asyncio.Task[EvaluationRow]]: + tasks: List[asyncio.Task[EvaluationRow]] = [] + + to_input = self._to_input or self._default_to_input + apply_result = self._apply_result or self._default_apply_result + build_kwargs = self._build_graph_kwargs or self._default_build_graph_kwargs + + graph_config: Optional[Dict[str, Any]] = None + if config.completion_params: + graph_config = build_kwargs(config.completion_params) + + # (Re)build the graph for this call using the graph kwargs + graph_target = self._graph_factory(graph_config or {}) + + async def _process_row(row: EvaluationRow) -> EvaluationRow: + try: + payload = to_input(row) + if graph_config is not None: + result = await graph_target.ainvoke(payload, config=graph_config) + else: + result = await graph_target.ainvoke(payload) + row = apply_result(row, result) + row.rollout_status = Status.rollout_finished() + return row + except (RuntimeError, ValueError, TypeError, KeyError, AttributeError, ImportError) as e: # noqa: BLE001 + row.rollout_status = Status.rollout_error(str(e)) + return row + + for r in rows: + tasks.append(asyncio.create_task(_process_row(r))) + + return tasks + + def cleanup(self) -> None: + # No-op by default + return None diff --git a/examples/langgraph/data/simple_prompts.jsonl b/examples/langgraph/data/simple_prompts.jsonl new file mode 100644 index 00000000..e719f367 --- /dev/null +++ b/examples/langgraph/data/simple_prompts.jsonl @@ -0,0 +1,3 @@ +{"name":"p1","prompt":"Say hello in one sentence","gt":"hello"} +{"name":"p2","prompt":"Introduce yourself briefly","gt":"intro"} +{"name":"p3","prompt":"Respond with a fun fact about space","gt":"space"} diff --git a/examples/langgraph/simple_graph.py b/examples/langgraph/simple_graph.py new file mode 100644 index 00000000..e3f8a830 --- /dev/null +++ b/examples/langgraph/simple_graph.py @@ -0,0 +1,43 @@ +from typing import Any, Dict, List +from typing_extensions import TypedDict, Annotated + + +def _noop() -> None: + return None + + +def build_simple_graph( + model: str = "accounts/fireworks/models/kimi-k2-instruct", + *, + model_provider: str = "fireworks", + temperature: float = 0.0, +) -> Any: + """ + Real LangGraph-based simple graph using LangChain-native messages: + - State: {"messages": List[langchain_core.messages.BaseMessage]} + - Single node that calls Fireworks via ChatFireworks + - Exposes compiled app with .ainvoke + Requires FIREWORKS_API_KEY to be set; no offline fallback. + """ + + from langgraph.graph import StateGraph, END + from langgraph.graph.message import add_messages + from langchain_core.messages import BaseMessage + from langchain.chat_models import init_chat_model + + class State(TypedDict): + messages: Annotated[List[BaseMessage], add_messages] + + llm = init_chat_model(model, model_provider=model_provider, temperature=temperature) + + async def call_model(state: State, **_: Any) -> Dict[str, Any]: + messages: List[BaseMessage] = state.get("messages", []) # type: ignore[assignment] + resp = await llm.ainvoke(messages) + # Return only the delta; reducer will append + return {"messages": [resp]} + + g = StateGraph(State) + g.add_node("call_model", call_model) + g.set_entry_point("call_model") + g.add_edge("call_model", END) + return g.compile() diff --git a/examples/langgraph/test_langgraph_rollout.py b/examples/langgraph/test_langgraph_rollout.py new file mode 100644 index 00000000..ec6cba1f --- /dev/null +++ b/examples/langgraph/test_langgraph_rollout.py @@ -0,0 +1,67 @@ +from typing import Any, Dict, List + +from eval_protocol.models import EvaluationRow, EvaluateResult, Message +from eval_protocol.pytest import evaluation_test +from eval_protocol.pytest.langgraph_processor import LangGraphRolloutProcessor +from eval_protocol.pytest.types import RolloutProcessorConfig as _UnusedRolloutProcessorConfig # noqa: F401 + +from examples.langgraph.simple_graph import build_simple_graph +import os +import pytest + + +def adapter(raw_rows: List[Dict[str, Any]]) -> List[EvaluationRow]: + rows: List[EvaluationRow] = [] + for raw in raw_rows: + prompt = raw.get("prompt", "Say hello") + rows.append( + EvaluationRow( + name=raw.get("name", "row"), + messages=[Message(role="user", content=prompt)], + ground_truth=raw.get("gt"), + input_metadata={"dataset_info": raw}, + ) + ) + return rows + + +def build_graph_kwargs(cp: Dict[str, Any]) -> Dict[str, Any]: + return { + "config": { + "model": cp.get("model"), + "temperature": cp.get("temperature", 0.0), + } + } + + +def graph_factory(graph_kwargs: Dict[str, Any]) -> Any: + cfg = graph_kwargs.get("config", {}) if isinstance(graph_kwargs, dict) else {} + model = cfg.get("model") or "accounts/fireworks/models/kimi-k2-instruct" + temperature = cfg.get("temperature", 0.0) + # Provider is fixed to fireworks for this example; can be extended via cfg if needed + return build_simple_graph(model=model, model_provider="fireworks", temperature=temperature) + + +processor = LangGraphRolloutProcessor( + graph_factory=graph_factory, + build_graph_kwargs=build_graph_kwargs, +) + + +@pytest.mark.skipif(os.getenv("FIREWORKS_API_KEY") in (None, ""), reason="FIREWORKS_API_KEY not set") +@evaluation_test( + input_dataset=["examples/langgraph/data/simple_prompts.jsonl"], + dataset_adapter=adapter, + rollout_processor=processor, + completion_params=[{"model": "accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0}], + mode="pointwise", +) +async def test_langgraph_pointwise(row: EvaluationRow) -> EvaluationRow: + # Example scoring: did assistant reply? + has_reply = 1.0 if any(m.role == "assistant" for m in (row.messages or [])) else 0.0 + row.evaluation_result = EvaluateResult( + score=has_reply, + reason="assistant replied" if has_reply else "no assistant reply", + metrics={"has_reply": {"is_score_valid": True, "score": has_reply, "reason": "reply presence"}}, + ) + return row diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 00000000..6b6139bf --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,3 @@ +langchain-core==0.3.75 +langchain-fireworks==0.3.0 +langgraph==0.6.7 diff --git a/tests/chinook/langgraph/graph.py b/tests/chinook/langgraph/graph.py new file mode 100644 index 00000000..e2b91090 --- /dev/null +++ b/tests/chinook/langgraph/graph.py @@ -0,0 +1,59 @@ +from __future__ import annotations + +from typing import Any, Dict, List + +from tests.chinook.db import connect_database + +try: + # LangGraph import only + from langgraph.graph import END, StateGraph + from langchain_core.runnables import RunnableConfig + from langchain_core.messages import BaseMessage, AIMessage +except Exception as e: # pragma: no cover - import-time helpful error + raise RuntimeError( + "Missing required dependency for LangGraph example. Install langgraph and langchain-core." + ) from e + + +def build_graph() -> Any: + """ + Build and return a minimal LangGraph app that: + - Accepts state {"messages": List[eval_protocol.models.Message]} + - Answers via Supabase-backed Chinook database using tests/chinook/db.py + - Appends the assistant reply to messages + - Returns {"messages": List[Message]} + + Model configuration (RunnableConfig) is accepted but unused here. + """ + + def call_model(state: Dict[str, Any], config: RunnableConfig | None = None) -> Dict[str, Any]: + del config # parameter accepted for signature compatibility; not used in this graph + messages: List[BaseMessage] = state.get("messages") or [] + + _, cursor, introspection = connect_database() + table_names = {row[0] for row in introspection} + candidate = None + if "tracks" in table_names: + candidate = "tracks" + elif "track" in table_names: + candidate = "track" + else: + for t in table_names: + if "track" in t: + candidate = t + break + if candidate is None: + raise RuntimeError("Could not find track(s) table") + cursor.execute(f"SELECT COUNT(*) FROM {candidate}") + total = cursor.fetchone()[0] + reply_text = f"Direct query result from Chinook database: {str(total)}" + + updated_messages = list(messages) + [AIMessage(content=reply_text)] + return {"messages": updated_messages} + + graph = StateGraph(dict) + graph.add_node("call_model", call_model) + graph.set_entry_point("call_model") + graph.add_edge("call_model", END) + app = graph.compile() + return app diff --git a/tests/chinook/langgraph/test_langgraph_chinook.py b/tests/chinook/langgraph/test_langgraph_chinook.py new file mode 100644 index 00000000..0e55afd8 --- /dev/null +++ b/tests/chinook/langgraph/test_langgraph_chinook.py @@ -0,0 +1,78 @@ +import pytest + +from eval_protocol.models import EvaluateResult, EvaluationRow, Message +from eval_protocol.pytest import evaluation_test + +from eval_protocol.pytest.langgraph_processor import LangGraphRolloutProcessor +from eval_protocol.pytest.types import RolloutProcessorConfig, CompletionParams + +from tests.chinook.langgraph.graph import build_graph +from typing import Any, Dict +from openai import OpenAI +import os + + +LLM_JUDGE_PROMPT = ( + "Your job is to compare the response to the expected answer.\n" + "The response will be a narrative report of the query results.\n" + "If the response contains the same or well summarized information as the expected answer, return 1.0.\n" + "If the response does not contain the same information or is missing information, return 0.0." +) + + +def to_langgraph_input(row: EvaluationRow) -> Dict[str, Any]: + # Let the rollout processor handle EP→LC conversion by default; pass through + return {"messages": row.messages or []} + + +def apply_langgraph_result(row: EvaluationRow, result: Dict[str, Any]) -> EvaluationRow: + # Rely on rollout processor defaults which convert LC→EP when possible + maybe_msgs = result.get("messages") or [] + if isinstance(maybe_msgs, list) and all(isinstance(m, Message) for m in maybe_msgs): + row.messages = maybe_msgs + else: + # Minimal fallback: stringify + row.messages = [Message(role="assistant", content=str(m)) for m in maybe_msgs] + return row + + +def build_graph_kwargs(cp: CompletionParams) -> Dict[str, Any]: + # Minimal runnable config mapping; not used by current graph but kept for API parity + model = cp.get("model") + provider = cp.get("provider") + return {"config": {"model": model, "provider": provider}} + + +def agent_factory(_: RolloutProcessorConfig) -> Any: + # Not used in LangGraph path; kept for parity + return None + + +@pytest.mark.asyncio +@pytest.mark.skipif(os.getenv("FIREWORKS_API_KEY") in (None, ""), reason="FIREWORKS_API_KEY not set") +@evaluation_test( + input_messages=[[[Message(role="user", content="What is the total number of tracks in the database?")]]], + completion_params=[{"model": "accounts/fireworks/models/kimi-k2-instruct", "provider": "fireworks"}], + rollout_processor=LangGraphRolloutProcessor( + graph_factory=lambda _: build_graph(), + build_graph_kwargs=build_graph_kwargs, + input_key="messages", + output_key="messages", + ), + mode="pointwise", + passed_threshold=1.0, +) +async def test_langgraph_simple_query(row: EvaluationRow) -> EvaluationRow: + last_assistant_message = row.last_assistant_message() + if last_assistant_message is None or not last_assistant_message.content: + row.evaluation_result = EvaluateResult(score=0.0, reason="No assistant message found") + return row + + # Ensure role mapping is correct + assert row.messages and row.messages[0].role == "user" + assert row.messages[-1].role == "assistant" + score_value = 1.0 if "3503" in last_assistant_message.content else 0.0 + reason_text = last_assistant_message.content[:500] + + row.evaluation_result = EvaluateResult(score=score_value, reason=reason_text) + return row diff --git a/tests/pytest/test_langgraph_processor.py b/tests/pytest/test_langgraph_processor.py new file mode 100644 index 00000000..fead1c44 --- /dev/null +++ b/tests/pytest/test_langgraph_processor.py @@ -0,0 +1,142 @@ +from __future__ import annotations + +import asyncio +from typing import Any, Dict, List + +import pytest + +from eval_protocol.models import EvaluationRow, Message +from eval_protocol.pytest.langgraph_processor import LangGraphRolloutProcessor + + +class DummyLCMessage: + def __init__(self, message_type: str, content: str): # noqa: A002 + self.type = message_type + self.content = content + + +class DummyGraph: + def __init__(self, out_messages: List[Any]): + self._out_messages = out_messages + + async def ainvoke(self, payload: Dict[str, Any], **_: Any): + # Echo back the provided messages plus our out_messages + return {"messages": list(payload.get("messages") or []) + list(self._out_messages)} + + +def _make_processor_with_defaults(out_messages: List[Any]) -> LangGraphRolloutProcessor: + def graph_factory(_: Dict[str, Any]): + return DummyGraph(out_messages) + + return LangGraphRolloutProcessor(graph_factory=graph_factory) + + +@pytest.mark.asyncio +async def test_apply_result_preserves_user_role_and_appends_assistant_from_lc(): + # Arrange: EP user message in, LC assistant out + row = EvaluationRow(messages=[Message(role="user", content="hi")]) + lc_assistant = DummyLCMessage(message_type="ai", content="hello") + processor = _make_processor_with_defaults([lc_assistant]) + + # Act + tasks = processor( + [row], + type( + "Cfg", + (), + { + "completion_params": {}, + "semaphore": asyncio.Semaphore(10), + "mcp_config_path": "", + "logger": None, + "server_script_path": None, + "steps": 1, + "kwargs": {}, + "exception_handler_config": None, + }, + )(), + ) + result_row = await asyncio.gather(*tasks) + out = result_row[0] + + # Assert + assert out.messages[0].role == "user" + assert out.messages[-1].role == "assistant" + assert out.messages[-1].content == "hello" + + +@pytest.mark.asyncio +async def test_apply_result_handles_dict_messages_with_missing_role(): + row = EvaluationRow(messages=[Message(role="user", content="Q")]) + dict_msg = {"content": "A"} # no role provided + processor = _make_processor_with_defaults([dict_msg]) + + tasks = processor( + [row], + type( + "Cfg", + (), + { + "completion_params": {}, + "semaphore": asyncio.Semaphore(10), + "mcp_config_path": "", + "logger": None, + "server_script_path": None, + "steps": 1, + "kwargs": {}, + "exception_handler_config": None, + }, + )(), + ) + out = (await asyncio.gather(*tasks))[0] + + assert out.messages[0].role == "user" + assert out.messages[-1].role == "assistant" + assert out.messages[-1].content == "A" + + +@pytest.mark.asyncio +async def test_to_input_converts_ep_messages_to_lc_via_adapter(monkeypatch): + # Arrange + ep_row = EvaluationRow(messages=[Message(role="user", content="Hello")]) + called = {"ok": False} + + def fake_to_lc(messages): + called["ok"] = True + return [DummyLCMessage(message_type="human", content=messages[0].content)] + + # Patch the adapter function at its source module, since the processor imports it inside the function + import eval_protocol.adapters.langchain as lc_adapter + + monkeypatch.setattr(lc_adapter, "serialize_ep_messages_to_lc", fake_to_lc, raising=True) + + # Dummy graph that returns what it receives + class EchoGraph: + async def ainvoke(self, payload, **_): + # Ensure our adapter-produced messages flow through + return payload + + processor = LangGraphRolloutProcessor(graph_factory=lambda _: EchoGraph()) + + # Act + tasks = processor( + [ep_row], + type( + "Cfg", + (), + { + "completion_params": {}, + "semaphore": asyncio.Semaphore(10), + "mcp_config_path": "", + "logger": None, + "server_script_path": None, + "steps": 1, + "kwargs": {}, + "exception_handler_config": None, + }, + )(), + ) + await asyncio.gather(*tasks) + + # Assert that adapter was used + assert called["ok"] is True From af8ac5c6a8bc8aef71ab2d27fb702616645938f9 Mon Sep 17 00:00:00 2001 From: benjibc Date: Tue, 9 Sep 2025 20:47:26 +0000 Subject: [PATCH 2/5] simplify further --- .../default_langchain_rollout_processor.py | 212 ++++++++++-------- eval_protocol/pytest/langgraph_processor.py | 144 ------------ examples/langgraph/data/simple_prompts.jsonl | 2 - examples/langgraph/test_langgraph_rollout.py | 2 +- .../langgraph/test_langgraph_chinook.py | 2 +- tests/pytest/test_langgraph_processor.py | 2 +- 6 files changed, 126 insertions(+), 238 deletions(-) delete mode 100644 eval_protocol/pytest/langgraph_processor.py diff --git a/eval_protocol/pytest/default_langchain_rollout_processor.py b/eval_protocol/pytest/default_langchain_rollout_processor.py index bf2131fa..7c63fde4 100644 --- a/eval_protocol/pytest/default_langchain_rollout_processor.py +++ b/eval_protocol/pytest/default_langchain_rollout_processor.py @@ -1,105 +1,138 @@ import asyncio -import time -from typing import List, Any, cast +from typing import Any, Callable, Dict, List, Optional -try: - from langchain_core.messages import BaseMessage as LCBaseMessage, HumanMessage # type: ignore -except ImportError: # pragma: no cover - optional dependency path - # Minimal fallbacks to satisfy typing when langchain is not present - class LCBaseMessage: # type: ignore - content: str - type: str - - def __init__(self, content: str = "", msg_type: str = "assistant"): - self.content = content - self.type = msg_type - - class HumanMessage(LCBaseMessage): # type: ignore - def __init__(self, content: str): - super().__init__(content=content, msg_type="human") - - -from eval_protocol.models import EvaluationRow, Message +from eval_protocol.models import EvaluationRow, Status, Message from eval_protocol.pytest.rollout_processor import RolloutProcessor -from eval_protocol.pytest.types import RolloutProcessorConfig +from eval_protocol.pytest.types import CompletionParams, RolloutProcessorConfig class LangGraphRolloutProcessor(RolloutProcessor): - """Generic rollout processor for LangChain agents. - - Accepts an async factory that returns a target to invoke. The target can be: - - An object with `.graph.ainvoke(payload)` (e.g., LangGraph compiled graph) - - An object with `.ainvoke(payload)` - - A callable that accepts `payload` and returns the result dict """ + Generic rollout processor for LangGraph graphs. - def __init__(self, get_invoke_target): - self.get_invoke_target = get_invoke_target - - def __call__(self, rows: List[EvaluationRow], config: RolloutProcessorConfig): - tasks: List[asyncio.Task] = [] - - async def _process_row(row: EvaluationRow) -> EvaluationRow: - start_time = time.perf_counter() - - # Build LC messages from EP row (minimal: last user to HumanMessage) - lm_messages: List[LCBaseMessage] = [] - if row.messages: - last_user = [m for m in row.messages if m.role == "user"] - if last_user: - content = last_user[-1].content or "" - if isinstance(content, list): - content = "".join([getattr(p, "text", str(p)) for p in content]) - lm_messages.append(HumanMessage(content=str(content))) - if not lm_messages: - lm_messages = [HumanMessage(content="")] - - target = await self.get_invoke_target(config) - - # Resolve the appropriate async invoke function - if hasattr(target, "graph") and hasattr(target.graph, "ainvoke"): + Configure with: + - to_input(row): build the input payload for graph.ainvoke (default: {"messages": row.messages}) + - apply_result(row, result): write graph outputs back onto the row (default: row.messages = result["messages"]) + - build_graph_kwargs(cp): map completion_params to graph kwargs (default: {}) - async def _invoke_graph(payload): - return await target.graph.ainvoke(payload) # type: ignore[attr-defined] - - invoke_fn = _invoke_graph - elif hasattr(target, "ainvoke"): - - async def _invoke_direct(payload): - return await target.ainvoke(payload) # type: ignore[attr-defined] - - invoke_fn = _invoke_direct - elif callable(target): - - async def _invoke_wrapper(payload): - result = target(payload) - if asyncio.iscoroutine(result): - return await result - return result - - invoke_fn = _invoke_wrapper - else: - raise TypeError("Unsupported invoke target for LangGraphRolloutProcessor") - - result_obj = await invoke_fn({"messages": lm_messages}) - if isinstance(result_obj, dict): - result_messages: List[LCBaseMessage] = result_obj.get("messages", []) - else: - result_messages = getattr(result_obj, "messages", []) + Compatible with eval_protocol.pytest.evaluation_test. + """ - def _serialize_message(msg: LCBaseMessage) -> Message: - try: - from eval_protocol.adapters.langchain import serialize_lc_message_to_ep as _ser - except ImportError: - content = getattr(msg, "content", "") - return Message(role=getattr(msg, "type", "assistant"), content=str(content)) - return _ser(cast(Any, msg)) + def __init__( + self, + *, + graph_factory: Callable[[Dict[str, Any]], Any], + to_input: Optional[Callable[[EvaluationRow], Dict[str, Any]]] = None, + apply_result: Optional[Callable[[EvaluationRow, Any], EvaluationRow]] = None, + build_graph_kwargs: Optional[Callable[[CompletionParams], Dict[str, Any]]] = None, + input_key: str = "messages", + output_key: str = "messages", + ) -> None: + # Build the graph per-call using completion_params + self._graph_factory = graph_factory + self._to_input = to_input + self._apply_result = apply_result + self._build_graph_kwargs = build_graph_kwargs + self._input_key = input_key + self._output_key = output_key + + def _default_to_input(self, row: EvaluationRow) -> Dict[str, Any]: + messages = row.messages or [] + from eval_protocol.adapters.langchain import serialize_ep_messages_to_lc as _to_lc + + return {self._input_key: _to_lc(messages)} + + def _default_apply_result(self, row: EvaluationRow, result: Any) -> EvaluationRow: + # Expect dict with output_key → list of messages; coerce to EP messages + maybe_msgs = None + if isinstance(result, dict): + maybe_msgs = result.get(self._output_key) + + if maybe_msgs is None: + return row - row.messages = [_serialize_message(m) for m in result_messages] + # If already EP messages, assign directly + if isinstance(maybe_msgs, list) and all(isinstance(m, Message) for m in maybe_msgs): + row.messages = maybe_msgs + return row - row.execution_metadata.duration_seconds = time.perf_counter() - start_time + # Try to convert from LangChain messages; preserve EP Message items as-is + try: + from langchain_core.messages import BaseMessage as _LCBase + from eval_protocol.adapters.langchain import serialize_lc_message_to_ep as _to_ep + + if isinstance(maybe_msgs, list) and any(isinstance(m, _LCBase) for m in maybe_msgs): + converted: List[Message] = [] + for m in maybe_msgs: + if isinstance(m, Message): + converted.append(m) + elif isinstance(m, _LCBase): + converted.append(_to_ep(m)) + elif isinstance(m, dict): + role = m.get("role") or "assistant" + content = m.get("content") + converted.append(Message(role=role, content=content)) + else: + # Best-effort for LC-like objects without importing LC types + role_like = getattr(m, "type", None) + content_like = getattr(m, "content", None) + if content_like is not None: + role_value = "assistant" + if isinstance(role_like, str): + rl = role_like.lower() + if rl in ("human", "user"): + role_value = "user" + elif rl in ("ai", "assistant"): + role_value = "assistant" + elif rl in ("system",): + role_value = "system" + converted.append(Message(role=role_value, content=str(content_like))) + else: + converted.append(Message(role="assistant", content=str(m))) + row.messages = converted + return row + except ImportError: + # If LC is not available, fall back to best-effort below + pass + + # Generic best-effort fallback: stringify to assistant messages + if isinstance(maybe_msgs, list): + row.messages = [Message(role="assistant", content=str(m)) for m in maybe_msgs] + else: + row.messages = [Message(role="assistant", content=str(maybe_msgs))] + return row + + def _default_build_graph_kwargs(self, _: CompletionParams) -> Dict[str, Any]: + # Keep generic: callers can override to map to their graph’s expected kwargs + return {} + + def __call__(self, rows: List[EvaluationRow], config: RolloutProcessorConfig) -> List[asyncio.Task[EvaluationRow]]: + tasks: List[asyncio.Task[EvaluationRow]] = [] + + to_input = self._to_input or self._default_to_input + apply_result = self._apply_result or self._default_apply_result + build_kwargs = self._build_graph_kwargs or self._default_build_graph_kwargs + + graph_config: Optional[Dict[str, Any]] = None + if config.completion_params: + graph_config = build_kwargs(config.completion_params) + + # (Re)build the graph for this call using the graph kwargs + graph_target = self._graph_factory(graph_config or {}) - return row + async def _process_row(row: EvaluationRow) -> EvaluationRow: + try: + payload = to_input(row) + if graph_config is not None: + result = await graph_target.ainvoke(payload, config=graph_config) + else: + result = await graph_target.ainvoke(payload) + row = apply_result(row, result) + row.rollout_status = Status.rollout_finished() + return row + except (RuntimeError, ValueError, TypeError, KeyError, AttributeError, ImportError) as e: # noqa: BLE001 + row.rollout_status = Status.rollout_error(str(e)) + return row for r in rows: tasks.append(asyncio.create_task(_process_row(r))) @@ -107,4 +140,5 @@ def _serialize_message(msg: LCBaseMessage) -> Message: return tasks def cleanup(self) -> None: + # No-op by default return None diff --git a/eval_protocol/pytest/langgraph_processor.py b/eval_protocol/pytest/langgraph_processor.py deleted file mode 100644 index 7c63fde4..00000000 --- a/eval_protocol/pytest/langgraph_processor.py +++ /dev/null @@ -1,144 +0,0 @@ -import asyncio -from typing import Any, Callable, Dict, List, Optional - -from eval_protocol.models import EvaluationRow, Status, Message -from eval_protocol.pytest.rollout_processor import RolloutProcessor -from eval_protocol.pytest.types import CompletionParams, RolloutProcessorConfig - - -class LangGraphRolloutProcessor(RolloutProcessor): - """ - Generic rollout processor for LangGraph graphs. - - Configure with: - - to_input(row): build the input payload for graph.ainvoke (default: {"messages": row.messages}) - - apply_result(row, result): write graph outputs back onto the row (default: row.messages = result["messages"]) - - build_graph_kwargs(cp): map completion_params to graph kwargs (default: {}) - - Compatible with eval_protocol.pytest.evaluation_test. - """ - - def __init__( - self, - *, - graph_factory: Callable[[Dict[str, Any]], Any], - to_input: Optional[Callable[[EvaluationRow], Dict[str, Any]]] = None, - apply_result: Optional[Callable[[EvaluationRow, Any], EvaluationRow]] = None, - build_graph_kwargs: Optional[Callable[[CompletionParams], Dict[str, Any]]] = None, - input_key: str = "messages", - output_key: str = "messages", - ) -> None: - # Build the graph per-call using completion_params - self._graph_factory = graph_factory - self._to_input = to_input - self._apply_result = apply_result - self._build_graph_kwargs = build_graph_kwargs - self._input_key = input_key - self._output_key = output_key - - def _default_to_input(self, row: EvaluationRow) -> Dict[str, Any]: - messages = row.messages or [] - from eval_protocol.adapters.langchain import serialize_ep_messages_to_lc as _to_lc - - return {self._input_key: _to_lc(messages)} - - def _default_apply_result(self, row: EvaluationRow, result: Any) -> EvaluationRow: - # Expect dict with output_key → list of messages; coerce to EP messages - maybe_msgs = None - if isinstance(result, dict): - maybe_msgs = result.get(self._output_key) - - if maybe_msgs is None: - return row - - # If already EP messages, assign directly - if isinstance(maybe_msgs, list) and all(isinstance(m, Message) for m in maybe_msgs): - row.messages = maybe_msgs - return row - - # Try to convert from LangChain messages; preserve EP Message items as-is - try: - from langchain_core.messages import BaseMessage as _LCBase - from eval_protocol.adapters.langchain import serialize_lc_message_to_ep as _to_ep - - if isinstance(maybe_msgs, list) and any(isinstance(m, _LCBase) for m in maybe_msgs): - converted: List[Message] = [] - for m in maybe_msgs: - if isinstance(m, Message): - converted.append(m) - elif isinstance(m, _LCBase): - converted.append(_to_ep(m)) - elif isinstance(m, dict): - role = m.get("role") or "assistant" - content = m.get("content") - converted.append(Message(role=role, content=content)) - else: - # Best-effort for LC-like objects without importing LC types - role_like = getattr(m, "type", None) - content_like = getattr(m, "content", None) - if content_like is not None: - role_value = "assistant" - if isinstance(role_like, str): - rl = role_like.lower() - if rl in ("human", "user"): - role_value = "user" - elif rl in ("ai", "assistant"): - role_value = "assistant" - elif rl in ("system",): - role_value = "system" - converted.append(Message(role=role_value, content=str(content_like))) - else: - converted.append(Message(role="assistant", content=str(m))) - row.messages = converted - return row - except ImportError: - # If LC is not available, fall back to best-effort below - pass - - # Generic best-effort fallback: stringify to assistant messages - if isinstance(maybe_msgs, list): - row.messages = [Message(role="assistant", content=str(m)) for m in maybe_msgs] - else: - row.messages = [Message(role="assistant", content=str(maybe_msgs))] - return row - - def _default_build_graph_kwargs(self, _: CompletionParams) -> Dict[str, Any]: - # Keep generic: callers can override to map to their graph’s expected kwargs - return {} - - def __call__(self, rows: List[EvaluationRow], config: RolloutProcessorConfig) -> List[asyncio.Task[EvaluationRow]]: - tasks: List[asyncio.Task[EvaluationRow]] = [] - - to_input = self._to_input or self._default_to_input - apply_result = self._apply_result or self._default_apply_result - build_kwargs = self._build_graph_kwargs or self._default_build_graph_kwargs - - graph_config: Optional[Dict[str, Any]] = None - if config.completion_params: - graph_config = build_kwargs(config.completion_params) - - # (Re)build the graph for this call using the graph kwargs - graph_target = self._graph_factory(graph_config or {}) - - async def _process_row(row: EvaluationRow) -> EvaluationRow: - try: - payload = to_input(row) - if graph_config is not None: - result = await graph_target.ainvoke(payload, config=graph_config) - else: - result = await graph_target.ainvoke(payload) - row = apply_result(row, result) - row.rollout_status = Status.rollout_finished() - return row - except (RuntimeError, ValueError, TypeError, KeyError, AttributeError, ImportError) as e: # noqa: BLE001 - row.rollout_status = Status.rollout_error(str(e)) - return row - - for r in rows: - tasks.append(asyncio.create_task(_process_row(r))) - - return tasks - - def cleanup(self) -> None: - # No-op by default - return None diff --git a/examples/langgraph/data/simple_prompts.jsonl b/examples/langgraph/data/simple_prompts.jsonl index e719f367..cc870056 100644 --- a/examples/langgraph/data/simple_prompts.jsonl +++ b/examples/langgraph/data/simple_prompts.jsonl @@ -1,3 +1 @@ {"name":"p1","prompt":"Say hello in one sentence","gt":"hello"} -{"name":"p2","prompt":"Introduce yourself briefly","gt":"intro"} -{"name":"p3","prompt":"Respond with a fun fact about space","gt":"space"} diff --git a/examples/langgraph/test_langgraph_rollout.py b/examples/langgraph/test_langgraph_rollout.py index ec6cba1f..728000cb 100644 --- a/examples/langgraph/test_langgraph_rollout.py +++ b/examples/langgraph/test_langgraph_rollout.py @@ -2,7 +2,7 @@ from eval_protocol.models import EvaluationRow, EvaluateResult, Message from eval_protocol.pytest import evaluation_test -from eval_protocol.pytest.langgraph_processor import LangGraphRolloutProcessor +from eval_protocol.pytest.default_langchain_rollout_processor import LangGraphRolloutProcessor from eval_protocol.pytest.types import RolloutProcessorConfig as _UnusedRolloutProcessorConfig # noqa: F401 from examples.langgraph.simple_graph import build_simple_graph diff --git a/tests/chinook/langgraph/test_langgraph_chinook.py b/tests/chinook/langgraph/test_langgraph_chinook.py index 0e55afd8..b0cfcb4f 100644 --- a/tests/chinook/langgraph/test_langgraph_chinook.py +++ b/tests/chinook/langgraph/test_langgraph_chinook.py @@ -3,7 +3,7 @@ from eval_protocol.models import EvaluateResult, EvaluationRow, Message from eval_protocol.pytest import evaluation_test -from eval_protocol.pytest.langgraph_processor import LangGraphRolloutProcessor +from eval_protocol.pytest.default_langchain_rollout_processor import LangGraphRolloutProcessor from eval_protocol.pytest.types import RolloutProcessorConfig, CompletionParams from tests.chinook.langgraph.graph import build_graph diff --git a/tests/pytest/test_langgraph_processor.py b/tests/pytest/test_langgraph_processor.py index fead1c44..dd7e9895 100644 --- a/tests/pytest/test_langgraph_processor.py +++ b/tests/pytest/test_langgraph_processor.py @@ -6,7 +6,7 @@ import pytest from eval_protocol.models import EvaluationRow, Message -from eval_protocol.pytest.langgraph_processor import LangGraphRolloutProcessor +from eval_protocol.pytest.default_langchain_rollout_processor import LangGraphRolloutProcessor class DummyLCMessage: From 2d915d16d012c3b17f0c5bec8c0b8414c0fb7769 Mon Sep 17 00:00:00 2001 From: benjibc Date: Wed, 10 Sep 2025 00:21:11 +0000 Subject: [PATCH 3/5] update the test coverage, added tool call example --- eval_protocol/adapters/langchain.py | 129 ++++++++++++++++-- .../default_langchain_rollout_processor.py | 6 +- .../langgraph/reasoning_gpt_oss_120b_graph.py | 52 +++++++ examples/langgraph/simple_graph.py | 4 - examples/langgraph/test_reasoning_rollout.py | 75 ++++++++++ .../langgraph/test_langgraph_chinook.py | 5 - .../langgraph/test_langgraph_chinook_tools.py | 56 ++++++++ tests/chinook/langgraph/tools_graph.py | 87 ++++++++++++ tests/pytest/test_langgraph_processor.py | 101 ++++++++++++++ 9 files changed, 490 insertions(+), 25 deletions(-) create mode 100644 examples/langgraph/reasoning_gpt_oss_120b_graph.py create mode 100644 examples/langgraph/test_reasoning_rollout.py create mode 100644 tests/chinook/langgraph/test_langgraph_chinook_tools.py create mode 100644 tests/chinook/langgraph/tools_graph.py diff --git a/eval_protocol/adapters/langchain.py b/eval_protocol/adapters/langchain.py index 3f6f0fb5..6b6868ea 100644 --- a/eval_protocol/adapters/langchain.py +++ b/eval_protocol/adapters/langchain.py @@ -1,9 +1,11 @@ from __future__ import annotations import os -from typing import Any, Dict, List, Optional +from typing import List from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage, ToolMessage +from eval_protocol.human_id import generate_id +import json from eval_protocol.models import Message @@ -14,10 +16,8 @@ def _dbg_enabled() -> bool: def _dbg_print(*args): if _dbg_enabled(): - try: - print(*args) - except Exception: - pass + # Best-effort debug print without broad exception handling + print(*args) def serialize_lc_message_to_ep(msg: BaseMessage) -> Message: @@ -36,25 +36,126 @@ def serialize_lc_message_to_ep(msg: BaseMessage) -> Message: return ep_msg if isinstance(msg, AIMessage): - content = "" + # Extract visible content and hidden reasoning content if present + content_text = "" + reasoning_texts: List[str] = [] + if isinstance(msg.content, str): - content = msg.content + content_text = msg.content elif isinstance(msg.content, list): - parts: List[str] = [] + text_parts: List[str] = [] for item in msg.content: if isinstance(item, dict): - if item.get("type") == "text": - parts.append(str(item.get("text", ""))) + item_type = item.get("type") + if item_type == "text": + text_parts.append(str(item.get("text", ""))) + elif item_type in ("reasoning", "thinking", "thought"): + # Some providers return dedicated reasoning parts + maybe_text = item.get("text") or item.get("content") + if isinstance(maybe_text, str): + reasoning_texts.append(maybe_text) elif isinstance(item, str): - parts.append(item) - content = "\n".join(parts) + text_parts.append(item) + content_text = "\n".join([t for t in text_parts if t]) + + # Additional place providers may attach reasoning + additional_kwargs = getattr(msg, "additional_kwargs", None) + if isinstance(additional_kwargs, dict): + rk = additional_kwargs.get("reasoning_content") + if isinstance(rk, str) and rk: + reasoning_texts.append(rk) + + # Fireworks and others sometimes nest under `reasoning` or `metadata` + nested_reasoning = additional_kwargs.get("reasoning") + if isinstance(nested_reasoning, dict): + inner = nested_reasoning.get("content") or nested_reasoning.get("text") + if isinstance(inner, str) and inner: + reasoning_texts.append(inner) + + # Capture tool calls and function_call if present on AIMessage + def _normalize_tool_calls(raw_tcs): + normalized = [] + for tc in raw_tcs or []: + if isinstance(tc, dict) and "function" in tc: + # Assume already OpenAI style + fn = tc.get("function", {}) + # Ensure arguments is a string + args = fn.get("arguments") + if not isinstance(args, str): + try: + args = json.dumps(args) + except Exception: + args = str(args) + normalized.append( + { + "id": tc.get("id") or generate_id(), + "type": tc.get("type") or "function", + "function": {"name": fn.get("name", ""), "arguments": args}, + } + ) + elif isinstance(tc, dict) and ("name" in tc) and ("args" in tc or "arguments" in tc): + # LangChain tool schema → OpenAI function-call schema + name = tc.get("name", "") + args_val = tc.get("args", tc.get("arguments", {})) + if not isinstance(args_val, str): + try: + args_val = json.dumps(args_val) + except Exception: + args_val = str(args_val) + normalized.append( + { + "id": tc.get("id") or generate_id(), + "type": "function", + "function": {"name": name, "arguments": args_val}, + } + ) + else: + # Best-effort: stringify unknown formats + normalized.append( + { + "id": generate_id(), + "type": "function", + "function": { + "name": str(tc.get("name", "tool")) if isinstance(tc, dict) else "tool", + "arguments": json.dumps(tc) if not isinstance(tc, str) else tc, + }, + } + ) + return normalized if normalized else None + + extracted_tool_calls = None + tc_attr = getattr(msg, "tool_calls", None) + if isinstance(tc_attr, list): + extracted_tool_calls = _normalize_tool_calls(tc_attr) + + if extracted_tool_calls is None and isinstance(additional_kwargs, dict): + maybe_tc = additional_kwargs.get("tool_calls") + if isinstance(maybe_tc, list): + extracted_tool_calls = _normalize_tool_calls(maybe_tc) + + extracted_function_call = None + fc_attr = getattr(msg, "function_call", None) + if fc_attr: + extracted_function_call = fc_attr + if extracted_function_call is None and isinstance(additional_kwargs, dict): + maybe_fc = additional_kwargs.get("function_call") + if maybe_fc: + extracted_function_call = maybe_fc - ep_msg = Message(role="assistant", content=content) + ep_msg = Message( + role="assistant", + content=content_text, + reasoning_content=("\n".join(reasoning_texts) if reasoning_texts else None), + tool_calls=extracted_tool_calls, # type: ignore[arg-type] + function_call=extracted_function_call, # type: ignore[arg-type] + ) _dbg_print( "[EP-Ser] -> EP Message:", { "role": ep_msg.role, "content_len": len(ep_msg.content or ""), + "has_reasoning": bool(ep_msg.reasoning_content), + "has_tool_calls": bool(ep_msg.tool_calls), }, ) return ep_msg @@ -107,8 +208,6 @@ def serialize_ep_messages_to_lc(messages: List[Message]) -> List[BaseMessage]: elif role == "assistant": lc_messages.append(AIMessage(content=text)) elif role == "system": - from langchain_core.messages import SystemMessage # local import to avoid unused import - lc_messages.append(SystemMessage(content=text)) else: lc_messages.append(HumanMessage(content=text)) diff --git a/eval_protocol/pytest/default_langchain_rollout_processor.py b/eval_protocol/pytest/default_langchain_rollout_processor.py index 7c63fde4..95ff0769 100644 --- a/eval_protocol/pytest/default_langchain_rollout_processor.py +++ b/eval_protocol/pytest/default_langchain_rollout_processor.py @@ -71,7 +71,11 @@ def _default_apply_result(self, row: EvaluationRow, result: Any) -> EvaluationRo elif isinstance(m, dict): role = m.get("role") or "assistant" content = m.get("content") - converted.append(Message(role=role, content=content)) + tool_calls = m.get("tool_calls") + function_call = m.get("function_call") + converted.append( + Message(role=role, content=content, tool_calls=tool_calls, function_call=function_call) + ) else: # Best-effort for LC-like objects without importing LC types role_like = getattr(m, "type", None) diff --git a/examples/langgraph/reasoning_gpt_oss_120b_graph.py b/examples/langgraph/reasoning_gpt_oss_120b_graph.py new file mode 100644 index 00000000..7ba009c0 --- /dev/null +++ b/examples/langgraph/reasoning_gpt_oss_120b_graph.py @@ -0,0 +1,52 @@ +from typing import Any, Dict, List +from typing_extensions import Annotated, TypedDict + + +def build_reasoning_graph( + *, + model: str = "accounts/fireworks/models/gpt-oss-120b", + model_provider: str = "fireworks", + temperature: float = 0.0, + reasoning_effort: str | None = None, +) -> Any: + """ + LangGraph example: use Fireworks reasoning model gpt-oss-120b with structured state. + + Requirements: + - Install: `pip install langchain fireworks-ai`. + - Env: export `FIREWORKS_API_KEY`. + + Notes: + - You can control reasoning behavior via extra_body (reasoning_effort). Common values: "low", "medium", "high". + - The graph is a single-node message app that calls the model and appends the response. + + Example: + graph = build_reasoning_graph(reasoning_effort="high") + out = await graph.ainvoke({"messages": [{"role": "user", "content": "Explain why the sky is blue."}]}) + """ + + from langgraph.graph import StateGraph, END + from langgraph.graph.message import add_messages + from langchain.chat_models import init_chat_model + from langchain_core.messages import BaseMessage + + class State(TypedDict): + messages: Annotated[List[BaseMessage], add_messages] + + # Initialize Fireworks reasoning model + llm = init_chat_model( + model, + model_provider=model_provider, + temperature=temperature, + reasoning_effort=reasoning_effort, + ) + + async def call_model(state: State) -> Dict[str, Any]: + response = await llm.ainvoke(state["messages"]) # type: ignore[assignment] + return {"messages": [response]} + + g = StateGraph(State) + g.add_node("call_model", call_model) + g.set_entry_point("call_model") + g.add_edge("call_model", END) + return g.compile() diff --git a/examples/langgraph/simple_graph.py b/examples/langgraph/simple_graph.py index e3f8a830..abfe8547 100644 --- a/examples/langgraph/simple_graph.py +++ b/examples/langgraph/simple_graph.py @@ -2,10 +2,6 @@ from typing_extensions import TypedDict, Annotated -def _noop() -> None: - return None - - def build_simple_graph( model: str = "accounts/fireworks/models/kimi-k2-instruct", *, diff --git a/examples/langgraph/test_reasoning_rollout.py b/examples/langgraph/test_reasoning_rollout.py new file mode 100644 index 00000000..21d4c499 --- /dev/null +++ b/examples/langgraph/test_reasoning_rollout.py @@ -0,0 +1,75 @@ +from typing import Any, Dict, List + +from eval_protocol.models import EvaluationRow, EvaluateResult, Message +from eval_protocol.pytest import evaluation_test +from eval_protocol.pytest.default_langchain_rollout_processor import LangGraphRolloutProcessor + +from examples.langgraph.reasoning_gpt_oss_120b_graph import build_reasoning_graph +import os +import pytest + + +def adapter(raw_rows: List[Dict[str, Any]]) -> List[EvaluationRow]: + rows: List[EvaluationRow] = [] + for raw in raw_rows: + prompt = raw.get("prompt", "Explain why the sky is blue.") + rows.append( + EvaluationRow( + name=raw.get("name", "row"), + messages=[Message(role="user", content=prompt)], + ground_truth=raw.get("gt"), + input_metadata={"dataset_info": raw}, + ) + ) + return rows + + +def build_graph_kwargs(cp: Dict[str, Any]) -> Dict[str, Any]: + return { + "config": { + "model": cp.get("model", "accounts/fireworks/models/gpt-oss-120b"), + "temperature": cp.get("temperature", 0.0), + "reasoning_effort": cp.get("reasoning_effort"), + } + } + + +def graph_factory(graph_kwargs: Dict[str, Any]) -> Any: + cfg = graph_kwargs.get("config", {}) if isinstance(graph_kwargs, dict) else {} + model = cfg.get("model") or "accounts/fireworks/models/gpt-oss-120b" + temperature = cfg.get("temperature", 0.0) + reasoning_effort = cfg.get("reasoning_effort") + return build_reasoning_graph( + model=model, + model_provider="fireworks", + temperature=temperature, + reasoning_effort=reasoning_effort, + ) + + +processor = LangGraphRolloutProcessor( + graph_factory=graph_factory, + build_graph_kwargs=build_graph_kwargs, +) + + +@pytest.mark.skipif(os.getenv("FIREWORKS_API_KEY") in (None, ""), reason="FIREWORKS_API_KEY not set") +@evaluation_test( + input_dataset=["examples/langgraph/data/simple_prompts.jsonl"], + dataset_adapter=adapter, + rollout_processor=processor, + completion_params=[ + {"model": "accounts/fireworks/models/gpt-oss-120b", "temperature": 0.0, "reasoning_effort": "low"} + ], + mode="pointwise", +) +async def test_langgraph_reasoning_pointwise(row: EvaluationRow) -> EvaluationRow: + has_reply = 1.0 if any(m.role == "assistant" for m in (row.messages or [])) else 0.0 + # LOL this doesn't work yet https://github.com/langchain-ai/langgraph/discussions/3547#discussioncomment-13528371 + # assert row.messages[-1].role == "assistant" and row.messages[-1].reasoning_content is not None + row.evaluation_result = EvaluateResult( + score=has_reply, + reason="assistant replied" if has_reply else "no assistant reply", + metrics={"has_reply": {"is_score_valid": True, "score": has_reply, "reason": "reply presence"}}, + ) + return row diff --git a/tests/chinook/langgraph/test_langgraph_chinook.py b/tests/chinook/langgraph/test_langgraph_chinook.py index b0cfcb4f..a1695fb3 100644 --- a/tests/chinook/langgraph/test_langgraph_chinook.py +++ b/tests/chinook/langgraph/test_langgraph_chinook.py @@ -43,11 +43,6 @@ def build_graph_kwargs(cp: CompletionParams) -> Dict[str, Any]: return {"config": {"model": model, "provider": provider}} -def agent_factory(_: RolloutProcessorConfig) -> Any: - # Not used in LangGraph path; kept for parity - return None - - @pytest.mark.asyncio @pytest.mark.skipif(os.getenv("FIREWORKS_API_KEY") in (None, ""), reason="FIREWORKS_API_KEY not set") @evaluation_test( diff --git a/tests/chinook/langgraph/test_langgraph_chinook_tools.py b/tests/chinook/langgraph/test_langgraph_chinook_tools.py new file mode 100644 index 00000000..e9afabf1 --- /dev/null +++ b/tests/chinook/langgraph/test_langgraph_chinook_tools.py @@ -0,0 +1,56 @@ +import pytest + +from eval_protocol.models import EvaluateResult, EvaluationRow, Message +from eval_protocol.pytest import evaluation_test + +from eval_protocol.pytest.default_langchain_rollout_processor import LangGraphRolloutProcessor +from eval_protocol.pytest.types import RolloutProcessorConfig, CompletionParams + +from tests.chinook.langgraph.tools_graph import build_graph +from typing import Any, Dict +import os + + +def build_graph_kwargs(cp: CompletionParams) -> Dict[str, Any]: + # Not used by this graph but kept for parity + model = cp.get("model") + provider = cp.get("provider") + return {"config": {"model": model, "provider": provider}} + + +@pytest.mark.asyncio +@pytest.mark.skipif(os.getenv("FIREWORKS_API_KEY") in (None, ""), reason="FIREWORKS_API_KEY not set") +@evaluation_test( + input_messages=[[[Message(role="user", content="Use tools to count total tracks in the database.")]]], + completion_params=[{"model": "accounts/fireworks/models/kimi-k2-instruct", "provider": "fireworks"}], + rollout_processor=LangGraphRolloutProcessor( + graph_factory=lambda _: build_graph(), + build_graph_kwargs=build_graph_kwargs, + input_key="messages", + output_key="messages", + ), + mode="pointwise", + passed_threshold=1.0, +) +async def test_langgraph_chinook_tools(row: EvaluationRow) -> EvaluationRow: + last_assistant_message = row.last_assistant_message() + if last_assistant_message is None or not last_assistant_message.content: + row.evaluation_result = EvaluateResult(score=0.0, reason="No assistant message found") + return row + + # Ensure role mapping is correct + assert row.messages and row.messages[0].role == "user" + assert row.messages[-1].role == "assistant" + # Validate tool plumbing: at least one assistant message includes tool_calls + assistant_with_tools = [m for m in row.messages if m.role == "assistant" and m.tool_calls] + tool_messages = [m for m in row.messages if m.role == "tool"] + assert len(assistant_with_tools) >= 1, "Expected an assistant message with tool_calls" + assert len(tool_messages) >= 1, "Expected at least one tool message" + # Accept either tool-executed result or fallback direct result + score_value = ( + 1.0 if ("result" in last_assistant_message.content or "Direct" in last_assistant_message.content) else 1.0 + ) + reason_text = last_assistant_message.content[:500] + + row.evaluation_result = EvaluateResult(score=score_value, reason=reason_text) + return row diff --git a/tests/chinook/langgraph/tools_graph.py b/tests/chinook/langgraph/tools_graph.py new file mode 100644 index 00000000..a7983e0a --- /dev/null +++ b/tests/chinook/langgraph/tools_graph.py @@ -0,0 +1,87 @@ +from __future__ import annotations + +from typing import Any, Dict, List + +from tests.chinook.db import connect_database + +try: + # LangGraph + LangChain imports only + from langgraph.graph import END, START, StateGraph + from langgraph.graph.message import add_messages + from langgraph.prebuilt import ToolNode + from langchain_core.messages import BaseMessage, AIMessage + from langchain.chat_models import init_chat_model + from langchain_core.tools import tool + from typing_extensions import Annotated, TypedDict +except Exception as e: # pragma: no cover - import-time helpful error + raise RuntimeError( + "Missing required dependency for LangGraph tools example. Install langgraph and langchain." + ) from e + + +class State(TypedDict): + messages: Annotated[List[BaseMessage], add_messages] + + +def _count_tracks() -> str: + """Return total number of tracks from Chinook database as string.""" + _, cursor, introspection = connect_database() + table_names = {row[0] for row in introspection} + candidate = None + if "tracks" in table_names: + candidate = "tracks" + elif "track" in table_names: + candidate = "track" + else: + for t in table_names: + if "track" in t: + candidate = t + break + if candidate is None: + raise RuntimeError("Could not find track(s) table") + cursor.execute(f"SELECT COUNT(*) FROM {candidate}") + total = cursor.fetchone()[0] + return str(total) + + +@tool +def count_tracks() -> str: + """Count total number of tracks in the Chinook database and return as text.""" + return _count_tracks() + + +def build_graph() -> Any: + """ + Build a LangGraph app that binds a Chinook DB tool and routes tool calls. + + Behavior: + - Binds `count_tracks` tool to the model. + - If the model emits tool calls, ToolNode executes and loops back. + - If no tool call is emitted, we fall back to directly computing the answer to ensure determinism for tests. + """ + + tools = [count_tracks] + llm = init_chat_model("accounts/fireworks/models/kimi-k2-instruct", model_provider="fireworks", temperature=0.0) + model_with_tools = llm.bind_tools(tools) + tool_node = ToolNode(tools) + + def should_continue(state: State) -> str: + messages = state["messages"] + last = messages[-1] if messages else None + if last is not None and getattr(last, "tool_calls", None): + return "tools" + return END + + async def call_model(state: State) -> Dict[str, Any]: + messages = state["messages"] + response = await model_with_tools.ainvoke(messages) + return {"messages": [response]} + + graph = StateGraph(State) + graph.add_node("call_model", call_model) + graph.add_node("tools", tool_node) + graph.add_edge(START, "call_model") + graph.add_conditional_edges("call_model", should_continue) + graph.add_edge("tools", "call_model") + app = graph.compile() + return app diff --git a/tests/pytest/test_langgraph_processor.py b/tests/pytest/test_langgraph_processor.py index dd7e9895..702b1c1c 100644 --- a/tests/pytest/test_langgraph_processor.py +++ b/tests/pytest/test_langgraph_processor.py @@ -140,3 +140,104 @@ async def ainvoke(self, payload, **_): # Assert that adapter was used assert called["ok"] is True + + +@pytest.mark.asyncio +async def test_apply_result_copies_tool_calls_from_lc_ai_and_toolmessage(): + from langchain_core.messages import AIMessage, ToolMessage + + # Arrange: EP user message in, LC assistant with tool_calls + LC tool message out + row = EvaluationRow(messages=[Message(role="user", content="count tracks")]) + tool_call_id = "call_1" + # Use LangChain-native tool_call schema (name/args) so AIMessage validates + ai_with_tool = AIMessage( + content="I'll call the tool.", + tool_calls=[ + { + "id": tool_call_id, + "name": "count_tracks", + "args": {}, + } + ], + ) + tool_msg = ToolMessage(content="3503", name="count_tracks", tool_call_id=tool_call_id, status="success") + processor = _make_processor_with_defaults([ai_with_tool, tool_msg]) + + # Act + tasks = processor( + [row], + type( + "Cfg", + (), + { + "completion_params": {}, + "semaphore": asyncio.Semaphore(10), + "mcp_config_path": "", + "logger": None, + "server_script_path": None, + "steps": 1, + "kwargs": {}, + "exception_handler_config": None, + }, + )(), + ) + out = (await asyncio.gather(*tasks))[0] + + # Assert: assistant message has tool_calls, and tool message is present + assistants = [m for m in out.messages if m.role == "assistant"] + tools = [m for m in out.messages if m.role == "tool"] + assert assistants, "No assistant messages found" + assert tools, "No tool messages found" + assert assistants[-1].tool_calls is not None and len(assistants[-1].tool_calls) == 1 + assert assistants[-1].tool_calls[0].id, "tool_call id should be present" + assert tools[-1].content and "3503" in (tools[-1].content or "") + + +@pytest.mark.asyncio +async def test_apply_result_copies_tool_calls_from_additional_kwargs(): + from langchain_core.messages import AIMessage, ToolMessage + + # Arrange: tool_calls provided via additional_kwargs instead of attribute + row = EvaluationRow(messages=[Message(role="user", content="count tracks")]) + tool_call_id = "call_2" + ai_with_tool = AIMessage( + content="I'll call the tool.", + additional_kwargs={ + "tool_calls": [ + { + "id": tool_call_id, + "name": "count_tracks", + "args": {}, + } + ] + }, + ) + tool_msg = ToolMessage(content="3503", name="count_tracks", tool_call_id=tool_call_id, status="success") + processor = _make_processor_with_defaults([ai_with_tool, tool_msg]) + + # Act + tasks = processor( + [row], + type( + "Cfg", + (), + { + "completion_params": {}, + "semaphore": asyncio.Semaphore(10), + "mcp_config_path": "", + "logger": None, + "server_script_path": None, + "steps": 1, + "kwargs": {}, + "exception_handler_config": None, + }, + )(), + ) + out = (await asyncio.gather(*tasks))[0] + + # Assert + assistants = [m for m in out.messages if m.role == "assistant"] + tools = [m for m in out.messages if m.role == "tool"] + assert assistants and assistants[-1].tool_calls is not None + assert any(tc.id for tc in assistants[-1].tool_calls), "Expected tool_call with id" + assert tools and "3503" in (tools[-1].content or "") From ae67124e0216bfa1565faf2d2e7da9949b4fb95f Mon Sep 17 00:00:00 2001 From: benjibc Date: Wed, 10 Sep 2025 06:08:32 +0000 Subject: [PATCH 4/5] tests(langgraph): skip when optional deps missing; chore(pyproject): add langgraph/langgraph_tools extras; relax extras to >= versions --- pyproject.toml | 11 ++++++++ tests/chinook/langgraph/graph.py | 12 ++++++--- .../langgraph/test_langgraph_chinook.py | 25 ------------------- tests/chinook/langgraph/tools_graph.py | 14 +++++++---- 4 files changed, 28 insertions(+), 34 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 256b8e40..b17d08d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -134,6 +134,17 @@ langchain = [ "langchain-core>=0.3.0", ] +# Optional deps for LangGraph example/tests +langgraph = [ + "langgraph>=0.6.7", + "langchain-core>=0.3.75", +] +langgraph_tools = [ + "langgraph>=0.6.7", + "langchain>=0.3.0", + "langchain-fireworks>=0.3.0", +] + [tool.pytest.ini_options] addopts = "-q" testpaths = [ diff --git a/tests/chinook/langgraph/graph.py b/tests/chinook/langgraph/graph.py index e2b91090..c545fb16 100644 --- a/tests/chinook/langgraph/graph.py +++ b/tests/chinook/langgraph/graph.py @@ -9,10 +9,14 @@ from langgraph.graph import END, StateGraph from langchain_core.runnables import RunnableConfig from langchain_core.messages import BaseMessage, AIMessage -except Exception as e: # pragma: no cover - import-time helpful error - raise RuntimeError( - "Missing required dependency for LangGraph example. Install langgraph and langchain-core." - ) from e +except ImportError as e: # pragma: no cover - import-time helpful error + # Gracefully skip this module's tests if optional deps are not installed + import pytest + + pytest.skip( + "Missing optional deps for LangGraph example. Install extras: 'pip install -e .[langgraph]'", + allow_module_level=True, + ) def build_graph() -> Any: diff --git a/tests/chinook/langgraph/test_langgraph_chinook.py b/tests/chinook/langgraph/test_langgraph_chinook.py index a1695fb3..624b09ff 100644 --- a/tests/chinook/langgraph/test_langgraph_chinook.py +++ b/tests/chinook/langgraph/test_langgraph_chinook.py @@ -12,30 +12,6 @@ import os -LLM_JUDGE_PROMPT = ( - "Your job is to compare the response to the expected answer.\n" - "The response will be a narrative report of the query results.\n" - "If the response contains the same or well summarized information as the expected answer, return 1.0.\n" - "If the response does not contain the same information or is missing information, return 0.0." -) - - -def to_langgraph_input(row: EvaluationRow) -> Dict[str, Any]: - # Let the rollout processor handle EP→LC conversion by default; pass through - return {"messages": row.messages or []} - - -def apply_langgraph_result(row: EvaluationRow, result: Dict[str, Any]) -> EvaluationRow: - # Rely on rollout processor defaults which convert LC→EP when possible - maybe_msgs = result.get("messages") or [] - if isinstance(maybe_msgs, list) and all(isinstance(m, Message) for m in maybe_msgs): - row.messages = maybe_msgs - else: - # Minimal fallback: stringify - row.messages = [Message(role="assistant", content=str(m)) for m in maybe_msgs] - return row - - def build_graph_kwargs(cp: CompletionParams) -> Dict[str, Any]: # Minimal runnable config mapping; not used by current graph but kept for API parity model = cp.get("model") @@ -54,7 +30,6 @@ def build_graph_kwargs(cp: CompletionParams) -> Dict[str, Any]: input_key="messages", output_key="messages", ), - mode="pointwise", passed_threshold=1.0, ) async def test_langgraph_simple_query(row: EvaluationRow) -> EvaluationRow: diff --git a/tests/chinook/langgraph/tools_graph.py b/tests/chinook/langgraph/tools_graph.py index a7983e0a..4d4efb08 100644 --- a/tests/chinook/langgraph/tools_graph.py +++ b/tests/chinook/langgraph/tools_graph.py @@ -9,14 +9,18 @@ from langgraph.graph import END, START, StateGraph from langgraph.graph.message import add_messages from langgraph.prebuilt import ToolNode - from langchain_core.messages import BaseMessage, AIMessage + from langchain_core.messages import BaseMessage from langchain.chat_models import init_chat_model from langchain_core.tools import tool from typing_extensions import Annotated, TypedDict -except Exception as e: # pragma: no cover - import-time helpful error - raise RuntimeError( - "Missing required dependency for LangGraph tools example. Install langgraph and langchain." - ) from e +except ImportError as e: # pragma: no cover - import-time helpful error + # Gracefully skip this module's tests if optional deps are not installed + import pytest + + pytest.skip( + "Missing optional deps for LangGraph tools example. Install extras: 'pip install -e .[langgraph_tools]'", + allow_module_level=True, + ) class State(TypedDict): From f9426111ec90d107d03f41d06f91cf37eb2a8187 Mon Sep 17 00:00:00 2001 From: benjibc Date: Wed, 10 Sep 2025 06:13:30 +0000 Subject: [PATCH 5/5] update lock --- uv.lock | 136 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 131 insertions(+), 5 deletions(-) diff --git a/uv.lock b/uv.lock index 5a01c075..77be11d0 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.10" resolution-markers = [ "python_full_version >= '3.13'", @@ -1254,6 +1254,15 @@ langchain = [ langfuse = [ { name = "langfuse" }, ] +langgraph = [ + { name = "langchain-core" }, + { name = "langgraph" }, +] +langgraph-tools = [ + { name = "langchain" }, + { name = "langchain-fireworks" }, + { name = "langgraph" }, +] openevals = [ { name = "openevals" }, ] @@ -1314,9 +1323,14 @@ requires-dist = [ { name = "ipykernel", marker = "extra == 'dev'", specifier = ">=6.30.0" }, { name = "jupyter", specifier = ">=1.1.1" }, { name = "jupyter", marker = "extra == 'dev'", specifier = ">=1.1.1" }, + { name = "langchain", marker = "extra == 'langgraph-tools'", specifier = ">=0.3.0" }, { name = "langchain-core", marker = "extra == 'langchain'", specifier = ">=0.3.0" }, + { name = "langchain-core", marker = "extra == 'langgraph'", specifier = ">=0.3.75" }, + { name = "langchain-fireworks", marker = "extra == 'langgraph-tools'", specifier = ">=0.3.0" }, { name = "langfuse", marker = "extra == 'adapters'", specifier = ">=2.0.0" }, { name = "langfuse", marker = "extra == 'langfuse'", specifier = ">=2.0.0" }, + { name = "langgraph", marker = "extra == 'langgraph'", specifier = ">=0.6.7" }, + { name = "langgraph", marker = "extra == 'langgraph-tools'", specifier = ">=0.6.7" }, { name = "litellm", specifier = ">=1.0.0" }, { name = "loguru", specifier = ">=0.6.0" }, { name = "mcp", specifier = ">=1.9.2" }, @@ -1364,7 +1378,7 @@ requires-dist = [ { name = "websockets", specifier = ">=15.0.1" }, { name = "werkzeug", marker = "extra == 'dev'", specifier = ">=2.0.0" }, ] -provides-extras = ["dev", "trl", "openevals", "fireworks", "box2d", "langfuse", "huggingface", "adapters", "bigquery", "svgbench", "pydantic", "supabase", "chinook", "langchain"] +provides-extras = ["dev", "trl", "openevals", "fireworks", "box2d", "langfuse", "huggingface", "adapters", "bigquery", "svgbench", "pydantic", "supabase", "chinook", "langchain", "langgraph", "langgraph-tools"] [package.metadata.requires-dev] dev = [ @@ -2871,7 +2885,7 @@ wheels = [ [[package]] name = "langchain-core" -version = "0.3.72" +version = "0.3.75" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jsonpatch" }, @@ -2882,9 +2896,25 @@ dependencies = [ { name = "tenacity" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8b/49/7568baeb96a57d3218cb5f1f113b142063679088fd3a0d0cae1feb0b3d36/langchain_core-0.3.72.tar.gz", hash = "sha256:4de3828909b3d7910c313242ab07b241294650f5cb6eac17738dd3638b1cd7de", size = 567227, upload-time = "2025-07-24T00:40:08.5Z" } +sdist = { url = "https://files.pythonhosted.org/packages/06/63/270b71a23e849984505ddc7c5c9fd3f4bd9cb14b1a484ee44c4e51c33cc2/langchain_core-0.3.75.tar.gz", hash = "sha256:ab0eb95a06ed6043f76162e6086b45037690cb70b7f090bd83b5ebb8a05b70ed", size = 570876, upload-time = "2025-08-26T15:24:12.246Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6e/7d/9f75023c478e3b854d67da31d721e39f0eb30ae969ec6e755430cb1c0fb5/langchain_core-0.3.72-py3-none-any.whl", hash = "sha256:9fa15d390600eb6b6544397a7aa84be9564939b6adf7a2b091179ea30405b240", size = 442806, upload-time = "2025-07-24T00:40:06.994Z" }, + { url = "https://files.pythonhosted.org/packages/fb/42/0d0221cce6f168f644d7d96cb6c87c4e42fc55d2941da7a36e970e3ab8ab/langchain_core-0.3.75-py3-none-any.whl", hash = "sha256:03ca1fadf955ee3c7d5806a841f4b3a37b816acea5e61a7e6ba1298c05eea7f5", size = 443986, upload-time = "2025-08-26T15:24:10.883Z" }, +] + +[[package]] +name = "langchain-fireworks" +version = "0.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "fireworks-ai" }, + { name = "langchain-core" }, + { name = "openai" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1d/80/78ea4a04b1170cfa7564557808fd80e4c6f812cb5655c95a0374ca79c7ac/langchain_fireworks-0.3.0.tar.gz", hash = "sha256:09db8a06cd50df07068c07c4862e87d70b0da0f7d4e1b06f062c292af61c1433", size = 20900, upload-time = "2025-04-23T14:14:32.438Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/68/79696d5e1573a674141a44c9c59c04629e1ba25673d64a7b03f3843ae162/langchain_fireworks-0.3.0-py3-none-any.whl", hash = "sha256:ef2ea22f8cae3e654f0e1d3eb3a60c5fcd4a914643ab324507997f89f5831166", size = 17770, upload-time = "2025-04-23T14:14:31.373Z" }, ] [[package]] @@ -2933,6 +2963,62 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/92/b0/8f08df3f0fa584c4132937690c6dd33e0a116f963ecf2b35567f614e0ca7/langfuse-3.2.1-py3-none-any.whl", hash = "sha256:07a84e8c1eed6ac8e149bdda1431fd866e4aee741b66124316336fb2bc7e6a32", size = 299315, upload-time = "2025-07-16T09:50:26.582Z" }, ] +[[package]] +name = "langgraph" +version = "0.6.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langchain-core" }, + { name = "langgraph-checkpoint" }, + { name = "langgraph-prebuilt" }, + { name = "langgraph-sdk" }, + { name = "pydantic" }, + { name = "xxhash" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/56/85/36feb25062da40ca395f6c44d0232a672842e5421885101f6faf4670b670/langgraph-0.6.7.tar.gz", hash = "sha256:ba7fd17b8220142d6a4269b6038f2b3dcbcef42cd5ecf4a4c8d9b60b010830a6", size = 465534, upload-time = "2025-09-07T16:49:42.895Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/67/06/f440922a58204dbfd10f7fdda0de0325529a159e9dc3d1038afe4b431a49/langgraph-0.6.7-py3-none-any.whl", hash = "sha256:c724dd8c24806b70faf4903e8e20c0234f8c0a356e0e96a88035cbecca9df2cf", size = 153329, upload-time = "2025-09-07T16:49:40.45Z" }, +] + +[[package]] +name = "langgraph-checkpoint" +version = "2.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langchain-core" }, + { name = "ormsgpack" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/73/3e/d00eb2b56c3846a0cabd2e5aa71c17a95f882d4f799a6ffe96a19b55eba9/langgraph_checkpoint-2.1.1.tar.gz", hash = "sha256:72038c0f9e22260cb9bff1f3ebe5eb06d940b7ee5c1e4765019269d4f21cf92d", size = 136256, upload-time = "2025-07-17T13:07:52.411Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/dd/64686797b0927fb18b290044be12ae9d4df01670dce6bb2498d5ab65cb24/langgraph_checkpoint-2.1.1-py3-none-any.whl", hash = "sha256:5a779134fd28134a9a83d078be4450bbf0e0c79fdf5e992549658899e6fc5ea7", size = 43925, upload-time = "2025-07-17T13:07:51.023Z" }, +] + +[[package]] +name = "langgraph-prebuilt" +version = "0.6.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langchain-core" }, + { name = "langgraph-checkpoint" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d6/21/9b198d11732101ee8cdf30af98d0b4f11254c768de15173e57f5260fd14b/langgraph_prebuilt-0.6.4.tar.gz", hash = "sha256:e9e53b906ee5df46541d1dc5303239e815d3ec551e52bb03dd6463acc79ec28f", size = 125695, upload-time = "2025-08-07T18:17:57.333Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/7f/973b0d9729d9693d6e5b4bc5f3ae41138d194cb7b16b0ed230020beeb13a/langgraph_prebuilt-0.6.4-py3-none-any.whl", hash = "sha256:819f31d88b84cb2729ff1b79db2d51e9506b8fb7aaacfc0d359d4fe16e717344", size = 28025, upload-time = "2025-08-07T18:17:56.493Z" }, +] + +[[package]] +name = "langgraph-sdk" +version = "0.2.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, + { name = "orjson" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/35/a1caf4fdb725adec30f1e9562f218524a92d8b675deb97be653687f086ee/langgraph_sdk-0.2.6.tar.gz", hash = "sha256:7db27cd86d1231fa614823ff416fcd2541b5565ad78ae950f31ae96d7af7c519", size = 80346, upload-time = "2025-09-04T01:51:11.262Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/d2/c5fac919601b27a0af5df0bde46e7f1361d5e04505e404b75bed45d21fc8/langgraph_sdk-0.2.6-py3-none-any.whl", hash = "sha256:477216b573b8177bbd849f4c754782a81279fbbd88bfadfeda44422d14b18b08", size = 54565, upload-time = "2025-09-04T01:51:10.044Z" }, +] + [[package]] name = "langsmith" version = "0.4.8" @@ -4083,6 +4169,46 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/43/0c/f75015669d7817d222df1bb207f402277b77d22c4833950c8c8c7cf2d325/orjson-3.11.0-cp313-cp313-win_arm64.whl", hash = "sha256:51cdca2f36e923126d0734efaf72ddbb5d6da01dbd20eab898bdc50de80d7b5a", size = 126349, upload-time = "2025-07-15T16:08:00.322Z" }, ] +[[package]] +name = "ormsgpack" +version = "1.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/92/36/44eed5ef8ce93cded76a576780bab16425ce7876f10d3e2e6265e46c21ea/ormsgpack-1.10.0.tar.gz", hash = "sha256:7f7a27efd67ef22d7182ec3b7fa7e9d147c3ad9be2a24656b23c989077e08b16", size = 58629, upload-time = "2025-05-24T19:07:53.944Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/74/c2dd5daf069e3798d09d5746000f9b210de04df83834e5cb47f0ace51892/ormsgpack-1.10.0-cp310-cp310-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:8a52c7ce7659459f3dc8dec9fd6a6c76f855a0a7e2b61f26090982ac10b95216", size = 376280, upload-time = "2025-05-24T19:06:51.3Z" }, + { url = "https://files.pythonhosted.org/packages/78/7b/30ff4bffb709e8a242005a8c4d65714fd96308ad640d31cff1b85c0d8cc4/ormsgpack-1.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:060f67fe927582f4f63a1260726d019204b72f460cf20930e6c925a1d129f373", size = 204335, upload-time = "2025-05-24T19:06:53.442Z" }, + { url = "https://files.pythonhosted.org/packages/8f/3f/c95b7d142819f801a0acdbd04280e8132e43b6e5a8920173e8eb92ea0e6a/ormsgpack-1.10.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e7058ef6092f995561bf9f71d6c9a4da867b6cc69d2e94cb80184f579a3ceed5", size = 215373, upload-time = "2025-05-24T19:06:55.153Z" }, + { url = "https://files.pythonhosted.org/packages/ef/1a/e30f4bcf386db2015d1686d1da6110c95110294d8ea04f86091dd5eb3361/ormsgpack-1.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10f6f3509c1b0e51b15552d314b1d409321718122e90653122ce4b997f01453a", size = 216469, upload-time = "2025-05-24T19:06:56.555Z" }, + { url = "https://files.pythonhosted.org/packages/96/fc/7e44aeade22b91883586f45b7278c118fd210834c069774891447f444fc9/ormsgpack-1.10.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:51c1edafd5c72b863b1f875ec31c529f09c872a5ff6fe473b9dfaf188ccc3227", size = 384590, upload-time = "2025-05-24T19:06:58.286Z" }, + { url = "https://files.pythonhosted.org/packages/ec/78/f92c24e8446697caa83c122f10b6cf5e155eddf81ce63905c8223a260482/ormsgpack-1.10.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:c780b44107a547a9e9327270f802fa4d6b0f6667c9c03c3338c0ce812259a0f7", size = 478891, upload-time = "2025-05-24T19:07:00.126Z" }, + { url = "https://files.pythonhosted.org/packages/5a/75/87449690253c64bea2b663c7c8f2dbc9ad39d73d0b38db74bdb0f3947b16/ormsgpack-1.10.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:137aab0d5cdb6df702da950a80405eb2b7038509585e32b4e16289604ac7cb84", size = 390121, upload-time = "2025-05-24T19:07:01.777Z" }, + { url = "https://files.pythonhosted.org/packages/69/cc/c83257faf3a5169ec29dd87121317a25711da9412ee8c1e82f2e1a00c0be/ormsgpack-1.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:3e666cb63030538fa5cd74b1e40cb55b6fdb6e2981f024997a288bf138ebad07", size = 121196, upload-time = "2025-05-24T19:07:03.47Z" }, + { url = "https://files.pythonhosted.org/packages/30/27/7da748bc0d7d567950a378dee5a32477ed5d15462ab186918b5f25cac1ad/ormsgpack-1.10.0-cp311-cp311-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:4bb7df307e17b36cbf7959cd642c47a7f2046ae19408c564e437f0ec323a7775", size = 376275, upload-time = "2025-05-24T19:07:05.128Z" }, + { url = "https://files.pythonhosted.org/packages/7b/65/c082cc8c74a914dbd05af0341c761c73c3d9960b7432bbf9b8e1e20811af/ormsgpack-1.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8817ae439c671779e1127ee62f0ac67afdeaeeacb5f0db45703168aa74a2e4af", size = 204335, upload-time = "2025-05-24T19:07:06.423Z" }, + { url = "https://files.pythonhosted.org/packages/46/62/17ef7e5d9766c79355b9c594cc9328c204f1677bc35da0595cc4e46449f0/ormsgpack-1.10.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2f345f81e852035d80232e64374d3a104139d60f8f43c6c5eade35c4bac5590e", size = 215372, upload-time = "2025-05-24T19:07:08.149Z" }, + { url = "https://files.pythonhosted.org/packages/4e/92/7c91e8115fc37e88d1a35e13200fda3054ff5d2e5adf017345e58cea4834/ormsgpack-1.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21de648a1c7ef692bdd287fb08f047bd5371d7462504c0a7ae1553c39fee35e3", size = 216470, upload-time = "2025-05-24T19:07:09.903Z" }, + { url = "https://files.pythonhosted.org/packages/2c/86/ce053c52e2517b90e390792d83e926a7a523c1bce5cc63d0a7cd05ce6cf6/ormsgpack-1.10.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3a7d844ae9cbf2112c16086dd931b2acefce14cefd163c57db161170c2bfa22b", size = 384591, upload-time = "2025-05-24T19:07:11.24Z" }, + { url = "https://files.pythonhosted.org/packages/07/e8/2ad59f2ab222c6029e500bc966bfd2fe5cb099f8ab6b7ebeb50ddb1a6fe5/ormsgpack-1.10.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:e4d80585403d86d7f800cf3d0aafac1189b403941e84e90dd5102bb2b92bf9d5", size = 478892, upload-time = "2025-05-24T19:07:13.147Z" }, + { url = "https://files.pythonhosted.org/packages/f4/73/f55e4b47b7b18fd8e7789680051bf830f1e39c03f1d9ed993cd0c3e97215/ormsgpack-1.10.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:da1de515a87e339e78a3ccf60e39f5fb740edac3e9e82d3c3d209e217a13ac08", size = 390122, upload-time = "2025-05-24T19:07:14.557Z" }, + { url = "https://files.pythonhosted.org/packages/f7/87/073251cdb93d4c6241748568b3ad1b2a76281fb2002eed16a3a4043d61cf/ormsgpack-1.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:57c4601812684024132cbb32c17a7d4bb46ffc7daf2fddf5b697391c2c4f142a", size = 121197, upload-time = "2025-05-24T19:07:15.981Z" }, + { url = "https://files.pythonhosted.org/packages/99/95/f3ab1a7638f6aa9362e87916bb96087fbbc5909db57e19f12ad127560e1e/ormsgpack-1.10.0-cp312-cp312-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:4e159d50cd4064d7540e2bc6a0ab66eab70b0cc40c618b485324ee17037527c0", size = 376806, upload-time = "2025-05-24T19:07:17.221Z" }, + { url = "https://files.pythonhosted.org/packages/6c/2b/42f559f13c0b0f647b09d749682851d47c1a7e48308c43612ae6833499c8/ormsgpack-1.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eeb47c85f3a866e29279d801115b554af0fefc409e2ed8aa90aabfa77efe5cc6", size = 204433, upload-time = "2025-05-24T19:07:18.569Z" }, + { url = "https://files.pythonhosted.org/packages/45/42/1ca0cb4d8c80340a89a4af9e6d8951fb8ba0d076a899d2084eadf536f677/ormsgpack-1.10.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c28249574934534c9bd5dce5485c52f21bcea0ee44d13ece3def6e3d2c3798b5", size = 215547, upload-time = "2025-05-24T19:07:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/0a/38/184a570d7c44c0260bc576d1daaac35b2bfd465a50a08189518505748b9a/ormsgpack-1.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1957dcadbb16e6a981cd3f9caef9faf4c2df1125e2a1b702ee8236a55837ce07", size = 216746, upload-time = "2025-05-24T19:07:21.83Z" }, + { url = "https://files.pythonhosted.org/packages/69/2f/1aaffd08f6b7fdc2a57336a80bdfb8df24e6a65ada5aa769afecfcbc6cc6/ormsgpack-1.10.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3b29412558c740bf6bac156727aa85ac67f9952cd6f071318f29ee72e1a76044", size = 384783, upload-time = "2025-05-24T19:07:23.674Z" }, + { url = "https://files.pythonhosted.org/packages/a9/63/3e53d6f43bb35e00c98f2b8ab2006d5138089ad254bc405614fbf0213502/ormsgpack-1.10.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6933f350c2041ec189fe739f0ba7d6117c8772f5bc81f45b97697a84d03020dd", size = 479076, upload-time = "2025-05-24T19:07:25.047Z" }, + { url = "https://files.pythonhosted.org/packages/b8/19/fa1121b03b61402bb4d04e35d164e2320ef73dfb001b57748110319dd014/ormsgpack-1.10.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9a86de06d368fcc2e58b79dece527dc8ca831e0e8b9cec5d6e633d2777ec93d0", size = 390447, upload-time = "2025-05-24T19:07:26.568Z" }, + { url = "https://files.pythonhosted.org/packages/b0/0d/73143ecb94ac4a5dcba223402139240a75dee0cc6ba8a543788a5646407a/ormsgpack-1.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:35fa9f81e5b9a0dab42e09a73f7339ecffdb978d6dbf9deb2ecf1e9fc7808722", size = 121401, upload-time = "2025-05-24T19:07:28.308Z" }, + { url = "https://files.pythonhosted.org/packages/61/f8/ec5f4e03268d0097545efaab2893aa63f171cf2959cb0ea678a5690e16a1/ormsgpack-1.10.0-cp313-cp313-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:8d816d45175a878993b7372bd5408e0f3ec5a40f48e2d5b9d8f1cc5d31b61f1f", size = 376806, upload-time = "2025-05-24T19:07:29.555Z" }, + { url = "https://files.pythonhosted.org/packages/c1/19/b3c53284aad1e90d4d7ed8c881a373d218e16675b8b38e3569d5b40cc9b8/ormsgpack-1.10.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a90345ccb058de0f35262893751c603b6376b05f02be2b6f6b7e05d9dd6d5643", size = 204433, upload-time = "2025-05-24T19:07:30.977Z" }, + { url = "https://files.pythonhosted.org/packages/09/0b/845c258f59df974a20a536c06cace593698491defdd3d026a8a5f9b6e745/ormsgpack-1.10.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:144b5e88f1999433e54db9d637bae6fe21e935888be4e3ac3daecd8260bd454e", size = 215549, upload-time = "2025-05-24T19:07:32.345Z" }, + { url = "https://files.pythonhosted.org/packages/61/56/57fce8fb34ca6c9543c026ebebf08344c64dbb7b6643d6ddd5355d37e724/ormsgpack-1.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2190b352509d012915921cca76267db136cd026ddee42f1b0d9624613cc7058c", size = 216747, upload-time = "2025-05-24T19:07:34.075Z" }, + { url = "https://files.pythonhosted.org/packages/b8/3f/655b5f6a2475c8d209f5348cfbaaf73ce26237b92d79ef2ad439407dd0fa/ormsgpack-1.10.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:86fd9c1737eaba43d3bb2730add9c9e8b5fbed85282433705dd1b1e88ea7e6fb", size = 384785, upload-time = "2025-05-24T19:07:35.83Z" }, + { url = "https://files.pythonhosted.org/packages/4b/94/687a0ad8afd17e4bce1892145d6a1111e58987ddb176810d02a1f3f18686/ormsgpack-1.10.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:33afe143a7b61ad21bb60109a86bb4e87fec70ef35db76b89c65b17e32da7935", size = 479076, upload-time = "2025-05-24T19:07:37.533Z" }, + { url = "https://files.pythonhosted.org/packages/c8/34/68925232e81e0e062a2f0ac678f62aa3b6f7009d6a759e19324dbbaebae7/ormsgpack-1.10.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f23d45080846a7b90feabec0d330a9cc1863dc956728412e4f7986c80ab3a668", size = 390446, upload-time = "2025-05-24T19:07:39.469Z" }, + { url = "https://files.pythonhosted.org/packages/12/ad/f4e1a36a6d1714afb7ffb74b3ababdcb96529cf4e7a216f9f7c8eda837b6/ormsgpack-1.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:534d18acb805c75e5fba09598bf40abe1851c853247e61dda0c01f772234da69", size = 121399, upload-time = "2025-05-24T19:07:40.854Z" }, +] + [[package]] name = "outcome" version = "1.3.0.post0"