Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions eval_protocol/benchmarks/test_livebench_data_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ def _read_jsonl_table_from_text(text: str, header_cols: List[str]):

reader = _read_df_v1 if version == "v1" else _read_df_v2
gt_df = reader(output_fmt, ground_truth)
assert gt_df is not None, "GT dataframe is None"

llm_clean = _clean_llm_output(llm_answer)
llm_clean = _remove_initial_phrase(llm_clean)
Expand Down
1 change: 1 addition & 0 deletions eval_protocol/benchmarks/test_tau_bench_airline.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ def test_tau_bench_airline_evaluation(row: EvaluationRow) -> EvaluationRow:
task = Task(
id="Filler", evaluation_criteria=evaluation_criteria, user_scenario=UserScenario(instructions="Filler")
) # id and user_scenario are required for the Task type but not used in calculating reward
assert task.evaluation_criteria is not None, "Task evaluation criteria is None"

if RewardType.DB in task.evaluation_criteria.reward_basis:
env_reward_info = EnvironmentEvaluator.calculate_reward(
Expand Down
1 change: 1 addition & 0 deletions eval_protocol/execution/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ async def _execute_standard_generation(
if system_prompt_content:
current_messages_for_rollout.append({"role": "system", "content": system_prompt_content})
current_messages_for_rollout.append({"role": "user", "content": user_query})
assert self.model_client is not None, "at this point model client needs to be initialized"

generation_output_std = await self.model_client.generate(
messages=current_messages_for_rollout,
Expand Down
17 changes: 13 additions & 4 deletions eval_protocol/mcp/client/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import logging
import time
from contextlib import AsyncExitStack
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional, Tuple, cast

import httpx
from mcp.client.session import ClientSession
Expand Down Expand Up @@ -276,7 +276,10 @@ async def _get_initial_state_from_mcp_resource(self, session: MCPSession) -> Any
try:
# List available resources - this is where initial state should come from
logger.debug(f"Session {session.session_id}: Discovering MCP resources for initial state...")
resources_response = await mcp_session.list_resources()
mcp_session_local = session._mcp_session
if mcp_session_local is None:
raise RuntimeError("Session not initialized while listing resources")
resources_response = await mcp_session_local.list_resources()
resources = resources_response.resources if hasattr(resources_response, "resources") else []
logger.debug(f"Session {session.session_id}: Found {len(resources)} MCP resources")
for resource in resources:
Expand All @@ -303,7 +306,10 @@ async def _get_initial_state_from_mcp_resource(self, session: MCPSession) -> Any
f"Session {session.session_id}: Reading initial state from resource: {initial_state_resource.uri}"
)

resource_content = await mcp_session.read_resource(initial_state_resource.uri)
mcp_session_for_read = session._mcp_session
if mcp_session_for_read is None:
raise RuntimeError("Session not initialized while reading resource")
resource_content = await mcp_session_for_read.read_resource(initial_state_resource.uri)

# Handle the new ResourceContents format
text_value = getattr(resource_content, "text", None)
Expand Down Expand Up @@ -348,7 +354,10 @@ async def _get_initial_state_from_mcp_resource(self, session: MCPSession) -> Any
f"Session {session.session_id}: About to call mcp_session.read_resource with fallback URI: {first_resource.uri}"
)

resource_content = await mcp_session.read_resource(first_resource.uri)
mcp_session_for_fallback_read = session._mcp_session
if mcp_session_for_fallback_read is None:
raise RuntimeError("Session not initialized while reading fallback resource")
resource_content = await mcp_session_for_fallback_read.read_resource(first_resource.uri)

logger.debug(
f"Session {session.session_id}: fallback read_resource returned type: {type(resource_content)}"
Expand Down
541 changes: 0 additions & 541 deletions eval_protocol/mcp_agent/intermediary_server.py

This file was deleted.

307 changes: 0 additions & 307 deletions eval_protocol/mcp_agent/orchestration/remote_http_client.py

This file was deleted.

Loading
Loading