Skip to content

Commit 556e2c9

Browse files
author
Dylan Huang
committed
Merge branch 'main' into link-to-local-ui-part-2
2 parents 11c10ec + fdd76dc commit 556e2c9

File tree

9 files changed

+567
-441
lines changed

9 files changed

+567
-441
lines changed

README.md

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,18 @@
44

55
**The open-source toolkit for building your internal model leaderboard.**
66

7-
When you have multiple AI models to choose from—different versions, providers, or configurations—how do you know which one is best for your use case?
7+
When you have multiple AI models to choose from—different versions, providers,
8+
or configurations—how do you know which one is best for your use case?
9+
10+
## 🚀 Features
11+
12+
- **Custom Evaluations**: Write evaluations tailored to your specific business needs
13+
- **Auto-Evaluation**: Stack-rank models using LLMs as judges on just model traces, via out-of-the-box evaluators
14+
- **RL Environments via MCP**: Build reinforcement learning environments using the Model Context Protocol (MCP) to simulate user interactions and advanced evaluation scenarios
15+
- **Consistent Testing**: Test across various models and configurations with a unified framework
16+
- **Resilient Runtime**: Automatic retries for unstable LLM APIs and concurrent execution for long-running evaluations
17+
- **Rich Visualizations**: Built-in pivot tables and visualizations for result analysis
18+
- **Data-Driven Decisions**: Make informed model deployment decisions based on comprehensive evaluation results
819

920
## Quick Examples
1021

@@ -69,15 +80,6 @@ def test_math_reasoning(row: EvaluationRow) -> EvaluationRow:
6980
return row
7081
```
7182

72-
## 🚀 Features
73-
74-
- **Custom Evaluations**: Write evaluations tailored to your specific business needs
75-
- **Auto-Evaluation**: Stack-rank models using LLMs as judges with just model traces
76-
- **Model Context Protocol (MCP) Integration**: Build reinforcement learning environments and trigger user simulations for complex scenarios
77-
- **Consistent Testing**: Test across various models and configurations with a unified framework
78-
- **Resilient Runtime**: Automatic retries for unstable LLM APIs and concurrent execution for long-running evaluations
79-
- **Rich Visualizations**: Built-in pivot tables and visualizations for result analysis
80-
- **Data-Driven Decisions**: Make informed model deployment decisions based on comprehensive evaluation results
8183

8284
## 📚 Resources
8385

eval_protocol/adapters/langfuse.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@
55
"""
66

77
from __future__ import annotations
8-
98
import logging
109
import random
1110
import time
1211
from datetime import datetime, timedelta
13-
from typing import Any, Dict, List, Optional, Protocol, TYPE_CHECKING
12+
from typing import Any, Dict, List, Optional, Protocol, TYPE_CHECKING, cast
1413

14+
from langfuse.api.resources.commons.types.observations_view import ObservationsView
1515
from eval_protocol.models import EvaluationRow, InputMetadata, Message
1616
from .base import BaseAdapter
1717
from .utils import extract_messages_from_data
@@ -232,12 +232,12 @@ class LangfuseAdapter(BaseAdapter):
232232
... ))
233233
"""
234234

235-
def __init__(self):
235+
def __init__(self, client: Optional[Any] = None):
236236
"""Initialize the Langfuse adapter."""
237237
if not LANGFUSE_AVAILABLE:
238238
raise ImportError("Langfuse not installed. Install with: pip install 'eval-protocol[langfuse]'")
239239

240-
self.client = get_client()
240+
self.client = client or cast(Any, get_client)()
241241

242242
def get_evaluation_rows(
243243
self,

eval_protocol/adapters/langsmith.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from __future__ import annotations
1111

1212
import logging
13-
from typing import Any, Dict, List, Optional, Iterable
13+
from typing import Any, Dict, List, Optional, Iterable, cast
1414

1515
from eval_protocol.models import EvaluationRow, InputMetadata, Message
1616
from .base import BaseAdapter
@@ -23,6 +23,7 @@
2323
LANGSMITH_AVAILABLE = True
2424
except ImportError:
2525
LANGSMITH_AVAILABLE = False
26+
Client = None # type: ignore[misc]
2627

2728

2829
class LangSmithAdapter(BaseAdapter):
@@ -38,9 +39,11 @@ class LangSmithAdapter(BaseAdapter):
3839
def __init__(self, client: Optional[Any] = None) -> None:
3940
if not LANGSMITH_AVAILABLE:
4041
raise ImportError("LangSmith not installed. Install with: pip install 'eval-protocol[langsmith]'")
41-
# Client is provided by langsmith package; typing is relaxed to Any to avoid
42-
# static analysis issues when stubs aren't available.
43-
self.client = client or Client() # type: ignore[reportCallIssue]
42+
if client is not None:
43+
self.client = client
44+
else:
45+
assert Client is not None
46+
self.client = cast(Any, Client)()
4447

4548
def get_evaluation_rows(
4649
self,

examples/langsmith/README.md

Lines changed: 0 additions & 24 deletions
This file was deleted.

examples/langsmith/dump_traces_langsmith.py

Lines changed: 0 additions & 115 deletions
This file was deleted.

examples/langsmith/emit_tool_calls.py

Lines changed: 0 additions & 116 deletions
This file was deleted.

0 commit comments

Comments (0)