Skip to content

Commit f1a6e72

Browse files
author
Dylan Huang
committed
Merge branch 'main' into rollout-logs-integration-part-2
2 parents f8a2e4c + 003a140 commit f1a6e72

File tree

12 files changed

+658
-97
lines changed

12 files changed

+658
-97
lines changed

eval_protocol/__init__.py

Lines changed: 7 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -23,15 +23,6 @@
2323
test_mcp,
2424
)
2525
from .data_loader import DynamicDataLoader, InlineDataLoader
26-
27-
# Try to import FireworksPolicy if available
28-
try:
29-
from .mcp_env import FireworksPolicy
30-
31-
_FIREWORKS_AVAILABLE = True
32-
except (ImportError, AttributeError):
33-
_FIREWORKS_AVAILABLE = False
34-
# Import submodules to make them available via eval_protocol.rewards, etc.
3526
from . import mcp, rewards
3627
from .models import EvaluateResult, Message, MetricResult, EvaluationRow, InputMetadata, Status
3728
from .playback_policy import PlaybackPolicyBase
@@ -42,6 +33,13 @@
4233
from .pytest import evaluation_test, SingleTurnRolloutProcessor, RemoteRolloutProcessor
4334
from .pytest.parameterize import DefaultParameterIdGenerator
4435

36+
from .types.remote_rollout_processor import (
37+
InitRequest,
38+
RolloutMetadata,
39+
StatusResponse,
40+
create_langfuse_config_tags,
41+
)
42+
4543
try:
4644
from .adapters import OpenAIResponsesAdapter
4745
except ImportError:
@@ -62,14 +60,6 @@
6260
except ImportError:
6361
LangSmithAdapter = None
6462

65-
# Remote server types
66-
from .types.remote_rollout_processor import (
67-
InitRequest,
68-
RolloutMetadata,
69-
StatusResponse,
70-
create_langfuse_config_tags,
71-
)
72-
7363
warnings.filterwarnings("default", category=DeprecationWarning, module="eval_protocol")
7464

7565
__all__ = [

eval_protocol/adapters/huggingface.py

Lines changed: 2 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -13,12 +13,9 @@
1313
logger = logging.getLogger(__name__)
1414

1515
try:
16-
from datasets import Dataset, DatasetDict, load_dataset
17-
18-
DATASETS_AVAILABLE = True
16+
from datasets import Dataset, DatasetDict, load_dataset # pyright: ignore[reportAttributeAccessIssue]
1917
except ImportError:
20-
DATASETS_AVAILABLE = False
21-
logger.warning("HuggingFace datasets not installed. Install with: pip install 'eval-protocol[huggingface]'")
18+
raise ImportError("HuggingFace datasets not installed. Install with: pip install 'eval-protocol[huggingface]'")
2219

2320
# Type alias for transformation function
2421
TransformFunction = Callable[[Dict[str, Any]], Dict[str, Any]]
@@ -80,11 +77,6 @@ def __init__(
8077
revision: Optional dataset revision/commit hash
8178
**load_dataset_kwargs: Additional arguments to pass to load_dataset
8279
"""
83-
if not DATASETS_AVAILABLE:
84-
raise ImportError(
85-
"HuggingFace datasets not installed. Install with: pip install 'eval-protocol[huggingface]'"
86-
)
87-
8880
self.dataset_id = dataset_id
8981
self.transform_fn = transform_fn
9082
self.config_name = config_name

eval_protocol/cli.py

Lines changed: 41 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828
from .cli_commands.logs import logs_command
2929
from .cli_commands.preview import preview_command
3030
from .cli_commands.run_eval_cmd import hydra_cli_entry_point
31+
from .cli_commands.upload import upload_command
3132

3233

3334
def parse_args(args=None):
@@ -291,6 +292,44 @@ def parse_args(args=None):
291292
logs_parser = subparsers.add_parser("logs", help="Serve logs with file watching and real-time updates")
292293
logs_parser.add_argument("--port", type=int, default=8000, help="Port to bind to (default: 8000)")
293294

295+
# Upload command
296+
upload_parser = subparsers.add_parser(
297+
"upload",
298+
help="Scan for evaluation tests, select, and upload as Fireworks evaluators",
299+
)
300+
upload_parser.add_argument(
301+
"--path",
302+
default=".",
303+
help="Path to search for evaluation tests (default: current directory)",
304+
)
305+
upload_parser.add_argument(
306+
"--entry",
307+
help="Entrypoint of evaluation test to upload (module:function or path::function). For multiple, separate by commas.",
308+
)
309+
upload_parser.add_argument(
310+
"--id",
311+
help="Evaluator ID to use (if multiple selections, a numeric suffix is appended)",
312+
)
313+
upload_parser.add_argument(
314+
"--display-name",
315+
help="Display name for evaluator (defaults to ID)",
316+
)
317+
upload_parser.add_argument(
318+
"--description",
319+
help="Description for evaluator",
320+
)
321+
upload_parser.add_argument(
322+
"--force",
323+
action="store_true",
324+
help="Overwrite existing evaluator with the same ID",
325+
)
326+
upload_parser.add_argument(
327+
"--yes",
328+
"-y",
329+
action="store_true",
330+
help="Non-interactive: upload all discovered evaluation tests",
331+
)
332+
294333
# Run command (for Hydra-based evaluations)
295334
# This subparser intentionally defines no arguments itself.
296335
# All arguments after 'run' will be passed to Hydra by parse_known_args.
@@ -346,6 +385,8 @@ def main():
346385
return agent_eval_command(args)
347386
elif args.command == "logs":
348387
return logs_command(args)
388+
elif args.command == "upload":
389+
return upload_command(args)
349390
elif args.command == "run":
350391
# For the 'run' command, Hydra takes over argument parsing.
351392

0 commit comments

Comments
 (0)