diff --git a/eval_protocol/cli.py b/eval_protocol/cli.py index 90b620c1..e8125390 100644 --- a/eval_protocol/cli.py +++ b/eval_protocol/cli.py @@ -47,255 +47,256 @@ def parse_args(args=None): subparsers = parser.add_subparsers(dest="command", help="Command to run") - # Preview command - preview_parser = subparsers.add_parser("preview", help="Preview an evaluator with sample data") - preview_parser.add_argument( - "--metrics-folders", - "-m", - nargs="+", - help="Metric folders in format 'name=path', e.g., 'clarity=./metrics/clarity'", - ) - - # Make samples optional to allow HF dataset option - preview_parser.add_argument( - "--samples", - "-s", - required=False, - help="Path to JSONL file containing sample data", - ) - preview_parser.add_argument( - "--max-samples", - type=int, - default=5, - help="Maximum number of samples to process (default: 5)", - ) - - # Add HuggingFace dataset options - hf_group = preview_parser.add_argument_group("HuggingFace Dataset Options") - hf_group.add_argument( - "--huggingface-dataset", - "--hf", - help="HuggingFace dataset name (e.g., 'deepseek-ai/DeepSeek-ProverBench')", - ) - hf_group.add_argument( - "--huggingface-split", - default="train", - help="Dataset split to use (default: 'train')", - ) - hf_group.add_argument( - "--huggingface-prompt-key", - default="prompt", - help="Key in the dataset containing the prompt text (default: 'prompt')", - ) - hf_group.add_argument( - "--huggingface-response-key", - default="response", - help="Key in the dataset containing the response text (default: 'response')", - ) - hf_group.add_argument( - "--huggingface-key-map", - help="JSON mapping of dataset keys to Eval Protocol message keys", - ) - preview_parser.add_argument( - "--remote-url", - help="URL of a remote reward function endpoint to preview against. If provided, metrics-folders might be ignored.", - ) - - # Deploy command - deploy_parser = subparsers.add_parser("deploy", help="Create and deploy an evaluator, or register a remote one") - deploy_parser.add_argument("--id", required=True, help="ID for the evaluator") - deploy_parser.add_argument( - "--metrics-folders", - "-m", - nargs="+", - required=False, # No longer strictly required if --remote-url is used - help="Metric folders in format 'name=path', e.g., 'clarity=./metrics/clarity'. Required if not using --remote-url.", - ) - deploy_parser.add_argument( - "--display-name", - help="Display name for the evaluator (defaults to ID if not provided)", - ) - deploy_parser.add_argument("--description", help="Description for the evaluator") - deploy_parser.add_argument( - "--force", - "-f", - action="store_true", - help="Force update if evaluator already exists", - ) - - # Add HuggingFace dataset options to deploy command - hf_deploy_group = deploy_parser.add_argument_group("HuggingFace Dataset Options") - hf_deploy_group.add_argument( - "--huggingface-dataset", - "--hf", - help="HuggingFace dataset name (e.g., 'deepseek-ai/DeepSeek-ProverBench')", - ) - hf_deploy_group.add_argument( - "--huggingface-split", - default="train", - help="Dataset split to use (default: 'train')", - ) - hf_deploy_group.add_argument( - "--huggingface-prompt-key", - default="prompt", - help="Key in the dataset containing the prompt text (default: 'prompt')", - ) - hf_deploy_group.add_argument( - "--huggingface-response-key", - default="response", - help="Key in the dataset containing the response text (default: 'response')", - ) - hf_deploy_group.add_argument( - "--huggingface-key-map", - help="JSON mapping of dataset keys to Eval Protocol message keys", - ) - deploy_parser.add_argument( - "--remote-url", - help="URL of a pre-deployed remote reward function. If provided, deploys by registering this URL with Fireworks AI.", - ) - - # Deployment target options - target_group = deploy_parser.add_argument_group("Deployment Target Options") - target_group.add_argument( - "--target", - choices=["fireworks", "gcp-cloud-run", "local-serve"], - default="fireworks", - help="Deployment target. 'fireworks' for standard Fireworks platform deployment, 'gcp-cloud-run' for Google Cloud Run, 'local-serve' for local serving with Serveo tunneling.", - ) - target_group.add_argument( - "--function-ref", - help="Reference to the reward function to deploy (e.g., 'my_module.reward_func'). Required for 'gcp-cloud-run' and 'local-serve' targets.", - ) - - # Local serving options (relevant if --target is local-serve) - local_serve_group = deploy_parser.add_argument_group("Local Serving Options (used if --target is local-serve)") - local_serve_group.add_argument( - "--local-port", - type=int, - default=8001, - help="Port for the local reward function server to listen on (default: 8001). Used with --target local-serve.", - ) - - # GCP deployment options - gcp_group = deploy_parser.add_argument_group( - "GCP Cloud Run Deployment Options (used if --target is gcp-cloud-run)" - ) - # --function-ref is now in target_group - gcp_group.add_argument( - "--gcp-project", - required=False, - help="Google Cloud Project ID. Must be provided via CLI or rewardkit.yaml.", - ) - gcp_group.add_argument( - "--gcp-region", - required=False, - help="Google Cloud Region for deployment (e.g., 'us-central1'). Must be provided via CLI or rewardkit.yaml.", - ) - gcp_group.add_argument( - "--gcp-ar-repo", - required=False, - help="Google Artifact Registry repository name. Optional, defaults to value in rewardkit.yaml or 'eval-protocol-evaluators' if not specified.", - ) - gcp_group.add_argument( - "--service-account", - help="Email of the GCP service account to run the Cloud Run service. Optional.", - ) - gcp_group.add_argument( - "--entry-point", - default="reward_function", - help="The name of the entry point function within your --function-ref module (default: reward_function). Only for gcp-cloud-run.", - ) - gcp_group.add_argument( - "--runtime", - default="python311", # Or a sensible default - help="The Cloud Functions/Run runtime (e.g., python311). Only for gcp-cloud-run.", - ) - gcp_group.add_argument( - "--gcp-auth-mode", - choices=["open", "api-key"], # Add 'iam' later - default=None, # Default will be resolved in deploy_command - help="Authentication mode for the deployed GCP Cloud Run service. " - "'open': Publicly accessible. " - "'api-key': Service is publicly accessible but requires an API key in requests (handled by the application). " - "If not specified, defaults to value in rewardkit.yaml or 'api-key'. Optional.", - ) - - # Deploy MCP command - deploy_mcp_parser = subparsers.add_parser("deploy-mcp", help="Deploy an MCP server to Google Cloud Run") - deploy_mcp_parser.add_argument("--id", required=True, help="Unique ID for the MCP server deployment") - deploy_mcp_parser.add_argument( - "--mcp-server-module", - help="Python module containing the MCP server (e.g., 'examples.frozen_lake_mcp.frozen_lake_mcp_server'). Required if --dockerfile is not provided.", - ) - deploy_mcp_parser.add_argument( - "--dockerfile", - help="Path to Dockerfile to use for deployment (recommended for tested local Dockerfiles). When provided, --mcp-server-module is not required.", - ) - deploy_mcp_parser.add_argument( - "--gcp-project", - help="Google Cloud Project ID. Can also be set in rewardkit.yaml", - ) - deploy_mcp_parser.add_argument( - "--gcp-region", - help="Google Cloud Region (e.g., 'us-central1'). Can also be set in rewardkit.yaml", - ) - deploy_mcp_parser.add_argument( - "--gcp-ar-repo", - help="Google Artifact Registry repository name. Defaults to 'eval-protocol-mcp-servers'", - ) - deploy_mcp_parser.add_argument( - "--port", - type=int, - default=8000, - help="Port for the MCP server to listen on (default: 8000)", - ) - deploy_mcp_parser.add_argument( - "--python-version", - default="3.11", - help="Python version for the container (default: 3.11)", - ) - deploy_mcp_parser.add_argument("--requirements", help="Additional pip requirements (newline separated)") - deploy_mcp_parser.add_argument("--env-vars", nargs="*", help="Environment variables in KEY=VALUE format") - - # Agent-eval command - agent_eval_parser = subparsers.add_parser( - "agent-eval", help="Run agent evaluation using the ForkableResource framework." - ) - agent_eval_parser.add_argument( - "--task-def", - required=True, - help="Path to task definition file or directory containing task definitions.", - ) - agent_eval_parser.add_argument( - "--parallel", - action="store_true", - help="Execute tasks in parallel when multiple tasks are specified.", - ) - agent_eval_parser.add_argument( - "--max-concurrency", - type=int, - default=3, - help="Maximum number of tasks to execute in parallel (default: 3).", - ) - agent_eval_parser.add_argument( - "--filter", - nargs="+", - help="Run only tasks matching the specified task IDs.", - ) - agent_eval_parser.add_argument( - "--output-dir", - default="./agent_runs", - help="Directory to store agent evaluation run results (default: ./agent_runs).", - ) - agent_eval_parser.add_argument( - "--model", - help="Override MODEL_AGENT environment variable (format: provider/model_name).", - ) - agent_eval_parser.add_argument( - "--num-rollouts", - type=int, - help="Override the number of parallel rollouts to execute for each task.", - ) + # NOTE: The following commands are hidden/disabled. Uncomment to re-enable. + # # Preview command + # preview_parser = subparsers.add_parser("preview", help="Preview an evaluator with sample data") + # preview_parser.add_argument( + # "--metrics-folders", + # "-m", + # nargs="+", + # help="Metric folders in format 'name=path', e.g., 'clarity=./metrics/clarity'", + # ) + # + # # Make samples optional to allow HF dataset option + # preview_parser.add_argument( + # "--samples", + # "-s", + # required=False, + # help="Path to JSONL file containing sample data", + # ) + # preview_parser.add_argument( + # "--max-samples", + # type=int, + # default=5, + # help="Maximum number of samples to process (default: 5)", + # ) + # + # # Add HuggingFace dataset options + # hf_group = preview_parser.add_argument_group("HuggingFace Dataset Options") + # hf_group.add_argument( + # "--huggingface-dataset", + # "--hf", + # help="HuggingFace dataset name (e.g., 'deepseek-ai/DeepSeek-ProverBench')", + # ) + # hf_group.add_argument( + # "--huggingface-split", + # default="train", + # help="Dataset split to use (default: 'train')", + # ) + # hf_group.add_argument( + # "--huggingface-prompt-key", + # default="prompt", + # help="Key in the dataset containing the prompt text (default: 'prompt')", + # ) + # hf_group.add_argument( + # "--huggingface-response-key", + # default="response", + # help="Key in the dataset containing the response text (default: 'response')", + # ) + # hf_group.add_argument( + # "--huggingface-key-map", + # help="JSON mapping of dataset keys to Eval Protocol message keys", + # ) + # preview_parser.add_argument( + # "--remote-url", + # help="URL of a remote reward function endpoint to preview against. If provided, metrics-folders might be ignored.", + # ) + # + # # Deploy command + # deploy_parser = subparsers.add_parser("deploy", help="Create and deploy an evaluator, or register a remote one") + # deploy_parser.add_argument("--id", required=True, help="ID for the evaluator") + # deploy_parser.add_argument( + # "--metrics-folders", + # "-m", + # nargs="+", + # required=False, # No longer strictly required if --remote-url is used + # help="Metric folders in format 'name=path', e.g., 'clarity=./metrics/clarity'. Required if not using --remote-url.", + # ) + # deploy_parser.add_argument( + # "--display-name", + # help="Display name for the evaluator (defaults to ID if not provided)", + # ) + # deploy_parser.add_argument("--description", help="Description for the evaluator") + # deploy_parser.add_argument( + # "--force", + # "-f", + # action="store_true", + # help="Force update if evaluator already exists", + # ) + # + # # Add HuggingFace dataset options to deploy command + # hf_deploy_group = deploy_parser.add_argument_group("HuggingFace Dataset Options") + # hf_deploy_group.add_argument( + # "--huggingface-dataset", + # "--hf", + # help="HuggingFace dataset name (e.g., 'deepseek-ai/DeepSeek-ProverBench')", + # ) + # hf_deploy_group.add_argument( + # "--huggingface-split", + # default="train", + # help="Dataset split to use (default: 'train')", + # ) + # hf_deploy_group.add_argument( + # "--huggingface-prompt-key", + # default="prompt", + # help="Key in the dataset containing the prompt text (default: 'prompt')", + # ) + # hf_deploy_group.add_argument( + # "--huggingface-response-key", + # default="response", + # help="Key in the dataset containing the response text (default: 'response')", + # ) + # hf_deploy_group.add_argument( + # "--huggingface-key-map", + # help="JSON mapping of dataset keys to Eval Protocol message keys", + # ) + # deploy_parser.add_argument( + # "--remote-url", + # help="URL of a pre-deployed remote reward function. If provided, deploys by registering this URL with Fireworks AI.", + # ) + # + # # Deployment target options + # target_group = deploy_parser.add_argument_group("Deployment Target Options") + # target_group.add_argument( + # "--target", + # choices=["fireworks", "gcp-cloud-run", "local-serve"], + # default="fireworks", + # help="Deployment target. 'fireworks' for standard Fireworks platform deployment, 'gcp-cloud-run' for Google Cloud Run, 'local-serve' for local serving with Serveo tunneling.", + # ) + # target_group.add_argument( + # "--function-ref", + # help="Reference to the reward function to deploy (e.g., 'my_module.reward_func'). Required for 'gcp-cloud-run' and 'local-serve' targets.", + # ) + # + # # Local serving options (relevant if --target is local-serve) + # local_serve_group = deploy_parser.add_argument_group("Local Serving Options (used if --target is local-serve)") + # local_serve_group.add_argument( + # "--local-port", + # type=int, + # default=8001, + # help="Port for the local reward function server to listen on (default: 8001). Used with --target local-serve.", + # ) + # + # # GCP deployment options + # gcp_group = deploy_parser.add_argument_group( + # "GCP Cloud Run Deployment Options (used if --target is gcp-cloud-run)" + # ) + # # --function-ref is now in target_group + # gcp_group.add_argument( + # "--gcp-project", + # required=False, + # help="Google Cloud Project ID. Must be provided via CLI or rewardkit.yaml.", + # ) + # gcp_group.add_argument( + # "--gcp-region", + # required=False, + # help="Google Cloud Region for deployment (e.g., 'us-central1'). Must be provided via CLI or rewardkit.yaml.", + # ) + # gcp_group.add_argument( + # "--gcp-ar-repo", + # required=False, + # help="Google Artifact Registry repository name. Optional, defaults to value in rewardkit.yaml or 'eval-protocol-evaluators' if not specified.", + # ) + # gcp_group.add_argument( + # "--service-account", + # help="Email of the GCP service account to run the Cloud Run service. Optional.", + # ) + # gcp_group.add_argument( + # "--entry-point", + # default="reward_function", + # help="The name of the entry point function within your --function-ref module (default: reward_function). Only for gcp-cloud-run.", + # ) + # gcp_group.add_argument( + # "--runtime", + # default="python311", # Or a sensible default + # help="The Cloud Functions/Run runtime (e.g., python311). Only for gcp-cloud-run.", + # ) + # gcp_group.add_argument( + # "--gcp-auth-mode", + # choices=["open", "api-key"], # Add 'iam' later + # default=None, # Default will be resolved in deploy_command + # help="Authentication mode for the deployed GCP Cloud Run service. " + # "'open': Publicly accessible. " + # "'api-key': Service is publicly accessible but requires an API key in requests (handled by the application). " + # "If not specified, defaults to value in rewardkit.yaml or 'api-key'. Optional.", + # ) + # + # # Deploy MCP command + # deploy_mcp_parser = subparsers.add_parser("deploy-mcp", help="Deploy an MCP server to Google Cloud Run") + # deploy_mcp_parser.add_argument("--id", required=True, help="Unique ID for the MCP server deployment") + # deploy_mcp_parser.add_argument( + # "--mcp-server-module", + # help="Python module containing the MCP server (e.g., 'examples.frozen_lake_mcp.frozen_lake_mcp_server'). Required if --dockerfile is not provided.", + # ) + # deploy_mcp_parser.add_argument( + # "--dockerfile", + # help="Path to Dockerfile to use for deployment (recommended for tested local Dockerfiles). When provided, --mcp-server-module is not required.", + # ) + # deploy_mcp_parser.add_argument( + # "--gcp-project", + # help="Google Cloud Project ID. Can also be set in rewardkit.yaml", + # ) + # deploy_mcp_parser.add_argument( + # "--gcp-region", + # help="Google Cloud Region (e.g., 'us-central1'). Can also be set in rewardkit.yaml", + # ) + # deploy_mcp_parser.add_argument( + # "--gcp-ar-repo", + # help="Google Artifact Registry repository name. Defaults to 'eval-protocol-mcp-servers'", + # ) + # deploy_mcp_parser.add_argument( + # "--port", + # type=int, + # default=8000, + # help="Port for the MCP server to listen on (default: 8000)", + # ) + # deploy_mcp_parser.add_argument( + # "--python-version", + # default="3.11", + # help="Python version for the container (default: 3.11)", + # ) + # deploy_mcp_parser.add_argument("--requirements", help="Additional pip requirements (newline separated)") + # deploy_mcp_parser.add_argument("--env-vars", nargs="*", help="Environment variables in KEY=VALUE format") + # + # # Agent-eval command + # agent_eval_parser = subparsers.add_parser( + # "agent-eval", help="Run agent evaluation using the ForkableResource framework." + # ) + # agent_eval_parser.add_argument( + # "--task-def", + # required=True, + # help="Path to task definition file or directory containing task definitions.", + # ) + # agent_eval_parser.add_argument( + # "--parallel", + # action="store_true", + # help="Execute tasks in parallel when multiple tasks are specified.", + # ) + # agent_eval_parser.add_argument( + # "--max-concurrency", + # type=int, + # default=3, + # help="Maximum number of tasks to execute in parallel (default: 3).", + # ) + # agent_eval_parser.add_argument( + # "--filter", + # nargs="+", + # help="Run only tasks matching the specified task IDs.", + # ) + # agent_eval_parser.add_argument( + # "--output-dir", + # default="./agent_runs", + # help="Directory to store agent evaluation run results (default: ./agent_runs).", + # ) + # agent_eval_parser.add_argument( + # "--model", + # help="Override MODEL_AGENT environment variable (format: provider/model_name).", + # ) + # agent_eval_parser.add_argument( + # "--num-rollouts", + # type=int, + # help="Override the number of parallel rollouts to execute for each task.", + # ) # Logs command logs_parser = subparsers.add_parser("logs", help="Serve logs with file watching and real-time updates") @@ -485,13 +486,13 @@ def parse_args(args=None): help="Extra flags to pass to 'docker run' (quoted string, e.g. \"--env-file .env --memory=8g\")", ) - # Run command (for Hydra-based evaluations) - # This subparser intentionally defines no arguments itself. - # All arguments after 'run' will be passed to Hydra by parse_known_args. - subparsers.add_parser( - "run", - help="Run an evaluation using a Hydra configuration. All arguments after 'run' are passed to Hydra.", - ) + # # Run command (for Hydra-based evaluations) + # # This subparser intentionally defines no arguments itself. + # # All arguments after 'run' will be passed to Hydra by parse_known_args. + # subparsers.add_parser( + # "run", + # help="Run an evaluation using a Hydra configuration. All arguments after 'run' are passed to Hydra.", + # ) # Use parse_known_args to allow Hydra to handle its own arguments return parser.parse_known_args(args) @@ -586,23 +587,24 @@ def _extract_flag_value(argv_list, flag_name): setup_logging(args.verbose, getattr(args, "debug", False)) - if args.command == "preview": - if preview_command is None: - raise ImportError("preview_command is unavailable") - return preview_command(args) - elif args.command == "deploy": - if deploy_command is None: - raise ImportError("deploy_command is unavailable") - return deploy_command(args) - elif args.command == "deploy-mcp": - from .cli_commands.deploy_mcp import deploy_mcp_command - - return deploy_mcp_command(args) - elif args.command == "agent-eval": - from .cli_commands.agent_eval_cmd import agent_eval_command - - return agent_eval_command(args) - elif args.command == "logs": + # NOTE: The following command handlers are disabled. Uncomment to re-enable. + # if args.command == "preview": + # if preview_command is None: + # raise ImportError("preview_command is unavailable") + # return preview_command(args) + # elif args.command == "deploy": + # if deploy_command is None: + # raise ImportError("deploy_command is unavailable") + # return deploy_command(args) + # elif args.command == "deploy-mcp": + # from .cli_commands.deploy_mcp import deploy_mcp_command + # + # return deploy_mcp_command(args) + # elif args.command == "agent-eval": + # from .cli_commands.agent_eval_cmd import agent_eval_command + # + # return agent_eval_command(args) + if args.command == "logs": from .cli_commands.logs import logs_command return logs_command(args) @@ -621,89 +623,89 @@ def _extract_flag_value(argv_list, flag_name): from .cli_commands.local_test import local_test_command return local_test_command(args) - elif args.command == "run": - # For the 'run' command, Hydra takes over argument parsing. - - # Filter out the initial '--' if present in remaining_argv, which parse_known_args might add - hydra_specific_args = [arg for arg in remaining_argv if arg != "--"] - - # Auto-detect local conf directory and add it to config path if not explicitly provided - has_config_path = any(arg.startswith("--config-path") for arg in hydra_specific_args) - current_dir = os.getcwd() - local_conf_dir = os.path.join(current_dir, "conf") - - if not has_config_path and os.path.isdir(local_conf_dir): - logger.info("Auto-detected local conf directory: %s", local_conf_dir) - hydra_specific_args = [ - "--config-path", - local_conf_dir, - ] + hydra_specific_args - - processed_hydra_args = [] - i = 0 - while i < len(hydra_specific_args): - arg = hydra_specific_args[i] - if arg == "--config-path": - processed_hydra_args.append(arg) - i += 1 - if i < len(hydra_specific_args): - path_val = hydra_specific_args[i] - abs_path = os.path.abspath(path_val) - logger.debug( - "Converting relative --config-path '%s' (space separated) to absolute '%s'", - path_val, - abs_path, - ) - processed_hydra_args.append(abs_path) - else: - logger.error("--config-path specified without a value.") - elif arg.startswith("--config-path="): - flag_part, path_val = arg.split("=", 1) - processed_hydra_args.append(flag_part) - abs_path = os.path.abspath(path_val) - logger.debug( - "Converting relative --config-path '%s' (equals separated) to absolute '%s'", - path_val, - abs_path, - ) - processed_hydra_args.append(abs_path) - else: - processed_hydra_args.append(arg) - i += 1 - - sys.argv = [sys.argv[0]] + processed_hydra_args - logger.info("SYSCALL_ARGV_FOR_HYDRA (after potential abspath conversion): %s", sys.argv) - - try: - from .cli_commands.run_eval_cmd import hydra_cli_entry_point - - hydra_entry = cast(Any, hydra_cli_entry_point) - hydra_entry() # type: ignore # pylint: disable=no-value-for-parameter - return 0 - except Exception as e: # pylint: disable=broad-except - error_msg = str(e) - logger.error("Evaluation failed: %s", e) - - # Provide helpful suggestions for common Hydra/config errors - if "Cannot find primary config" in error_msg: - logger.error("HINT: Configuration file not found.") - logger.error("SOLUTION: Ensure you have a config file in ./conf/ directory") - logger.error("Try: eval-protocol run --config-name simple_uipath_eval") - elif "missing from config" in error_msg or "MissingMandatoryValue" in error_msg: - logger.error("HINT: Required configuration values are missing.") - logger.error("SOLUTION: Check your config file for missing required fields") - elif "Config search path" in error_msg: - logger.error("HINT: Hydra cannot find the configuration directory.") - logger.error("SOLUTION: Create a ./conf directory with your config files") - elif "ValidationError" in error_msg: - logger.error("HINT: Configuration validation failed.") - logger.error("SOLUTION: Run 'eval-protocol validate-data --file your_data.jsonl' to check data") - - logger.error("\nQuick fix suggestions:") - logger.error("1. Use the simplified setup: eval-protocol run --config-name simple_uipath_eval") - logger.error("2. Validate your data first: eval-protocol validate-data --file data.jsonl --schema agent") - logger.error("3. Ensure you have: ./conf/simple_uipath_eval.yaml and ./uipath_reward.py") - return 1 + # elif args.command == "run": + # # For the 'run' command, Hydra takes over argument parsing. + # + # # Filter out the initial '--' if present in remaining_argv, which parse_known_args might add + # hydra_specific_args = [arg for arg in remaining_argv if arg != "--"] + # + # # Auto-detect local conf directory and add it to config path if not explicitly provided + # has_config_path = any(arg.startswith("--config-path") for arg in hydra_specific_args) + # current_dir = os.getcwd() + # local_conf_dir = os.path.join(current_dir, "conf") + # + # if not has_config_path and os.path.isdir(local_conf_dir): + # logger.info("Auto-detected local conf directory: %s", local_conf_dir) + # hydra_specific_args = [ + # "--config-path", + # local_conf_dir, + # ] + hydra_specific_args + # + # processed_hydra_args = [] + # i = 0 + # while i < len(hydra_specific_args): + # arg = hydra_specific_args[i] + # if arg == "--config-path": + # processed_hydra_args.append(arg) + # i += 1 + # if i < len(hydra_specific_args): + # path_val = hydra_specific_args[i] + # abs_path = os.path.abspath(path_val) + # logger.debug( + # "Converting relative --config-path '%s' (space separated) to absolute '%s'", + # path_val, + # abs_path, + # ) + # processed_hydra_args.append(abs_path) + # else: + # logger.error("--config-path specified without a value.") + # elif arg.startswith("--config-path="): + # flag_part, path_val = arg.split("=", 1) + # processed_hydra_args.append(flag_part) + # abs_path = os.path.abspath(path_val) + # logger.debug( + # "Converting relative --config-path '%s' (equals separated) to absolute '%s'", + # path_val, + # abs_path, + # ) + # processed_hydra_args.append(abs_path) + # else: + # processed_hydra_args.append(arg) + # i += 1 + # + # sys.argv = [sys.argv[0]] + processed_hydra_args + # logger.info("SYSCALL_ARGV_FOR_HYDRA (after potential abspath conversion): %s", sys.argv) + # + # try: + # from .cli_commands.run_eval_cmd import hydra_cli_entry_point + # + # hydra_entry = cast(Any, hydra_cli_entry_point) + # hydra_entry() # type: ignore # pylint: disable=no-value-for-parameter + # return 0 + # except Exception as e: # pylint: disable=broad-except + # error_msg = str(e) + # logger.error("Evaluation failed: %s", e) + # + # # Provide helpful suggestions for common Hydra/config errors + # if "Cannot find primary config" in error_msg: + # logger.error("HINT: Configuration file not found.") + # logger.error("SOLUTION: Ensure you have a config file in ./conf/ directory") + # logger.error("Try: eval-protocol run --config-name simple_uipath_eval") + # elif "missing from config" in error_msg or "MissingMandatoryValue" in error_msg: + # logger.error("HINT: Required configuration values are missing.") + # logger.error("SOLUTION: Check your config file for missing required fields") + # elif "Config search path" in error_msg: + # logger.error("HINT: Hydra cannot find the configuration directory.") + # logger.error("SOLUTION: Create a ./conf directory with your config files") + # elif "ValidationError" in error_msg: + # logger.error("HINT: Configuration validation failed.") + # logger.error("SOLUTION: Run 'eval-protocol validate-data --file your_data.jsonl' to check data") + # + # logger.error("\nQuick fix suggestions:") + # logger.error("1. Use the simplified setup: eval-protocol run --config-name simple_uipath_eval") + # logger.error("2. Validate your data first: eval-protocol validate-data --file data.jsonl --schema agent") + # logger.error("3. Ensure you have: ./conf/simple_uipath_eval.yaml and ./uipath_reward.py") + # return 1 else: temp_parser = argparse.ArgumentParser(prog=os.path.basename(original_script_name)) temp_parser.print_help() diff --git a/tests/test_cli.py b/tests/test_cli.py index 8e852940..050b98d6 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -11,6 +11,7 @@ class TestCLI: """Tests for the CLI functionality.""" + @pytest.mark.skip(reason="preview and deploy commands are currently disabled in cli.py") def test_parse_args(self): """Test the argument parser.""" # Test preview command diff --git a/tests/test_cli_args.py b/tests/test_cli_args.py index 21817879..9eccf9e0 100644 --- a/tests/test_cli_args.py +++ b/tests/test_cli_args.py @@ -6,6 +6,7 @@ from eval_protocol.cli import parse_args +@pytest.mark.skip(reason="preview and deploy commands are currently disabled in cli.py") class TestCliArgParsing: # --- Tests for 'preview' command --- def test_preview_with_remote_url_and_samples(self): diff --git a/tests/test_minimal.py b/tests/test_minimal.py index 3c55f8a8..94b143cb 100644 --- a/tests/test_minimal.py +++ b/tests/test_minimal.py @@ -14,6 +14,7 @@ import pytest +@pytest.mark.skip(reason="agent-eval command is currently disabled in cli.py") def test_cli_help(): """Test that the CLI help message works.""" result = subprocess.run(["eval-protocol", "--help"], capture_output=True, text=True, check=False) @@ -25,6 +26,7 @@ def test_cli_help(): assert "agent-eval" in result.stdout +@pytest.mark.skip(reason="agent-eval command is currently disabled in cli.py") def test_cli_agent_eval_help(): """Test that the agent-eval help message works.""" result = subprocess.run(