diff --git a/eval_protocol/cli.py b/eval_protocol/cli.py
index 90b620c1..e8125390 100644
--- a/eval_protocol/cli.py
+++ b/eval_protocol/cli.py
@@ -47,255 +47,256 @@ def parse_args(args=None):
 
     subparsers = parser.add_subparsers(dest="command", help="Command to run")
 
-    # Preview command
-    preview_parser = subparsers.add_parser("preview", help="Preview an evaluator with sample data")
-    preview_parser.add_argument(
-        "--metrics-folders",
-        "-m",
-        nargs="+",
-        help="Metric folders in format 'name=path', e.g., 'clarity=./metrics/clarity'",
-    )
-
-    # Make samples optional to allow HF dataset option
-    preview_parser.add_argument(
-        "--samples",
-        "-s",
-        required=False,
-        help="Path to JSONL file containing sample data",
-    )
-    preview_parser.add_argument(
-        "--max-samples",
-        type=int,
-        default=5,
-        help="Maximum number of samples to process (default: 5)",
-    )
-
-    # Add HuggingFace dataset options
-    hf_group = preview_parser.add_argument_group("HuggingFace Dataset Options")
-    hf_group.add_argument(
-        "--huggingface-dataset",
-        "--hf",
-        help="HuggingFace dataset name (e.g., 'deepseek-ai/DeepSeek-ProverBench')",
-    )
-    hf_group.add_argument(
-        "--huggingface-split",
-        default="train",
-        help="Dataset split to use (default: 'train')",
-    )
-    hf_group.add_argument(
-        "--huggingface-prompt-key",
-        default="prompt",
-        help="Key in the dataset containing the prompt text (default: 'prompt')",
-    )
-    hf_group.add_argument(
-        "--huggingface-response-key",
-        default="response",
-        help="Key in the dataset containing the response text (default: 'response')",
-    )
-    hf_group.add_argument(
-        "--huggingface-key-map",
-        help="JSON mapping of dataset keys to Eval Protocol message keys",
-    )
-    preview_parser.add_argument(
-        "--remote-url",
-        help="URL of a remote reward function endpoint to preview against. If provided, metrics-folders might be ignored.",
-    )
-
-    # Deploy command
-    deploy_parser = subparsers.add_parser("deploy", help="Create and deploy an evaluator, or register a remote one")
-    deploy_parser.add_argument("--id", required=True, help="ID for the evaluator")
-    deploy_parser.add_argument(
-        "--metrics-folders",
-        "-m",
-        nargs="+",
-        required=False,  # No longer strictly required if --remote-url is used
-        help="Metric folders in format 'name=path', e.g., 'clarity=./metrics/clarity'. Required if not using --remote-url.",
-    )
-    deploy_parser.add_argument(
-        "--display-name",
-        help="Display name for the evaluator (defaults to ID if not provided)",
-    )
-    deploy_parser.add_argument("--description", help="Description for the evaluator")
-    deploy_parser.add_argument(
-        "--force",
-        "-f",
-        action="store_true",
-        help="Force update if evaluator already exists",
-    )
-
-    # Add HuggingFace dataset options to deploy command
-    hf_deploy_group = deploy_parser.add_argument_group("HuggingFace Dataset Options")
-    hf_deploy_group.add_argument(
-        "--huggingface-dataset",
-        "--hf",
-        help="HuggingFace dataset name (e.g., 'deepseek-ai/DeepSeek-ProverBench')",
-    )
-    hf_deploy_group.add_argument(
-        "--huggingface-split",
-        default="train",
-        help="Dataset split to use (default: 'train')",
-    )
-    hf_deploy_group.add_argument(
-        "--huggingface-prompt-key",
-        default="prompt",
-        help="Key in the dataset containing the prompt text (default: 'prompt')",
-    )
-    hf_deploy_group.add_argument(
-        "--huggingface-response-key",
-        default="response",
-        help="Key in the dataset containing the response text (default: 'response')",
-    )
-    hf_deploy_group.add_argument(
-        "--huggingface-key-map",
-        help="JSON mapping of dataset keys to Eval Protocol message keys",
-    )
-    deploy_parser.add_argument(
-        "--remote-url",
-        help="URL of a pre-deployed remote reward function. If provided, deploys by registering this URL with Fireworks AI.",
-    )
-
-    # Deployment target options
-    target_group = deploy_parser.add_argument_group("Deployment Target Options")
-    target_group.add_argument(
-        "--target",
-        choices=["fireworks", "gcp-cloud-run", "local-serve"],
-        default="fireworks",
-        help="Deployment target. 'fireworks' for standard Fireworks platform deployment, 'gcp-cloud-run' for Google Cloud Run, 'local-serve' for local serving with Serveo tunneling.",
-    )
-    target_group.add_argument(
-        "--function-ref",
-        help="Reference to the reward function to deploy (e.g., 'my_module.reward_func'). Required for 'gcp-cloud-run' and 'local-serve' targets.",
-    )
-
-    # Local serving options (relevant if --target is local-serve)
-    local_serve_group = deploy_parser.add_argument_group("Local Serving Options (used if --target is local-serve)")
-    local_serve_group.add_argument(
-        "--local-port",
-        type=int,
-        default=8001,
-        help="Port for the local reward function server to listen on (default: 8001). Used with --target local-serve.",
-    )
-
-    # GCP deployment options
-    gcp_group = deploy_parser.add_argument_group(
-        "GCP Cloud Run Deployment Options (used if --target is gcp-cloud-run)"
-    )
-    # --function-ref is now in target_group
-    gcp_group.add_argument(
-        "--gcp-project",
-        required=False,
-        help="Google Cloud Project ID. Must be provided via CLI or rewardkit.yaml.",
-    )
-    gcp_group.add_argument(
-        "--gcp-region",
-        required=False,
-        help="Google Cloud Region for deployment (e.g., 'us-central1'). Must be provided via CLI or rewardkit.yaml.",
-    )
-    gcp_group.add_argument(
-        "--gcp-ar-repo",
-        required=False,
-        help="Google Artifact Registry repository name. Optional, defaults to value in rewardkit.yaml or 'eval-protocol-evaluators' if not specified.",
-    )
-    gcp_group.add_argument(
-        "--service-account",
-        help="Email of the GCP service account to run the Cloud Run service. Optional.",
-    )
-    gcp_group.add_argument(
-        "--entry-point",
-        default="reward_function",
-        help="The name of the entry point function within your --function-ref module (default: reward_function). Only for gcp-cloud-run.",
-    )
-    gcp_group.add_argument(
-        "--runtime",
-        default="python311",  # Or a sensible default
-        help="The Cloud Functions/Run runtime (e.g., python311). Only for gcp-cloud-run.",
-    )
-    gcp_group.add_argument(
-        "--gcp-auth-mode",
-        choices=["open", "api-key"],  # Add 'iam' later
-        default=None,  # Default will be resolved in deploy_command
-        help="Authentication mode for the deployed GCP Cloud Run service. "
-        "'open': Publicly accessible. "
-        "'api-key': Service is publicly accessible but requires an API key in requests (handled by the application). "
-        "If not specified, defaults to value in rewardkit.yaml or 'api-key'. Optional.",
-    )
-
-    # Deploy MCP command
-    deploy_mcp_parser = subparsers.add_parser("deploy-mcp", help="Deploy an MCP server to Google Cloud Run")
-    deploy_mcp_parser.add_argument("--id", required=True, help="Unique ID for the MCP server deployment")
-    deploy_mcp_parser.add_argument(
-        "--mcp-server-module",
-        help="Python module containing the MCP server (e.g., 'examples.frozen_lake_mcp.frozen_lake_mcp_server'). Required if --dockerfile is not provided.",
-    )
-    deploy_mcp_parser.add_argument(
-        "--dockerfile",
-        help="Path to Dockerfile to use for deployment (recommended for tested local Dockerfiles). When provided, --mcp-server-module is not required.",
-    )
-    deploy_mcp_parser.add_argument(
-        "--gcp-project",
-        help="Google Cloud Project ID. Can also be set in rewardkit.yaml",
-    )
-    deploy_mcp_parser.add_argument(
-        "--gcp-region",
-        help="Google Cloud Region (e.g., 'us-central1'). Can also be set in rewardkit.yaml",
-    )
-    deploy_mcp_parser.add_argument(
-        "--gcp-ar-repo",
-        help="Google Artifact Registry repository name. Defaults to 'eval-protocol-mcp-servers'",
-    )
-    deploy_mcp_parser.add_argument(
-        "--port",
-        type=int,
-        default=8000,
-        help="Port for the MCP server to listen on (default: 8000)",
-    )
-    deploy_mcp_parser.add_argument(
-        "--python-version",
-        default="3.11",
-        help="Python version for the container (default: 3.11)",
-    )
-    deploy_mcp_parser.add_argument("--requirements", help="Additional pip requirements (newline separated)")
-    deploy_mcp_parser.add_argument("--env-vars", nargs="*", help="Environment variables in KEY=VALUE format")
-
-    # Agent-eval command
-    agent_eval_parser = subparsers.add_parser(
-        "agent-eval", help="Run agent evaluation using the ForkableResource framework."
-    )
-    agent_eval_parser.add_argument(
-        "--task-def",
-        required=True,
-        help="Path to task definition file or directory containing task definitions.",
-    )
-    agent_eval_parser.add_argument(
-        "--parallel",
-        action="store_true",
-        help="Execute tasks in parallel when multiple tasks are specified.",
-    )
-    agent_eval_parser.add_argument(
-        "--max-concurrency",
-        type=int,
-        default=3,
-        help="Maximum number of tasks to execute in parallel (default: 3).",
-    )
-    agent_eval_parser.add_argument(
-        "--filter",
-        nargs="+",
-        help="Run only tasks matching the specified task IDs.",
-    )
-    agent_eval_parser.add_argument(
-        "--output-dir",
-        default="./agent_runs",
-        help="Directory to store agent evaluation run results (default: ./agent_runs).",
-    )
-    agent_eval_parser.add_argument(
-        "--model",
-        help="Override MODEL_AGENT environment variable (format: provider/model_name).",
-    )
-    agent_eval_parser.add_argument(
-        "--num-rollouts",
-        type=int,
-        help="Override the number of parallel rollouts to execute for each task.",
-    )
+    # NOTE: The following commands are hidden/disabled. To re-enable one, uncomment its subparser here AND its commented-out dispatch branch further down in this file.
+    # # Preview command
+    # preview_parser = subparsers.add_parser("preview", help="Preview an evaluator with sample data")
+    # preview_parser.add_argument(
+    #     "--metrics-folders",
+    #     "-m",
+    #     nargs="+",
+    #     help="Metric folders in format 'name=path', e.g., 'clarity=./metrics/clarity'",
+    # )
+    #
+    # # Make samples optional to allow HF dataset option
+    # preview_parser.add_argument(
+    #     "--samples",
+    #     "-s",
+    #     required=False,
+    #     help="Path to JSONL file containing sample data",
+    # )
+    # preview_parser.add_argument(
+    #     "--max-samples",
+    #     type=int,
+    #     default=5,
+    #     help="Maximum number of samples to process (default: 5)",
+    # )
+    #
+    # # Add HuggingFace dataset options
+    # hf_group = preview_parser.add_argument_group("HuggingFace Dataset Options")
+    # hf_group.add_argument(
+    #     "--huggingface-dataset",
+    #     "--hf",
+    #     help="HuggingFace dataset name (e.g., 'deepseek-ai/DeepSeek-ProverBench')",
+    # )
+    # hf_group.add_argument(
+    #     "--huggingface-split",
+    #     default="train",
+    #     help="Dataset split to use (default: 'train')",
+    # )
+    # hf_group.add_argument(
+    #     "--huggingface-prompt-key",
+    #     default="prompt",
+    #     help="Key in the dataset containing the prompt text (default: 'prompt')",
+    # )
+    # hf_group.add_argument(
+    #     "--huggingface-response-key",
+    #     default="response",
+    #     help="Key in the dataset containing the response text (default: 'response')",
+    # )
+    # hf_group.add_argument(
+    #     "--huggingface-key-map",
+    #     help="JSON mapping of dataset keys to Eval Protocol message keys",
+    # )
+    # preview_parser.add_argument(
+    #     "--remote-url",
+    #     help="URL of a remote reward function endpoint to preview against. If provided, metrics-folders might be ignored.",
+    # )
+    #
+    # # Deploy command
+    # deploy_parser = subparsers.add_parser("deploy", help="Create and deploy an evaluator, or register a remote one")
+    # deploy_parser.add_argument("--id", required=True, help="ID for the evaluator")
+    # deploy_parser.add_argument(
+    #     "--metrics-folders",
+    #     "-m",
+    #     nargs="+",
+    #     required=False,  # No longer strictly required if --remote-url is used
+    #     help="Metric folders in format 'name=path', e.g., 'clarity=./metrics/clarity'. Required if not using --remote-url.",
+    # )
+    # deploy_parser.add_argument(
+    #     "--display-name",
+    #     help="Display name for the evaluator (defaults to ID if not provided)",
+    # )
+    # deploy_parser.add_argument("--description", help="Description for the evaluator")
+    # deploy_parser.add_argument(
+    #     "--force",
+    #     "-f",
+    #     action="store_true",
+    #     help="Force update if evaluator already exists",
+    # )
+    #
+    # # Add HuggingFace dataset options to deploy command
+    # hf_deploy_group = deploy_parser.add_argument_group("HuggingFace Dataset Options")
+    # hf_deploy_group.add_argument(
+    #     "--huggingface-dataset",
+    #     "--hf",
+    #     help="HuggingFace dataset name (e.g., 'deepseek-ai/DeepSeek-ProverBench')",
+    # )
+    # hf_deploy_group.add_argument(
+    #     "--huggingface-split",
+    #     default="train",
+    #     help="Dataset split to use (default: 'train')",
+    # )
+    # hf_deploy_group.add_argument(
+    #     "--huggingface-prompt-key",
+    #     default="prompt",
+    #     help="Key in the dataset containing the prompt text (default: 'prompt')",
+    # )
+    # hf_deploy_group.add_argument(
+    #     "--huggingface-response-key",
+    #     default="response",
+    #     help="Key in the dataset containing the response text (default: 'response')",
+    # )
+    # hf_deploy_group.add_argument(
+    #     "--huggingface-key-map",
+    #     help="JSON mapping of dataset keys to Eval Protocol message keys",
+    # )
+    # deploy_parser.add_argument(
+    #     "--remote-url",
+    #     help="URL of a pre-deployed remote reward function. If provided, deploys by registering this URL with Fireworks AI.",
+    # )
+    #
+    # # Deployment target options
+    # target_group = deploy_parser.add_argument_group("Deployment Target Options")
+    # target_group.add_argument(
+    #     "--target",
+    #     choices=["fireworks", "gcp-cloud-run", "local-serve"],
+    #     default="fireworks",
+    #     help="Deployment target. 'fireworks' for standard Fireworks platform deployment, 'gcp-cloud-run' for Google Cloud Run, 'local-serve' for local serving with Serveo tunneling.",
+    # )
+    # target_group.add_argument(
+    #     "--function-ref",
+    #     help="Reference to the reward function to deploy (e.g., 'my_module.reward_func'). Required for 'gcp-cloud-run' and 'local-serve' targets.",
+    # )
+    #
+    # # Local serving options (relevant if --target is local-serve)
+    # local_serve_group = deploy_parser.add_argument_group("Local Serving Options (used if --target is local-serve)")
+    # local_serve_group.add_argument(
+    #     "--local-port",
+    #     type=int,
+    #     default=8001,
+    #     help="Port for the local reward function server to listen on (default: 8001). Used with --target local-serve.",
+    # )
+    #
+    # # GCP deployment options
+    # gcp_group = deploy_parser.add_argument_group(
+    #     "GCP Cloud Run Deployment Options (used if --target is gcp-cloud-run)"
+    # )
+    # # --function-ref is now in target_group
+    # gcp_group.add_argument(
+    #     "--gcp-project",
+    #     required=False,
+    #     help="Google Cloud Project ID. Must be provided via CLI or rewardkit.yaml.",
+    # )
+    # gcp_group.add_argument(
+    #     "--gcp-region",
+    #     required=False,
+    #     help="Google Cloud Region for deployment (e.g., 'us-central1'). Must be provided via CLI or rewardkit.yaml.",
+    # )
+    # gcp_group.add_argument(
+    #     "--gcp-ar-repo",
+    #     required=False,
+    #     help="Google Artifact Registry repository name. Optional, defaults to value in rewardkit.yaml or 'eval-protocol-evaluators' if not specified.",
+    # )
+    # gcp_group.add_argument(
+    #     "--service-account",
+    #     help="Email of the GCP service account to run the Cloud Run service. Optional.",
+    # )
+    # gcp_group.add_argument(
+    #     "--entry-point",
+    #     default="reward_function",
+    #     help="The name of the entry point function within your --function-ref module (default: reward_function). Only for gcp-cloud-run.",
+    # )
+    # gcp_group.add_argument(
+    #     "--runtime",
+    #     default="python311",  # Or a sensible default
+    #     help="The Cloud Functions/Run runtime (e.g., python311). Only for gcp-cloud-run.",
+    # )
+    # gcp_group.add_argument(
+    #     "--gcp-auth-mode",
+    #     choices=["open", "api-key"],  # Add 'iam' later
+    #     default=None,  # Default will be resolved in deploy_command
+    #     help="Authentication mode for the deployed GCP Cloud Run service. "
+    #     "'open': Publicly accessible. "
+    #     "'api-key': Service is publicly accessible but requires an API key in requests (handled by the application). "
+    #     "If not specified, defaults to value in rewardkit.yaml or 'api-key'. Optional.",
+    # )
+    #
+    # # Deploy MCP command
+    # deploy_mcp_parser = subparsers.add_parser("deploy-mcp", help="Deploy an MCP server to Google Cloud Run")
+    # deploy_mcp_parser.add_argument("--id", required=True, help="Unique ID for the MCP server deployment")
+    # deploy_mcp_parser.add_argument(
+    #     "--mcp-server-module",
+    #     help="Python module containing the MCP server (e.g., 'examples.frozen_lake_mcp.frozen_lake_mcp_server'). Required if --dockerfile is not provided.",
+    # )
+    # deploy_mcp_parser.add_argument(
+    #     "--dockerfile",
+    #     help="Path to Dockerfile to use for deployment (recommended for tested local Dockerfiles). When provided, --mcp-server-module is not required.",
+    # )
+    # deploy_mcp_parser.add_argument(
+    #     "--gcp-project",
+    #     help="Google Cloud Project ID. Can also be set in rewardkit.yaml",
+    # )
+    # deploy_mcp_parser.add_argument(
+    #     "--gcp-region",
+    #     help="Google Cloud Region (e.g., 'us-central1'). Can also be set in rewardkit.yaml",
+    # )
+    # deploy_mcp_parser.add_argument(
+    #     "--gcp-ar-repo",
+    #     help="Google Artifact Registry repository name. Defaults to 'eval-protocol-mcp-servers'",
+    # )
+    # deploy_mcp_parser.add_argument(
+    #     "--port",
+    #     type=int,
+    #     default=8000,
+    #     help="Port for the MCP server to listen on (default: 8000)",
+    # )
+    # deploy_mcp_parser.add_argument(
+    #     "--python-version",
+    #     default="3.11",
+    #     help="Python version for the container (default: 3.11)",
+    # )
+    # deploy_mcp_parser.add_argument("--requirements", help="Additional pip requirements (newline separated)")
+    # deploy_mcp_parser.add_argument("--env-vars", nargs="*", help="Environment variables in KEY=VALUE format")
+    #
+    # # Agent-eval command
+    # agent_eval_parser = subparsers.add_parser(
+    #     "agent-eval", help="Run agent evaluation using the ForkableResource framework."
+    # )
+    # agent_eval_parser.add_argument(
+    #     "--task-def",
+    #     required=True,
+    #     help="Path to task definition file or directory containing task definitions.",
+    # )
+    # agent_eval_parser.add_argument(
+    #     "--parallel",
+    #     action="store_true",
+    #     help="Execute tasks in parallel when multiple tasks are specified.",
+    # )
+    # agent_eval_parser.add_argument(
+    #     "--max-concurrency",
+    #     type=int,
+    #     default=3,
+    #     help="Maximum number of tasks to execute in parallel (default: 3).",
+    # )
+    # agent_eval_parser.add_argument(
+    #     "--filter",
+    #     nargs="+",
+    #     help="Run only tasks matching the specified task IDs.",
+    # )
+    # agent_eval_parser.add_argument(
+    #     "--output-dir",
+    #     default="./agent_runs",
+    #     help="Directory to store agent evaluation run results (default: ./agent_runs).",
+    # )
+    # agent_eval_parser.add_argument(
+    #     "--model",
+    #     help="Override MODEL_AGENT environment variable (format: provider/model_name).",
+    # )
+    # agent_eval_parser.add_argument(
+    #     "--num-rollouts",
+    #     type=int,
+    #     help="Override the number of parallel rollouts to execute for each task.",
+    # )
 
     # Logs command
     logs_parser = subparsers.add_parser("logs", help="Serve logs with file watching and real-time updates")
@@ -485,13 +486,13 @@ def parse_args(args=None):
         help="Extra flags to pass to 'docker run' (quoted string, e.g. \"--env-file .env --memory=8g\")",
     )
 
-    # Run command (for Hydra-based evaluations)
-    # This subparser intentionally defines no arguments itself.
-    # All arguments after 'run' will be passed to Hydra by parse_known_args.
-    subparsers.add_parser(
-        "run",
-        help="Run an evaluation using a Hydra configuration. All arguments after 'run' are passed to Hydra.",
-    )
+    # # Run command (for Hydra-based evaluations)
+    # # This subparser intentionally defines no arguments itself.
+    # # All arguments after 'run' will be passed to Hydra by parse_known_args.
+    # subparsers.add_parser(
+    #     "run",
+    #     help="Run an evaluation using a Hydra configuration. All arguments after 'run' are passed to Hydra.",
+    # )
 
     # Use parse_known_args to allow Hydra to handle its own arguments
     return parser.parse_known_args(args)
@@ -586,23 +587,24 @@ def _extract_flag_value(argv_list, flag_name):
 
     setup_logging(args.verbose, getattr(args, "debug", False))
 
-    if args.command == "preview":
-        if preview_command is None:
-            raise ImportError("preview_command is unavailable")
-        return preview_command(args)
-    elif args.command == "deploy":
-        if deploy_command is None:
-            raise ImportError("deploy_command is unavailable")
-        return deploy_command(args)
-    elif args.command == "deploy-mcp":
-        from .cli_commands.deploy_mcp import deploy_mcp_command
-
-        return deploy_mcp_command(args)
-    elif args.command == "agent-eval":
-        from .cli_commands.agent_eval_cmd import agent_eval_command
-
-        return agent_eval_command(args)
-    elif args.command == "logs":
+    # NOTE: The following command handlers are disabled. To re-enable one, uncomment it AND its subparser in parse_args(), then remove the matching pytest skip markers in tests/.
+    # if args.command == "preview":
+    #     if preview_command is None:
+    #         raise ImportError("preview_command is unavailable")
+    #     return preview_command(args)
+    # elif args.command == "deploy":
+    #     if deploy_command is None:
+    #         raise ImportError("deploy_command is unavailable")
+    #     return deploy_command(args)
+    # elif args.command == "deploy-mcp":
+    #     from .cli_commands.deploy_mcp import deploy_mcp_command
+    #
+    #     return deploy_mcp_command(args)
+    # elif args.command == "agent-eval":
+    #     from .cli_commands.agent_eval_cmd import agent_eval_command
+    #
+    #     return agent_eval_command(args)
+    if args.command == "logs":
         from .cli_commands.logs import logs_command
 
         return logs_command(args)
@@ -621,89 +623,89 @@ def _extract_flag_value(argv_list, flag_name):
         from .cli_commands.local_test import local_test_command
 
         return local_test_command(args)
-    elif args.command == "run":
-        # For the 'run' command, Hydra takes over argument parsing.
-
-        # Filter out the initial '--' if present in remaining_argv, which parse_known_args might add
-        hydra_specific_args = [arg for arg in remaining_argv if arg != "--"]
-
-        # Auto-detect local conf directory and add it to config path if not explicitly provided
-        has_config_path = any(arg.startswith("--config-path") for arg in hydra_specific_args)
-        current_dir = os.getcwd()
-        local_conf_dir = os.path.join(current_dir, "conf")
-
-        if not has_config_path and os.path.isdir(local_conf_dir):
-            logger.info("Auto-detected local conf directory: %s", local_conf_dir)
-            hydra_specific_args = [
-                "--config-path",
-                local_conf_dir,
-            ] + hydra_specific_args
-
-        processed_hydra_args = []
-        i = 0
-        while i < len(hydra_specific_args):
-            arg = hydra_specific_args[i]
-            if arg == "--config-path":
-                processed_hydra_args.append(arg)
-                i += 1
-                if i < len(hydra_specific_args):
-                    path_val = hydra_specific_args[i]
-                    abs_path = os.path.abspath(path_val)
-                    logger.debug(
-                        "Converting relative --config-path '%s' (space separated) to absolute '%s'",
-                        path_val,
-                        abs_path,
-                    )
-                    processed_hydra_args.append(abs_path)
-                else:
-                    logger.error("--config-path specified without a value.")
-            elif arg.startswith("--config-path="):
-                flag_part, path_val = arg.split("=", 1)
-                processed_hydra_args.append(flag_part)
-                abs_path = os.path.abspath(path_val)
-                logger.debug(
-                    "Converting relative --config-path '%s' (equals separated) to absolute '%s'",
-                    path_val,
-                    abs_path,
-                )
-                processed_hydra_args.append(abs_path)
-            else:
-                processed_hydra_args.append(arg)
-            i += 1
-
-        sys.argv = [sys.argv[0]] + processed_hydra_args
-        logger.info("SYSCALL_ARGV_FOR_HYDRA (after potential abspath conversion): %s", sys.argv)
-
-        try:
-            from .cli_commands.run_eval_cmd import hydra_cli_entry_point
-
-            hydra_entry = cast(Any, hydra_cli_entry_point)
-            hydra_entry()  # type: ignore  # pylint: disable=no-value-for-parameter
-            return 0
-        except Exception as e:  # pylint: disable=broad-except
-            error_msg = str(e)
-            logger.error("Evaluation failed: %s", e)
-
-            # Provide helpful suggestions for common Hydra/config errors
-            if "Cannot find primary config" in error_msg:
-                logger.error("HINT: Configuration file not found.")
-                logger.error("SOLUTION: Ensure you have a config file in ./conf/ directory")
-                logger.error("Try: eval-protocol run --config-name simple_uipath_eval")
-            elif "missing from config" in error_msg or "MissingMandatoryValue" in error_msg:
-                logger.error("HINT: Required configuration values are missing.")
-                logger.error("SOLUTION: Check your config file for missing required fields")
-            elif "Config search path" in error_msg:
-                logger.error("HINT: Hydra cannot find the configuration directory.")
-                logger.error("SOLUTION: Create a ./conf directory with your config files")
-            elif "ValidationError" in error_msg:
-                logger.error("HINT: Configuration validation failed.")
-                logger.error("SOLUTION: Run 'eval-protocol validate-data --file your_data.jsonl' to check data")
-
-            logger.error("\nQuick fix suggestions:")
-            logger.error("1. Use the simplified setup: eval-protocol run --config-name simple_uipath_eval")
-            logger.error("2. Validate your data first: eval-protocol validate-data --file data.jsonl --schema agent")
-            logger.error("3. Ensure you have: ./conf/simple_uipath_eval.yaml and ./uipath_reward.py")
-            return 1
+    # elif args.command == "run":
+    #     # For the 'run' command, Hydra takes over argument parsing.
+    #
+    #     # Filter out the initial '--' if present in remaining_argv, which parse_known_args might add
+    #     hydra_specific_args = [arg for arg in remaining_argv if arg != "--"]
+    #
+    #     # Auto-detect local conf directory and add it to config path if not explicitly provided
+    #     has_config_path = any(arg.startswith("--config-path") for arg in hydra_specific_args)
+    #     current_dir = os.getcwd()
+    #     local_conf_dir = os.path.join(current_dir, "conf")
+    #
+    #     if not has_config_path and os.path.isdir(local_conf_dir):
+    #         logger.info("Auto-detected local conf directory: %s", local_conf_dir)
+    #         hydra_specific_args = [
+    #             "--config-path",
+    #             local_conf_dir,
+    #         ] + hydra_specific_args
+    #
+    #     processed_hydra_args = []
+    #     i = 0
+    #     while i < len(hydra_specific_args):
+    #         arg = hydra_specific_args[i]
+    #         if arg == "--config-path":
+    #             processed_hydra_args.append(arg)
+    #             i += 1
+    #             if i < len(hydra_specific_args):
+    #                 path_val = hydra_specific_args[i]
+    #                 abs_path = os.path.abspath(path_val)
+    #                 logger.debug(
+    #                     "Converting relative --config-path '%s' (space separated) to absolute '%s'",
+    #                     path_val,
+    #                     abs_path,
+    #                 )
+    #                 processed_hydra_args.append(abs_path)
+    #             else:
+    #                 logger.error("--config-path specified without a value.")
+    #         elif arg.startswith("--config-path="):
+    #             flag_part, path_val = arg.split("=", 1)
+    #             processed_hydra_args.append(flag_part)
+    #             abs_path = os.path.abspath(path_val)
+    #             logger.debug(
+    #                 "Converting relative --config-path '%s' (equals separated) to absolute '%s'",
+    #                 path_val,
+    #                 abs_path,
+    #             )
+    #             processed_hydra_args.append(abs_path)
+    #         else:
+    #             processed_hydra_args.append(arg)
+    #         i += 1
+    #
+    #     sys.argv = [sys.argv[0]] + processed_hydra_args
+    #     logger.info("SYSCALL_ARGV_FOR_HYDRA (after potential abspath conversion): %s", sys.argv)
+    #
+    #     try:
+    #         from .cli_commands.run_eval_cmd import hydra_cli_entry_point
+    #
+    #         hydra_entry = cast(Any, hydra_cli_entry_point)
+    #         hydra_entry()  # type: ignore  # pylint: disable=no-value-for-parameter
+    #         return 0
+    #     except Exception as e:  # pylint: disable=broad-except
+    #         error_msg = str(e)
+    #         logger.error("Evaluation failed: %s", e)
+    #
+    #         # Provide helpful suggestions for common Hydra/config errors
+    #         if "Cannot find primary config" in error_msg:
+    #             logger.error("HINT: Configuration file not found.")
+    #             logger.error("SOLUTION: Ensure you have a config file in ./conf/ directory")
+    #             logger.error("Try: eval-protocol run --config-name simple_uipath_eval")
+    #         elif "missing from config" in error_msg or "MissingMandatoryValue" in error_msg:
+    #             logger.error("HINT: Required configuration values are missing.")
+    #             logger.error("SOLUTION: Check your config file for missing required fields")
+    #         elif "Config search path" in error_msg:
+    #             logger.error("HINT: Hydra cannot find the configuration directory.")
+    #             logger.error("SOLUTION: Create a ./conf directory with your config files")
+    #         elif "ValidationError" in error_msg:
+    #             logger.error("HINT: Configuration validation failed.")
+    #             logger.error("SOLUTION: Run 'eval-protocol validate-data --file your_data.jsonl' to check data")
+    #
+    #         logger.error("\nQuick fix suggestions:")
+    #         logger.error("1. Use the simplified setup: eval-protocol run --config-name simple_uipath_eval")
+    #         logger.error("2. Validate your data first: eval-protocol validate-data --file data.jsonl --schema agent")
+    #         logger.error("3. Ensure you have: ./conf/simple_uipath_eval.yaml and ./uipath_reward.py")
+    #         return 1
     else:
         temp_parser = argparse.ArgumentParser(prog=os.path.basename(original_script_name))
         temp_parser.print_help()
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 8e852940..050b98d6 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -11,6 +11,7 @@
 class TestCLI:
     """Tests for the CLI functionality."""
 
+    @pytest.mark.skip(reason="preview and deploy commands are currently disabled in cli.py")
     def test_parse_args(self):
         """Test the argument parser."""
         # Test preview command
diff --git a/tests/test_cli_args.py b/tests/test_cli_args.py
index 21817879..9eccf9e0 100644
--- a/tests/test_cli_args.py
+++ b/tests/test_cli_args.py
@@ -6,6 +6,7 @@
 from eval_protocol.cli import parse_args
 
 
+@pytest.mark.skip(reason="preview and deploy commands are currently disabled in cli.py")
 class TestCliArgParsing:
     # --- Tests for 'preview' command ---
     def test_preview_with_remote_url_and_samples(self):
diff --git a/tests/test_minimal.py b/tests/test_minimal.py
index 3c55f8a8..94b143cb 100644
--- a/tests/test_minimal.py
+++ b/tests/test_minimal.py
@@ -14,6 +14,7 @@
 import pytest
 
 
+@pytest.mark.skip(reason="agent-eval command is currently disabled in cli.py")
 def test_cli_help():
     """Test that the CLI help message works."""
     result = subprocess.run(["eval-protocol", "--help"], capture_output=True, text=True, check=False)
@@ -25,6 +26,7 @@ def test_cli_help():
     assert "agent-eval" in result.stdout
 
 
+@pytest.mark.skip(reason="agent-eval command is currently disabled in cli.py")
 def test_cli_agent_eval_help():
     """Test that the agent-eval help message works."""
     result = subprocess.run(