From d0f58fa5d2a81b7a18233135f1aee027065aa68d Mon Sep 17 00:00:00 2001
From: benjibc <youfychenbc5000@gmail.com>
Date: Wed, 8 Oct 2025 01:19:50 +0000
Subject: [PATCH 1/3] support fireworks dev tier

---
 eval_protocol/auth.py                | 122 +++++++++++++++++++++------
 eval_protocol/cli.py                 | 105 ++++++++++++++++++-----
 eval_protocol/cli_commands/upload.py |  11 +++
 eval_protocol/evaluation.py          |   9 ++
 4 files changed, 199 insertions(+), 48 deletions(-)
diff --git a/eval_protocol/auth.py b/eval_protocol/auth.py
index c90c6aef..63f939a9 100644
--- a/eval_protocol/auth.py
+++ b/eval_protocol/auth.py
@@ -6,10 +6,48 @@
 
 logger = logging.getLogger(__name__)
 
+# Default locations (used for tests and as fallback). Actual resolution is dynamic via _get_auth_ini_file().
 FIREWORKS_CONFIG_DIR = Path.home() / ".fireworks"
 AUTH_INI_FILE = FIREWORKS_CONFIG_DIR / "auth.ini"
 
 
+def _get_profile_base_dir() -> Path:
+    """
+    Resolve the Fireworks configuration base directory following firectl behavior:
+    - Default: ~/.fireworks
+    - If FIREWORKS_PROFILE is set and non-empty: ~/.fireworks/profiles/<profile>
+    """
+    profile_name = os.environ.get("FIREWORKS_PROFILE", "").strip()
+    base_dir = Path.home() / ".fireworks"
+    if profile_name:
+        base_dir = base_dir / "profiles" / profile_name
+    return base_dir
+
+
+def _get_auth_ini_file() -> Path:
+    """
+    Determine the auth.ini file path.
+    Priority:
+      1) FIREWORKS_AUTH_FILE env var when set
+      2) ~/.fireworks[/profiles/<profile>]/auth.ini (profile driven)
+    """
+    auth_file_env = os.environ.get("FIREWORKS_AUTH_FILE")
+    if auth_file_env:
+        return Path(auth_file_env)
+    return _get_profile_base_dir() / "auth.ini"
+
+
+def _is_profile_active() -> bool:
+    """
+    Returns True if a specific profile or explicit auth file is active.
+    In this case, profile-based credentials should take precedence over env vars.
+    """
+    if os.environ.get("FIREWORKS_AUTH_FILE"):
+        return True
+    prof = os.environ.get("FIREWORKS_PROFILE", "").strip()
+    return bool(prof)
+
+
 def _parse_simple_auth_file(file_path: Path) -> Dict[str, str]:
     """
     Parses an auth file with simple key=value lines.
@@ -20,7 +58,7 @@ def _parse_simple_auth_file(file_path: Path) -> Dict[str, str]:
     if not file_path.exists():
         return creds
     try:
-        with open(file_path, "r") as f:
+        with open(file_path, "r", encoding="utf-8") as f:
             for line in f:
                 line = line.strip()
                 if not line or line.startswith("#") or line.startswith(";"):
@@ -39,7 +77,7 @@ def _parse_simple_auth_file(file_path: Path) -> Dict[str, str]:
                     if key in ["api_key", "account_id"] and value:
                         creds[key] = value
     except Exception as e:
-        logger.warning(f"Error during simple parsing of {file_path}: {e}")
+        logger.warning("Error during simple parsing of %s: %s", str(file_path), e)
     return creds
 
 
@@ -48,13 +86,14 @@ def _get_credential_from_config_file(key_name: str) -> Optional[str]:
     Helper to get a specific credential (api_key or account_id) from auth.ini.
     Tries simple parsing first, then configparser.
     """
-    if not AUTH_INI_FILE.exists():
+    auth_ini_path = _get_auth_ini_file()
+    if not auth_ini_path.exists():
         return None
 
     # 1. Try simple key-value parsing first
-    simple_creds = _parse_simple_auth_file(AUTH_INI_FILE)
+    simple_creds = _parse_simple_auth_file(auth_ini_path)
     if key_name in simple_creds:
-        logger.debug(f"Using {key_name} from simple key-value parsing of {AUTH_INI_FILE}.")
+        logger.debug("Using %s from simple key-value parsing of %s.", key_name, str(auth_ini_path))
         return simple_creds[key_name]
 
     # 2. Fallback to configparser if not found via simple parsing or if simple parsing failed
@@ -62,30 +101,35 @@ def _get_credential_from_config_file(key_name: str) -> Optional[str]:
     #    but only if simple parsing didn't yield the key.
     try:
         config = configparser.ConfigParser()
-        config.read(AUTH_INI_FILE)
+        config.read(auth_ini_path)
 
         # Try [fireworks] section
         if "fireworks" in config and config.has_option("fireworks", key_name):
             value_from_file = config.get("fireworks", key_name)
             if value_from_file:
-                logger.debug(f"Using {key_name} from [fireworks] section in {AUTH_INI_FILE}.")
+                logger.debug("Using %s from [fireworks] section in %s.", key_name, str(auth_ini_path))
                 return value_from_file
 
         # Try default section (configparser might place items without section header here)
         if config.has_option(config.default_section, key_name):
             value_from_default = config.get(config.default_section, key_name)
             if value_from_default:
-                logger.debug(f"Using {key_name} from default section [{config.default_section}] in {AUTH_INI_FILE}.")
+                logger.debug(
+                    "Using %s from default section [%s] in %s.",
+                    key_name,
+                    config.default_section,
+                    str(auth_ini_path),
+                )
                 return value_from_default
 
     except configparser.MissingSectionHeaderError:
         # This error implies the file is purely key-value, which simple parsing should have handled.
         # If simple parsing failed to get the key, then it's likely not there or malformed.
-        logger.debug(f"{AUTH_INI_FILE} has no section headers, and simple parsing did not find {key_name}.")
+        logger.debug("%s has no section headers, and simple parsing did not find %s.", str(auth_ini_path), key_name)
     except configparser.Error as e_config:
-        logger.warning(f"Configparser error reading {AUTH_INI_FILE} for {key_name}: {e_config}")
+        logger.warning("Configparser error reading %s for %s: %s", str(auth_ini_path), key_name, e_config)
     except Exception as e_general:
-        logger.warning(f"Unexpected error reading {AUTH_INI_FILE} for {key_name}: {e_general}")
+        logger.warning("Unexpected error reading %s for %s: %s", str(auth_ini_path), key_name, e_general)
 
     return None
 
@@ -101,14 +145,24 @@ def get_fireworks_api_key() -> Optional[str]:
     Returns:
         The API key if found, otherwise None.
     """
-    api_key = os.environ.get("FIREWORKS_API_KEY")
-    if api_key:
-        logger.debug("Using FIREWORKS_API_KEY from environment variable.")
-        return api_key
-
-    api_key_from_file = _get_credential_from_config_file("api_key")
-    if api_key_from_file:
-        return api_key_from_file
+    # If a profile is active, prefer profile file first, then env
+    if _is_profile_active():
+        api_key_from_file = _get_credential_from_config_file("api_key")
+        if api_key_from_file:
+            return api_key_from_file
+        api_key = os.environ.get("FIREWORKS_API_KEY")
+        if api_key:
+            logger.debug("Using FIREWORKS_API_KEY from environment variable (profile active but file missing).")
+            return api_key
+    else:
+        # Default behavior: env overrides file
+        api_key = os.environ.get("FIREWORKS_API_KEY")
+        if api_key:
+            logger.debug("Using FIREWORKS_API_KEY from environment variable.")
+            return api_key
+        api_key_from_file = _get_credential_from_config_file("api_key")
+        if api_key_from_file:
+            return api_key_from_file
 
     logger.debug("Fireworks API key not found in environment variables or auth.ini.")
     return None
@@ -125,14 +179,26 @@ def get_fireworks_account_id() -> Optional[str]:
     Returns:
         The Account ID if found, otherwise None.
     """
-    account_id = os.environ.get("FIREWORKS_ACCOUNT_ID")
-    if account_id:
-        logger.debug("Using FIREWORKS_ACCOUNT_ID from environment variable.")
-        return account_id
-
-    account_id_from_file = _get_credential_from_config_file("account_id")
-    if account_id_from_file:
-        return account_id_from_file
+    # If a profile is active, prefer profile file first, then env
+    if _is_profile_active():
+        account_id_from_file = _get_credential_from_config_file("account_id")
+        if account_id_from_file:
+            return account_id_from_file
+        account_id = os.environ.get("FIREWORKS_ACCOUNT_ID")
+        if account_id:
+            logger.debug(
+                "Using FIREWORKS_ACCOUNT_ID from environment variable (profile active but file missing)."
+            )
+            return account_id
+    else:
+        # Default behavior: env overrides file
+        account_id = os.environ.get("FIREWORKS_ACCOUNT_ID")
+        if account_id:
+            logger.debug("Using FIREWORKS_ACCOUNT_ID from environment variable.")
+            return account_id
+        account_id_from_file = _get_credential_from_config_file("account_id")
+        if account_id_from_file:
+            return account_id_from_file
 
     logger.debug("Fireworks Account ID not found in environment variables or auth.ini.")
     return None
@@ -152,5 +218,5 @@ def get_fireworks_api_base() -> str:
     if os.environ.get("FIREWORKS_API_BASE"):
         logger.debug("Using FIREWORKS_API_BASE from environment variable.")
     else:
-        logger.debug(f"FIREWORKS_API_BASE not set in environment, defaulting to {api_base}.")
+        logger.debug("FIREWORKS_API_BASE not set in environment, defaulting to %s.", api_base)
     return api_base
diff --git a/eval_protocol/cli.py b/eval_protocol/cli.py
index c0858f92..da12144b 100644
--- a/eval_protocol/cli.py
+++ b/eval_protocol/cli.py
@@ -3,32 +3,30 @@
 """
 
 import argparse
-import asyncio
-import json
 import logging
 import os
 import sys
-import traceback
-import uuid
 from pathlib import Path
+from typing import Any, cast
 
 logger = logging.getLogger(__name__)
 
 
-from .cli_commands.agent_eval_cmd import agent_eval_command
 from .cli_commands.common import setup_logging
-from .cli_commands.deploy import deploy_command
-from .cli_commands.deploy_mcp import deploy_mcp_command
-from .cli_commands.logs import logs_command
-from .cli_commands.preview import preview_command
-from .cli_commands.run_eval_cmd import hydra_cli_entry_point
-from .cli_commands.upload import upload_command
 
 
 def parse_args(args=None):
     """Parse command line arguments"""
     parser = argparse.ArgumentParser(description="eval-protocol: Tools for evaluation and reward modeling")
     parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging")
+    parser.add_argument(
+        "--profile",
+        help="Fireworks profile to use (reads ~/.fireworks/profiles/<name>/auth.ini and settings.ini)",
+    )
+    parser.add_argument(
+        "--server",
+        help="Fireworks API server hostname or URL (e.g., dev.api.fireworks.ai or https://dev.api.fireworks.ai)",
+    )
 
     subparsers = parser.add_subparsers(dest="command", help="Command to run")
 
@@ -356,12 +354,68 @@ def main():
             os.environ["PYTHONPATH"] = f"{current_dir}{os.pathsep}{current_pythonpath}"
         else:
             os.environ["PYTHONPATH"] = current_dir
-        logger.debug(f"Added current directory to PYTHONPATH: {current_dir}")
+        logger.debug("Added current directory to PYTHONPATH: %s", current_dir)
 
         # Also add to sys.path so it takes effect immediately for the current process
         if current_dir not in sys.path:
             sys.path.insert(0, current_dir)
 
+    # Pre-scan raw argv for global flags anywhere (before parsing or imports)
+    raw_argv = sys.argv[1:]
+
+    def _extract_flag_value(argv_list, flag_name):
+        # Supports --flag value and --flag=value
+        for i, tok in enumerate(argv_list):
+            if tok == flag_name:
+                if i + 1 < len(argv_list):
+                    return argv_list[i + 1]
+            elif tok.startswith(flag_name + "="):
+                return tok.split("=", 1)[1]
+        return None
+
+    pre_profile = _extract_flag_value(raw_argv, "--profile")
+    pre_server = _extract_flag_value(raw_argv, "--server")
+
+    # Handle Fireworks profile selection early so downstream modules see the env
+    profile = pre_profile
+    if profile:
+        try:
+            os.environ["FIREWORKS_PROFILE"] = profile
+            # Mirror firectl behavior: ~/.fireworks[/profiles/<profile>]
+            base_dir = Path.home() / ".fireworks"
+            if profile:
+                base_dir = base_dir / "profiles" / profile
+            os.makedirs(str(base_dir), mode=0o700, exist_ok=True)
+
+            # Provide helpful env hints for consumers (optional)
+            os.environ["FIREWORKS_AUTH_FILE"] = str(base_dir / "auth.ini")
+            os.environ["FIREWORKS_SETTINGS_FILE"] = str(base_dir / "settings.ini")
+            logger.debug("Using Fireworks profile '%s' at %s", profile, base_dir)
+        except OSError as e:
+            logger.warning("Failed to initialize Fireworks profile '%s': %s", profile, e)
+
+        # Proactively resolve and export account_id from the active profile to avoid stale .env overrides
+        try:
+            from eval_protocol.auth import get_fireworks_account_id as _resolve_account_id
+
+            resolved_account = _resolve_account_id()
+            if resolved_account:
+                os.environ["FIREWORKS_ACCOUNT_ID"] = resolved_account
+                logger.debug("Resolved account_id from profile '%s': %s", profile, resolved_account)
+        except Exception as e:  # noqa: B902
+            logger.debug("Unable to resolve account_id from profile '%s': %s", profile, e)
+
+    # Handle Fireworks server selection early
+    server = pre_server
+    if server:
+        # Normalize to full URL if just a hostname is supplied
+        normalized = server.strip()
+        if not normalized.startswith("http://") and not normalized.startswith("https://"):
+            normalized = f"https://{normalized}"
+        os.environ["FIREWORKS_API_BASE"] = normalized
+        logger.debug("Using Fireworks API base: %s", normalized)
+
+    # Now parse args normally (so help/commands work), after globals applied
     # Store original sys.argv[0] because Hydra might manipulate it
     # and we need it if we're not calling a Hydra app.
     original_script_name = sys.argv[0]
@@ -370,16 +424,22 @@ def main():
     setup_logging(args.verbose, getattr(args, "debug", False))
 
     if args.command == "preview":
+        from .cli_commands.preview import preview_command
         return preview_command(args)
     elif args.command == "deploy":
+        from .cli_commands.deploy import deploy_command
         return deploy_command(args)
     elif args.command == "deploy-mcp":
+        from .cli_commands.deploy_mcp import deploy_mcp_command
         return deploy_mcp_command(args)
     elif args.command == "agent-eval":
+        from .cli_commands.agent_eval_cmd import agent_eval_command
         return agent_eval_command(args)
     elif args.command == "logs":
+        from .cli_commands.logs import logs_command
         return logs_command(args)
     elif args.command == "upload":
+        from .cli_commands.upload import upload_command
         return upload_command(args)
     elif args.command == "run":
         # For the 'run' command, Hydra takes over argument parsing.
@@ -393,7 +453,7 @@ def main():
         local_conf_dir = os.path.join(current_dir, "conf")
 
         if not has_config_path and os.path.isdir(local_conf_dir):
-            logger.info(f"Auto-detected local conf directory: {local_conf_dir}")
+            logger.info("Auto-detected local conf directory: %s", local_conf_dir)
             hydra_specific_args = [
                 "--config-path",
                 local_conf_dir,
@@ -410,18 +470,21 @@ def main():
                     path_val = hydra_specific_args[i]
                     abs_path = os.path.abspath(path_val)
                     logger.debug(
-                        f"Converting relative --config-path '{path_val}' (space separated) to absolute '{abs_path}'"
+                        "Converting relative --config-path '%s' (space separated) to absolute '%s'",
+                        path_val,
+                        abs_path,
                     )
                     processed_hydra_args.append(abs_path)
                 else:
                     logger.error("--config-path specified without a value.")
-                    pass
             elif arg.startswith("--config-path="):
                 flag_part, path_val = arg.split("=", 1)
                 processed_hydra_args.append(flag_part)
                 abs_path = os.path.abspath(path_val)
                 logger.debug(
-                    f"Converting relative --config-path '{path_val}' (equals separated) to absolute '{abs_path}'"
+                    "Converting relative --config-path '%s' (equals separated) to absolute '%s'",
+                    path_val,
+                    abs_path,
                 )
                 processed_hydra_args.append(abs_path)
             else:
@@ -429,14 +492,16 @@ def main():
             i += 1
 
         sys.argv = [sys.argv[0]] + processed_hydra_args
-        logger.info(f"SYSCALL_ARGV_FOR_HYDRA (after potential abspath conversion): {sys.argv}")
+        logger.info("SYSCALL_ARGV_FOR_HYDRA (after potential abspath conversion): %s", sys.argv)
 
         try:
-            hydra_cli_entry_point()
+            from .cli_commands.run_eval_cmd import hydra_cli_entry_point
+            hydra_entry = cast(Any, hydra_cli_entry_point)
+            hydra_entry()  # type: ignore  # pylint: disable=no-value-for-parameter
             return 0
-        except Exception as e:
+        except Exception as e:  # pylint: disable=broad-except
             error_msg = str(e)
-            logger.error(f"Evaluation failed: {e}")
+            logger.error("Evaluation failed: %s", e)
 
             # Provide helpful suggestions for common Hydra/config errors
             if "Cannot find primary config" in error_msg:
diff --git a/eval_protocol/cli_commands/upload.py b/eval_protocol/cli_commands/upload.py
index 44ff6607..74fd844e 100644
--- a/eval_protocol/cli_commands/upload.py
+++ b/eval_protocol/cli_commands/upload.py
@@ -496,6 +496,16 @@ def upload_command(args: argparse.Namespace) -> int:
         # Normalize the evaluator ID to meet Fireworks requirements
         evaluator_id = _normalize_evaluator_id(evaluator_id)
 
+        # Compute entry point metadata for backend: prefer module:function, fallback to path::function
+        func_name = qualname.split(".")[-1]
+        module_part = qualname.rsplit(".", 1)[0] if "." in qualname else ""
+        # Use pytest pyargs style: package.module:function
+        if module_part and "." in module_part:
+            entry_point = f"{module_part}:{func_name}"
+        else:
+            # If we cannot derive a dotted module path, don't set entry point
+            entry_point = None
+
         print(f"\nUploading evaluator '{evaluator_id}' for {qualname.split('.')[-1]}...")
         try:
             result = create_evaluation(
@@ -507,6 +517,7 @@ def upload_command(args: argparse.Namespace) -> int:
                 display_name=display_name or evaluator_id,
                 description=description or f"Evaluator for {qualname}",
                 force=force,
+                entry_point=entry_point,
             )
             name = result.get("name", evaluator_id) if isinstance(result, dict) else evaluator_id
 
diff --git a/eval_protocol/evaluation.py b/eval_protocol/evaluation.py
index caedbf67..d059071f 100644
--- a/eval_protocol/evaluation.py
+++ b/eval_protocol/evaluation.py
@@ -161,6 +161,7 @@ def __init__(
         reward_function_mode: EvaluationMode = "pointwise",  # New parameter for input processing mode
         account_id: Optional[str] = None,
         api_key: Optional[str] = None,
+        entry_point: Optional[str] = None,
     ):
         self.multi_metrics = multi_metrics
         self.remote_url = remote_url
@@ -175,6 +176,8 @@ def __init__(
         self.api_base = os.environ.get("FIREWORKS_API_BASE", "https://api.fireworks.ai")
         # Optional requirements string for multi-metric mode (when loaded differently)
         self._loaded_multi_metric_requirements_str: Optional[str] = None
+        # Optional entry point metadata (module::function or path::function)
+        self.entry_point: Optional[str] = entry_point
 
         if self.ts_mode_config:
             python_code = self.ts_mode_config.get("python_code")
@@ -530,6 +533,10 @@ def create(self, evaluator_id, display_name=None, description=None, force=False)
             "evaluatorId": evaluator_id,
         }
 
+        # Include optional entry point when provided
+        if self.entry_point:
+            payload_data["evaluator"]["entryPoint"] = self.entry_point
+
         if "dev.api.fireworks.ai" in self.api_base and account_id == "fireworks":
             account_id = "pyroworks-dev"
 
@@ -919,6 +926,7 @@ def create_evaluation(
     reward_function_mode: EvaluationMode = "pointwise",  # Added
     account_id: Optional[str] = None,
     api_key: Optional[str] = None,
+    entry_point: Optional[str] = None,
 ):
     ts_mode_config = None
     if python_code_to_evaluate:
@@ -938,6 +946,7 @@ def create_evaluation(
         reward_function_mode=reward_function_mode,
         account_id=account_id,
         api_key=api_key,
+        entry_point=entry_point,
     )
 
     if remote_url:

From 644eb790dee8502bc65211c446bb60a2625c1350 Mon Sep 17 00:00:00 2001
From: benjibc <youfychenbc5000@gmail.com>
Date: Wed, 8 Oct 2025 05:38:40 +0000
Subject: [PATCH 2/3] more fixes

---
 eval_protocol/auth.py                |   4 +-
 eval_protocol/cli.py                 |   7 +
 eval_protocol/cli_commands/upload.py | 129 ++++++++++++---
 eval_protocol/evaluation.py          |   8 +
 tests/test_upload_entrypoint.py      | 227 +++++++++++++++++++++++++++
 5 files changed, 352 insertions(+), 23 deletions(-)
 create mode 100644 tests/test_upload_entrypoint.py

diff --git a/eval_protocol/auth.py b/eval_protocol/auth.py
index 63f939a9..a9840d96 100644
--- a/eval_protocol/auth.py
+++ b/eval_protocol/auth.py
@@ -186,9 +186,7 @@ def get_fireworks_account_id() -> Optional[str]:
             return account_id_from_file
         account_id = os.environ.get("FIREWORKS_ACCOUNT_ID")
         if account_id:
-            logger.debug(
-                "Using FIREWORKS_ACCOUNT_ID from environment variable (profile active but file missing)."
-            )
+            logger.debug("Using FIREWORKS_ACCOUNT_ID from environment variable (profile active but file missing).")
             return account_id
     else:
         # Default behavior: env overrides file
diff --git a/eval_protocol/cli.py b/eval_protocol/cli.py
index da12144b..5916650c 100644
--- a/eval_protocol/cli.py
+++ b/eval_protocol/cli.py
@@ -425,21 +425,27 @@ def _extract_flag_value(argv_list, flag_name):
 
     if args.command == "preview":
         from .cli_commands.preview import preview_command
+
         return preview_command(args)
     elif args.command == "deploy":
         from .cli_commands.deploy import deploy_command
+
         return deploy_command(args)
     elif args.command == "deploy-mcp":
         from .cli_commands.deploy_mcp import deploy_mcp_command
+
         return deploy_mcp_command(args)
     elif args.command == "agent-eval":
         from .cli_commands.agent_eval_cmd import agent_eval_command
+
         return agent_eval_command(args)
     elif args.command == "logs":
         from .cli_commands.logs import logs_command
+
         return logs_command(args)
     elif args.command == "upload":
         from .cli_commands.upload import upload_command
+
         return upload_command(args)
     elif args.command == "run":
         # For the 'run' command, Hydra takes over argument parsing.
@@ -496,6 +502,7 @@ def _extract_flag_value(argv_list, flag_name):
 
         try:
             from .cli_commands.run_eval_cmd import hydra_cli_entry_point
+
             hydra_entry = cast(Any, hydra_cli_entry_point)
             hydra_entry()  # type: ignore  # pylint: disable=no-value-for-parameter
             return 0
diff --git a/eval_protocol/cli_commands/upload.py b/eval_protocol/cli_commands/upload.py
index 74fd844e..c1f85827 100644
--- a/eval_protocol/cli_commands/upload.py
+++ b/eval_protocol/cli_commands/upload.py
@@ -179,22 +179,85 @@ def _discover_tests(root: str) -> list[DiscoveredTest]:
     return sorted(by_qual.values(), key=lambda x: (x.file_path, x.lineno or 0))
 
 
+def _to_pyargs_nodeid(file_path: str, func_name: str) -> str | None:
+    """Attempt to build a pytest nodeid suitable for `pytest <nodeid>`.
+
+    Preference order:
+    1) Dotted package module path with double-colon: pkg.subpkg.module::func
+    2) Filesystem path with double-colon: path/to/module.py::func
+
+    Returns dotted form when package root can be inferred (directory chain with __init__.py
+    leading up to a directory contained in sys.path). Returns None if no reasonable
+    nodeid can be created (should be rare).
+    """
+    try:
+        abs_path = os.path.abspath(file_path)
+        dir_path, filename = os.path.split(abs_path)
+        module_base, ext = os.path.splitext(filename)
+        if ext != ".py":
+            # Not a python file
+            return None
+
+        # Walk up while packages have __init__.py
+        segments: list[str] = [module_base]
+        current = dir_path
+        package_root = None
+        while True:
+            if os.path.isfile(os.path.join(current, "__init__.py")):
+                segments.insert(0, os.path.basename(current))
+                parent = os.path.dirname(current)
+                # Stop if parent is not within current sys.path import roots
+                if parent == current:
+                    break
+                current = parent
+            else:
+                package_root = current
+                break
+
+        # If we found a package chain, check that the package_root is importable (in sys.path)
+        if package_root and any(
+            os.path.abspath(sp).rstrip(os.sep) == os.path.abspath(package_root).rstrip(os.sep) for sp in sys.path
+        ):
+            dotted = ".".join(segments)
+            return f"{dotted}::{func_name}"
+
+        # Do not emit a dotted top-level module for non-packages; prefer path-based nodeid
+
+        # Fallback to relative path (if under cwd) or absolute path
+        cwd = os.getcwd()
+        try:
+            rel = os.path.relpath(abs_path, cwd)
+        except Exception:
+            rel = abs_path
+        return f"{rel}::{func_name}"
+    except Exception:
+        return None
+
+
 def _parse_entry(entry: str, cwd: str) -> tuple[str, str]:
-    # Accept module:function or path::function
+    # Accept module::function, path::function, or legacy module:function
     entry = entry.strip()
     if "::" in entry:
-        path_part, func = entry.split("::", 1)
-        abs_path = os.path.abspath(os.path.join(cwd, path_part))
-        module_name = Path(abs_path).stem
-        return abs_path, func
+        target, func = entry.split("::", 1)
+        # Determine if target looks like a filesystem path; otherwise treat as module path
+        looks_like_path = (
+            "/" in target or "\\" in target or target.endswith(".py") or os.path.exists(os.path.join(cwd, target))
+        )
+        if looks_like_path:
+            abs_path = os.path.abspath(os.path.join(cwd, target))
+            return abs_path, func
+        else:
+            # Treat as module path for --pyargs style
+            return target, func
     elif ":" in entry:
+        # Legacy support: module:function → convert to module path + function
         module, func = entry.split(":", 1)
         return module, func
     else:
-        raise ValueError("--entry must be in 'module:function' or 'path::function' format")
+        raise ValueError("--entry must be in 'module::function', 'path::function', or 'module:function' format")
 
 
-def _generate_ts_mode_code_from_entry(entry: str, cwd: str) -> tuple[str, str, str]:
+def _generate_ts_mode_code_from_entry(entry: str, cwd: str) -> tuple[str, str, str, str]:
     target, func = _parse_entry(entry, cwd)
 
     # Check if target looks like a file path
@@ -217,10 +280,12 @@ def _generate_ts_mode_code_from_entry(entry: str, cwd: str) -> tuple[str, str, s
         sys.modules[spec.name] = module
         spec.loader.exec_module(module)  # type: ignore[attr-defined]
         module_name = spec.name
+        source_file_path = target
     else:
         # Treat as module path (e.g., "my_package.my_module")
         module_name = target
         module = importlib.import_module(module_name)
+        source_file_path = getattr(module, "__file__", "") or ""
 
     if not hasattr(module, func):
         raise ValueError(f"Function '{func}' not found in module '{module_name}'")
@@ -238,7 +303,7 @@ def _generate_ts_mode_code_from_entry(entry: str, cwd: str) -> tuple[str, str, s
             nodeids=[],
         )
     )
-    return code, file_name, qualname
+    return code, file_name, qualname, os.path.abspath(source_file_path) if source_file_path else ""
 
 
 def _generate_ts_mode_code(test: DiscoveredTest) -> tuple[str, str]:
@@ -440,10 +505,8 @@ def upload_command(args: argparse.Namespace) -> int:
         entries = [e.strip() for e in re.split(r"[,\s]+", entries_arg) if e.strip()]
         selected_specs: list[tuple[str, str, str, str]] = []
         for e in entries:
-            code, file_name, qualname = _generate_ts_mode_code_from_entry(e, root)
-            # For --entry mode, extract file path from the entry
-            file_path = e.split("::")[0] if "::" in e else ""
-            selected_specs.append((code, file_name, qualname, file_path))
+            code, file_name, qualname, resolved_path = _generate_ts_mode_code_from_entry(e, root)
+            selected_specs.append((code, file_name, qualname, resolved_path))
     else:
         print("Scanning for evaluation tests...")
         tests = _discover_tests(root)
@@ -496,15 +559,21 @@ def upload_command(args: argparse.Namespace) -> int:
         # Normalize the evaluator ID to meet Fireworks requirements
         evaluator_id = _normalize_evaluator_id(evaluator_id)
 
-        # Compute entry point metadata for backend: prefer module:function, fallback to path::function
+        # Compute entry point metadata for backend as a pytest nodeid usable with `pytest <entrypoint>`
+        # Always prefer a path-based nodeid to work in plain pytest environments (server may not use --pyargs)
         func_name = qualname.split(".")[-1]
-        module_part = qualname.rsplit(".", 1)[0] if "." in qualname else ""
-        # Use pytest pyargs style: package.module:function
-        if module_part and "." in module_part:
-            entry_point = f"{module_part}:{func_name}"
+        entry_point = None
+        if source_file_path:
+            # Use path relative to current working directory if possible
+            abs_path = os.path.abspath(source_file_path)
+            try:
+                rel = os.path.relpath(abs_path, root)
+            except Exception:
+                rel = abs_path
+            entry_point = f"{rel}::{func_name}"
         else:
-            # If we cannot derive a dotted module path, don't set entry point
-            entry_point = None
+            # Fallback: use filename from qualname only (rare)
+            entry_point = f"{func_name}.py::{func_name}"
 
         print(f"\nUploading evaluator '{evaluator_id}' for {qualname.split('.')[-1]}...")
         try:
@@ -524,7 +593,27 @@ def upload_command(args: argparse.Namespace) -> int:
             # Print success message with Fireworks dashboard link
             print(f"\n✅ Successfully uploaded evaluator: {evaluator_id}")
             print("📊 View in Fireworks Dashboard:")
-            print(f"   https://app.fireworks.ai/dashboard/evaluators/{evaluator_id}")
+            # Map API base to app host (e.g., dev.api.fireworks.ai -> dev.app.fireworks.ai)
+            from urllib.parse import urlparse
+
+            api_base = os.environ.get("FIREWORKS_API_BASE", "https://api.fireworks.ai")
+            try:
+                parsed = urlparse(api_base)
+                host = parsed.netloc or parsed.path  # handle cases where scheme may be missing
+                # Mapping rules:
+                # - dev.api.fireworks.ai → dev.fireworks.ai
+                # - *.api.fireworks.ai → *.app.fireworks.ai (default)
+                if host.startswith("dev.api.fireworks.ai"):
+                    app_host = "dev.fireworks.ai"
+                elif host.startswith("api."):
+                    app_host = host.replace("api.", "app.", 1)
+                else:
+                    app_host = host
+                scheme = parsed.scheme or "https"
+                dashboard_url = f"{scheme}://{app_host}/dashboard/evaluators/{evaluator_id}"
+            except Exception:
+                dashboard_url = f"https://app.fireworks.ai/dashboard/evaluators/{evaluator_id}"
+            print(f"   {dashboard_url}")
             print()
         except Exception as e:
             print(f"Failed to upload {qualname}: {e}")
diff --git a/eval_protocol/evaluation.py b/eval_protocol/evaluation.py
index d059071f..9d75a22a 100644
--- a/eval_protocol/evaluation.py
+++ b/eval_protocol/evaluation.py
@@ -536,6 +536,14 @@ def create(self, evaluator_id, display_name=None, description=None, force=False)
         # Include optional entry point when provided
         if self.entry_point:
             payload_data["evaluator"]["entryPoint"] = self.entry_point
+            logger.info(f"Including entryPoint in payload: {self.entry_point}")
+
+        # Debug log the create payload structure (without sample data)
+        try:
+            logger.info(f"Create API Request Payload: {json.dumps(payload_data, indent=2)}")
+        except Exception:
+            # If serialization fails for any reason, skip debug dump
+            pass
 
         if "dev.api.fireworks.ai" in self.api_base and account_id == "fireworks":
             account_id = "pyroworks-dev"
diff --git a/tests/test_upload_entrypoint.py b/tests/test_upload_entrypoint.py
new file mode 100644
index 00000000..2ae23024
--- /dev/null
+++ b/tests/test_upload_entrypoint.py
@@ -0,0 +1,227 @@
+import os
+import sys
+from types import SimpleNamespace
+
+
+def _write_file(path: str, content: str) -> None:
+    os.makedirs(os.path.dirname(path), exist_ok=True)
+    with open(path, "w", encoding="utf-8") as f:
+        f.write(content)
+
+
+def test_entrypoint_from_path_is_path_based_nodeid(tmp_path, monkeypatch):
+    # Arrange: create a simple module file in a temp project root
+    project_root = tmp_path
+    quickstart_path = project_root / "quickstart.py"
+    _write_file(
+        str(quickstart_path),
+        """
+def test_llm_judge(row=None):
+    return 1
+""".lstrip(),
+    )
+
+    # Capture arguments passed to create_evaluation
+    captured = {}
+
+    from eval_protocol.cli_commands import upload as upload_mod
+
+    def fake_create_evaluation(**kwargs):
+        captured.update(kwargs)
+        # Simulate API response
+        return {"name": kwargs.get("evaluator_id", "eval")}
+
+    monkeypatch.setattr(upload_mod, "create_evaluation", fake_create_evaluation)
+
+    # Act: call upload_command with --entry as path::function
+    args = SimpleNamespace(
+        path=str(project_root),
+        entry=f"{quickstart_path.name}::test_llm_judge",
+        id=None,
+        display_name=None,
+        description=None,
+        force=False,
+        yes=True,
+    )
+
+    exit_code = upload_mod.upload_command(args)
+
+    # Assert
+    assert exit_code == 0
+    assert captured.get("entry_point") == f"{quickstart_path.name}::test_llm_judge"
+
+
+def test_entrypoint_from_module_is_path_based_nodeid(tmp_path, monkeypatch):
+    # Arrange: create a package with a module
+    project_root = tmp_path
+    pkg_dir = project_root / "mypkg"
+    _write_file(str(pkg_dir / "__init__.py"), "")
+    _write_file(
+        str(pkg_dir / "quickstart.py"),
+        """
+def test_llm_judge(row=None):
+    return 1
+""".lstrip(),
+    )
+
+    # Ensure importable
+    sys.path.insert(0, str(project_root))
+
+    captured = {}
+    from eval_protocol.cli_commands import upload as upload_mod
+
+    def fake_create_evaluation(**kwargs):
+        captured.update(kwargs)
+        return {"name": kwargs.get("evaluator_id", "eval")}
+
+    monkeypatch.setattr(upload_mod, "create_evaluation", fake_create_evaluation)
+
+    # Act: use module::function
+    args = SimpleNamespace(
+        path=str(project_root),
+        entry="mypkg.quickstart::test_llm_judge",
+        id=None,
+        display_name=None,
+        description=None,
+        force=False,
+        yes=True,
+    )
+
+    try:
+        exit_code = upload_mod.upload_command(args)
+    finally:
+        # Cleanup path
+        if str(project_root) in sys.path:
+            sys.path.remove(str(project_root))
+
+    # Assert: path-based nodeid relative to project root
+    assert exit_code == 0
+    assert captured.get("entry_point") == "mypkg/quickstart.py::test_llm_judge"
+
+
+def test_entrypoint_from_legacy_module_colon_is_path_based_nodeid(tmp_path, monkeypatch):
+    # Arrange: create a package and module
+    project_root = tmp_path
+    pkg_dir = project_root / "pkg"
+    _write_file(str(pkg_dir / "__init__.py"), "")
+    _write_file(
+        str(pkg_dir / "quickstart.py"),
+        """
+def test_llm_judge(row=None):
+    return 1
+""".lstrip(),
+    )
+
+    sys.path.insert(0, str(project_root))
+
+    captured = {}
+    from eval_protocol.cli_commands import upload as upload_mod
+
+    def fake_create_evaluation(**kwargs):
+        captured.update(kwargs)
+        return {"name": kwargs.get("evaluator_id", "eval")}
+
+    monkeypatch.setattr(upload_mod, "create_evaluation", fake_create_evaluation)
+
+    # Act: use legacy module:function
+    args = SimpleNamespace(
+        path=str(project_root),
+        entry="pkg.quickstart:test_llm_judge",
+        id=None,
+        display_name=None,
+        description=None,
+        force=False,
+        yes=True,
+    )
+
+    try:
+        exit_code = upload_mod.upload_command(args)
+    finally:
+        if str(project_root) in sys.path:
+            sys.path.remove(str(project_root))
+
+    # Assert
+    assert exit_code == 0
+    assert captured.get("entry_point") == "pkg/quickstart.py::test_llm_judge"
+
+
+def test_dashboard_url_mapping_dev_host(tmp_path, monkeypatch, capsys):
+    # Arrange: minimal file and capture printed URL
+    project_root = tmp_path
+    quickstart_path = project_root / "quickstart.py"
+    _write_file(
+        str(quickstart_path),
+        """
+def test_llm_judge(row=None):
+    return 1
+""".lstrip(),
+    )
+
+    from eval_protocol.cli_commands import upload as upload_mod
+
+    # Force API base to dev.api and account to fireworks→pyroworks-dev mapping path
+    monkeypatch.setenv("FIREWORKS_API_BASE", "https://dev.api.fireworks.ai")
+
+    def fake_create_evaluation(**kwargs):
+        # Simulate creation result with evaluator name
+        return {"name": kwargs.get("evaluator_id", "eval")}
+
+    monkeypatch.setattr(upload_mod, "create_evaluation", fake_create_evaluation)
+
+    args = SimpleNamespace(
+        path=str(project_root),
+        entry=f"{quickstart_path.name}::test_llm_judge",
+        id="quickstart-test-llm-judge",
+        display_name=None,
+        description=None,
+        force=True,
+        yes=True,
+    )
+
+    # Act
+    exit_code = upload_mod.upload_command(args)
+    out = capsys.readouterr().out
+
+    # Assert
+    assert exit_code == 0
+    assert "https://dev.fireworks.ai/dashboard/evaluators/quickstart-test-llm-judge" in out
+
+
+def test_dashboard_url_mapping_prod_host(tmp_path, monkeypatch, capsys):
+    # Arrange: minimal file
+    project_root = tmp_path
+    quickstart_path = project_root / "quickstart.py"
+    _write_file(
+        str(quickstart_path),
+        """
+def test_llm_judge(row=None):
+    return 1
+""".lstrip(),
+    )
+
+    from eval_protocol.cli_commands import upload as upload_mod
+
+    monkeypatch.setenv("FIREWORKS_API_BASE", "https://api.fireworks.ai")
+
+    def fake_create_evaluation(**kwargs):
+        return {"name": kwargs.get("evaluator_id", "eval")}
+
+    monkeypatch.setattr(upload_mod, "create_evaluation", fake_create_evaluation)
+
+    args = SimpleNamespace(
+        path=str(project_root),
+        entry=f"{quickstart_path.name}::test_llm_judge",
+        id="quickstart-test-llm-judge",
+        display_name=None,
+        description=None,
+        force=False,
+        yes=True,
+    )
+
+    # Act
+    exit_code = upload_mod.upload_command(args)
+    out = capsys.readouterr().out
+
+    # Assert
+    assert exit_code == 0
+    assert "https://app.fireworks.ai/dashboard/evaluators/quickstart-test-llm-judge" in out

From c2d839f38a1b07082f87620a64b88437356adf75 Mon Sep 17 00:00:00 2001
From: benjibc <youfychenbc5000@gmail.com>
Date: Wed, 8 Oct 2025 05:54:12 +0000
Subject: [PATCH 3/3] fix tests

---
 eval_protocol/cli.py | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/eval_protocol/cli.py b/eval_protocol/cli.py
index 5916650c..01cfb8c5 100644
--- a/eval_protocol/cli.py
+++ b/eval_protocol/cli.py
@@ -14,6 +14,23 @@
 
 from .cli_commands.common import setup_logging
 
+# Re-export deploy_command for backward compatibility with tests importing from eval_protocol.cli
+try:  # pragma: no cover - import-time alias for tests
+    from .cli_commands import deploy as _deploy_mod
+
+    deploy_command = _deploy_mod.deploy_command  # type: ignore[attr-defined]
+except Exception:  # pragma: no cover
+    # If import fails in constrained environments, tests that import it will surface the issue
+    deploy_command = None  # type: ignore[assignment]
+
+# Re-export preview_command for backward compatibility with tests importing from eval_protocol.cli
+try:  # pragma: no cover - import-time alias for tests
+    from .cli_commands import preview as _preview_mod
+
+    preview_command = _preview_mod.preview_command  # type: ignore[attr-defined]
+except Exception:  # pragma: no cover
+    preview_command = None  # type: ignore[assignment]
+
 
 def parse_args(args=None):
     """Parse command line arguments"""
@@ -424,12 +441,12 @@ def _extract_flag_value(argv_list, flag_name):
     setup_logging(args.verbose, getattr(args, "debug", False))
 
     if args.command == "preview":
-        from .cli_commands.preview import preview_command
-
+        if preview_command is None:
+            raise ImportError("preview_command is unavailable")
         return preview_command(args)
     elif args.command == "deploy":
-        from .cli_commands.deploy import deploy_command
-
+        if deploy_command is None:
+            raise ImportError("deploy_command is unavailable")
         return deploy_command(args)
     elif args.command == "deploy-mcp":
         from .cli_commands.deploy_mcp import deploy_mcp_command