From d0f58fa5d2a81b7a18233135f1aee027065aa68d Mon Sep 17 00:00:00 2001 From: benjibc Date: Wed, 8 Oct 2025 01:19:50 +0000 Subject: [PATCH 1/3] support fireworks dev tier --- eval_protocol/auth.py | 122 +++++++++++++++++++++------ eval_protocol/cli.py | 105 ++++++++++++++++++----- eval_protocol/cli_commands/upload.py | 11 +++ eval_protocol/evaluation.py | 9 ++ 4 files changed, 199 insertions(+), 48 deletions(-) diff --git a/eval_protocol/auth.py b/eval_protocol/auth.py index c90c6aef..63f939a9 100644 --- a/eval_protocol/auth.py +++ b/eval_protocol/auth.py @@ -6,10 +6,48 @@ logger = logging.getLogger(__name__) +# Default locations (used for tests and as fallback). Actual resolution is dynamic via _get_auth_ini_file(). FIREWORKS_CONFIG_DIR = Path.home() / ".fireworks" AUTH_INI_FILE = FIREWORKS_CONFIG_DIR / "auth.ini" +def _get_profile_base_dir() -> Path: + """ + Resolve the Fireworks configuration base directory following firectl behavior: + - Default: ~/.fireworks + - If FIREWORKS_PROFILE is set and non-empty: ~/.fireworks/profiles/ + """ + profile_name = os.environ.get("FIREWORKS_PROFILE", "").strip() + base_dir = Path.home() / ".fireworks" + if profile_name: + base_dir = base_dir / "profiles" / profile_name + return base_dir + + +def _get_auth_ini_file() -> Path: + """ + Determine the auth.ini file path. + Priority: + 1) FIREWORKS_AUTH_FILE env var when set + 2) ~/.fireworks[/profiles/]/auth.ini (profile driven) + """ + auth_file_env = os.environ.get("FIREWORKS_AUTH_FILE") + if auth_file_env: + return Path(auth_file_env) + return _get_profile_base_dir() / "auth.ini" + + +def _is_profile_active() -> bool: + """ + Returns True if a specific profile or explicit auth file is active. + In this case, profile-based credentials should take precedence over env vars. + """ + if os.environ.get("FIREWORKS_AUTH_FILE"): + return True + prof = os.environ.get("FIREWORKS_PROFILE", "").strip() + return bool(prof) + + def _parse_simple_auth_file(file_path: Path) -> Dict[str, str]: """ Parses an auth file with simple key=value lines. @@ -20,7 +58,7 @@ def _parse_simple_auth_file(file_path: Path) -> Dict[str, str]: if not file_path.exists(): return creds try: - with open(file_path, "r") as f: + with open(file_path, "r", encoding="utf-8") as f: for line in f: line = line.strip() if not line or line.startswith("#") or line.startswith(";"): @@ -39,7 +77,7 @@ def _parse_simple_auth_file(file_path: Path) -> Dict[str, str]: if key in ["api_key", "account_id"] and value: creds[key] = value except Exception as e: - logger.warning(f"Error during simple parsing of {file_path}: {e}") + logger.warning("Error during simple parsing of %s: %s", str(file_path), e) return creds @@ -48,13 +86,14 @@ def _get_credential_from_config_file(key_name: str) -> Optional[str]: Helper to get a specific credential (api_key or account_id) from auth.ini. Tries simple parsing first, then configparser. """ - if not AUTH_INI_FILE.exists(): + auth_ini_path = _get_auth_ini_file() + if not auth_ini_path.exists(): return None # 1. Try simple key-value parsing first - simple_creds = _parse_simple_auth_file(AUTH_INI_FILE) + simple_creds = _parse_simple_auth_file(auth_ini_path) if key_name in simple_creds: - logger.debug(f"Using {key_name} from simple key-value parsing of {AUTH_INI_FILE}.") + logger.debug("Using %s from simple key-value parsing of %s.", key_name, str(auth_ini_path)) return simple_creds[key_name] # 2. Fallback to configparser if not found via simple parsing or if simple parsing failed @@ -62,30 +101,35 @@ def _get_credential_from_config_file(key_name: str) -> Optional[str]: # but only if simple parsing didn't yield the key. try: config = configparser.ConfigParser() - config.read(AUTH_INI_FILE) + config.read(auth_ini_path) # Try [fireworks] section if "fireworks" in config and config.has_option("fireworks", key_name): value_from_file = config.get("fireworks", key_name) if value_from_file: - logger.debug(f"Using {key_name} from [fireworks] section in {AUTH_INI_FILE}.") + logger.debug("Using %s from [fireworks] section in %s.", key_name, str(auth_ini_path)) return value_from_file # Try default section (configparser might place items without section header here) if config.has_option(config.default_section, key_name): value_from_default = config.get(config.default_section, key_name) if value_from_default: - logger.debug(f"Using {key_name} from default section [{config.default_section}] in {AUTH_INI_FILE}.") + logger.debug( + "Using %s from default section [%s] in %s.", + key_name, + config.default_section, + str(auth_ini_path), + ) return value_from_default except configparser.MissingSectionHeaderError: # This error implies the file is purely key-value, which simple parsing should have handled. # If simple parsing failed to get the key, then it's likely not there or malformed. - logger.debug(f"{AUTH_INI_FILE} has no section headers, and simple parsing did not find {key_name}.") + logger.debug("%s has no section headers, and simple parsing did not find %s.", str(auth_ini_path), key_name) except configparser.Error as e_config: - logger.warning(f"Configparser error reading {AUTH_INI_FILE} for {key_name}: {e_config}") + logger.warning("Configparser error reading %s for %s: %s", str(auth_ini_path), key_name, e_config) except Exception as e_general: - logger.warning(f"Unexpected error reading {AUTH_INI_FILE} for {key_name}: {e_general}") + logger.warning("Unexpected error reading %s for %s: %s", str(auth_ini_path), key_name, e_general) return None @@ -101,14 +145,24 @@ def get_fireworks_api_key() -> Optional[str]: Returns: The API key if found, otherwise None. """ - api_key = os.environ.get("FIREWORKS_API_KEY") - if api_key: - logger.debug("Using FIREWORKS_API_KEY from environment variable.") - return api_key - - api_key_from_file = _get_credential_from_config_file("api_key") - if api_key_from_file: - return api_key_from_file + # If a profile is active, prefer profile file first, then env + if _is_profile_active(): + api_key_from_file = _get_credential_from_config_file("api_key") + if api_key_from_file: + return api_key_from_file + api_key = os.environ.get("FIREWORKS_API_KEY") + if api_key: + logger.debug("Using FIREWORKS_API_KEY from environment variable (profile active but file missing).") + return api_key + else: + # Default behavior: env overrides file + api_key = os.environ.get("FIREWORKS_API_KEY") + if api_key: + logger.debug("Using FIREWORKS_API_KEY from environment variable.") + return api_key + api_key_from_file = _get_credential_from_config_file("api_key") + if api_key_from_file: + return api_key_from_file logger.debug("Fireworks API key not found in environment variables or auth.ini.") return None @@ -125,14 +179,26 @@ def get_fireworks_account_id() -> Optional[str]: Returns: The Account ID if found, otherwise None. """ - account_id = os.environ.get("FIREWORKS_ACCOUNT_ID") - if account_id: - logger.debug("Using FIREWORKS_ACCOUNT_ID from environment variable.") - return account_id - - account_id_from_file = _get_credential_from_config_file("account_id") - if account_id_from_file: - return account_id_from_file + # If a profile is active, prefer profile file first, then env + if _is_profile_active(): + account_id_from_file = _get_credential_from_config_file("account_id") + if account_id_from_file: + return account_id_from_file + account_id = os.environ.get("FIREWORKS_ACCOUNT_ID") + if account_id: + logger.debug( + "Using FIREWORKS_ACCOUNT_ID from environment variable (profile active but file missing)." + ) + return account_id + else: + # Default behavior: env overrides file + account_id = os.environ.get("FIREWORKS_ACCOUNT_ID") + if account_id: + logger.debug("Using FIREWORKS_ACCOUNT_ID from environment variable.") + return account_id + account_id_from_file = _get_credential_from_config_file("account_id") + if account_id_from_file: + return account_id_from_file logger.debug("Fireworks Account ID not found in environment variables or auth.ini.") return None @@ -152,5 +218,5 @@ def get_fireworks_api_base() -> str: if os.environ.get("FIREWORKS_API_BASE"): logger.debug("Using FIREWORKS_API_BASE from environment variable.") else: - logger.debug(f"FIREWORKS_API_BASE not set in environment, defaulting to {api_base}.") + logger.debug("FIREWORKS_API_BASE not set in environment, defaulting to %s.", api_base) return api_base diff --git a/eval_protocol/cli.py b/eval_protocol/cli.py index c0858f92..da12144b 100644 --- a/eval_protocol/cli.py +++ b/eval_protocol/cli.py @@ -3,32 +3,30 @@ """ import argparse -import asyncio -import json import logging import os import sys -import traceback -import uuid from pathlib import Path +from typing import Any, cast logger = logging.getLogger(__name__) -from .cli_commands.agent_eval_cmd import agent_eval_command from .cli_commands.common import setup_logging -from .cli_commands.deploy import deploy_command -from .cli_commands.deploy_mcp import deploy_mcp_command -from .cli_commands.logs import logs_command -from .cli_commands.preview import preview_command -from .cli_commands.run_eval_cmd import hydra_cli_entry_point -from .cli_commands.upload import upload_command def parse_args(args=None): """Parse command line arguments""" parser = argparse.ArgumentParser(description="eval-protocol: Tools for evaluation and reward modeling") parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging") + parser.add_argument( + "--profile", + help="Fireworks profile to use (reads ~/.fireworks/profiles//auth.ini and settings.ini)", + ) + parser.add_argument( + "--server", + help="Fireworks API server hostname or URL (e.g., dev.api.fireworks.ai or https://dev.api.fireworks.ai)", + ) subparsers = parser.add_subparsers(dest="command", help="Command to run") @@ -356,12 +354,68 @@ def main(): os.environ["PYTHONPATH"] = f"{current_dir}{os.pathsep}{current_pythonpath}" else: os.environ["PYTHONPATH"] = current_dir - logger.debug(f"Added current directory to PYTHONPATH: {current_dir}") + logger.debug("Added current directory to PYTHONPATH: %s", current_dir) # Also add to sys.path so it takes effect immediately for the current process if current_dir not in sys.path: sys.path.insert(0, current_dir) + # Pre-scan raw argv for global flags anywhere (before parsing or imports) + raw_argv = sys.argv[1:] + + def _extract_flag_value(argv_list, flag_name): + # Supports --flag value and --flag=value + for i, tok in enumerate(argv_list): + if tok == flag_name: + if i + 1 < len(argv_list): + return argv_list[i + 1] + elif tok.startswith(flag_name + "="): + return tok.split("=", 1)[1] + return None + + pre_profile = _extract_flag_value(raw_argv, "--profile") + pre_server = _extract_flag_value(raw_argv, "--server") + + # Handle Fireworks profile selection early so downstream modules see the env + profile = pre_profile + if profile: + try: + os.environ["FIREWORKS_PROFILE"] = profile + # Mirror firectl behavior: ~/.fireworks[/profiles/] + base_dir = Path.home() / ".fireworks" + if profile: + base_dir = base_dir / "profiles" / profile + os.makedirs(str(base_dir), mode=0o700, exist_ok=True) + + # Provide helpful env hints for consumers (optional) + os.environ["FIREWORKS_AUTH_FILE"] = str(base_dir / "auth.ini") + os.environ["FIREWORKS_SETTINGS_FILE"] = str(base_dir / "settings.ini") + logger.debug("Using Fireworks profile '%s' at %s", profile, base_dir) + except OSError as e: + logger.warning("Failed to initialize Fireworks profile '%s': %s", profile, e) + + # Proactively resolve and export account_id from the active profile to avoid stale .env overrides + try: + from eval_protocol.auth import get_fireworks_account_id as _resolve_account_id + + resolved_account = _resolve_account_id() + if resolved_account: + os.environ["FIREWORKS_ACCOUNT_ID"] = resolved_account + logger.debug("Resolved account_id from profile '%s': %s", profile, resolved_account) + except Exception as e: # noqa: B902 + logger.debug("Unable to resolve account_id from profile '%s': %s", profile, e) + + # Handle Fireworks server selection early + server = pre_server + if server: + # Normalize to full URL if just a hostname is supplied + normalized = server.strip() + if not normalized.startswith("http://") and not normalized.startswith("https://"): + normalized = f"https://{normalized}" + os.environ["FIREWORKS_API_BASE"] = normalized + logger.debug("Using Fireworks API base: %s", normalized) + + # Now parse args normally (so help/commands work), after globals applied # Store original sys.argv[0] because Hydra might manipulate it # and we need it if we're not calling a Hydra app. original_script_name = sys.argv[0] @@ -370,16 +424,22 @@ def main(): setup_logging(args.verbose, getattr(args, "debug", False)) if args.command == "preview": + from .cli_commands.preview import preview_command return preview_command(args) elif args.command == "deploy": + from .cli_commands.deploy import deploy_command return deploy_command(args) elif args.command == "deploy-mcp": + from .cli_commands.deploy_mcp import deploy_mcp_command return deploy_mcp_command(args) elif args.command == "agent-eval": + from .cli_commands.agent_eval_cmd import agent_eval_command return agent_eval_command(args) elif args.command == "logs": + from .cli_commands.logs import logs_command return logs_command(args) elif args.command == "upload": + from .cli_commands.upload import upload_command return upload_command(args) elif args.command == "run": # For the 'run' command, Hydra takes over argument parsing. @@ -393,7 +453,7 @@ def main(): local_conf_dir = os.path.join(current_dir, "conf") if not has_config_path and os.path.isdir(local_conf_dir): - logger.info(f"Auto-detected local conf directory: {local_conf_dir}") + logger.info("Auto-detected local conf directory: %s", local_conf_dir) hydra_specific_args = [ "--config-path", local_conf_dir, @@ -410,18 +470,21 @@ def main(): path_val = hydra_specific_args[i] abs_path = os.path.abspath(path_val) logger.debug( - f"Converting relative --config-path '{path_val}' (space separated) to absolute '{abs_path}'" + "Converting relative --config-path '%s' (space separated) to absolute '%s'", + path_val, + abs_path, ) processed_hydra_args.append(abs_path) else: logger.error("--config-path specified without a value.") - pass elif arg.startswith("--config-path="): flag_part, path_val = arg.split("=", 1) processed_hydra_args.append(flag_part) abs_path = os.path.abspath(path_val) logger.debug( - f"Converting relative --config-path '{path_val}' (equals separated) to absolute '{abs_path}'" + "Converting relative --config-path '%s' (equals separated) to absolute '%s'", + path_val, + abs_path, ) processed_hydra_args.append(abs_path) else: @@ -429,14 +492,16 @@ def main(): i += 1 sys.argv = [sys.argv[0]] + processed_hydra_args - logger.info(f"SYSCALL_ARGV_FOR_HYDRA (after potential abspath conversion): {sys.argv}") + logger.info("SYSCALL_ARGV_FOR_HYDRA (after potential abspath conversion): %s", sys.argv) try: - hydra_cli_entry_point() + from .cli_commands.run_eval_cmd import hydra_cli_entry_point + hydra_entry = cast(Any, hydra_cli_entry_point) + hydra_entry() # type: ignore # pylint: disable=no-value-for-parameter return 0 - except Exception as e: + except Exception as e: # pylint: disable=broad-except error_msg = str(e) - logger.error(f"Evaluation failed: {e}") + logger.error("Evaluation failed: %s", e) # Provide helpful suggestions for common Hydra/config errors if "Cannot find primary config" in error_msg: diff --git a/eval_protocol/cli_commands/upload.py b/eval_protocol/cli_commands/upload.py index 44ff6607..74fd844e 100644 --- a/eval_protocol/cli_commands/upload.py +++ b/eval_protocol/cli_commands/upload.py @@ -496,6 +496,16 @@ def upload_command(args: argparse.Namespace) -> int: # Normalize the evaluator ID to meet Fireworks requirements evaluator_id = _normalize_evaluator_id(evaluator_id) + # Compute entry point metadata for backend: prefer module:function, fallback to path::function + func_name = qualname.split(".")[-1] + module_part = qualname.rsplit(".", 1)[0] if "." in qualname else "" + # Use pytest pyargs style: package.module:function + if module_part and "." in module_part: + entry_point = f"{module_part}:{func_name}" + else: + # If we cannot derive a dotted module path, don't set entry point + entry_point = None + print(f"\nUploading evaluator '{evaluator_id}' for {qualname.split('.')[-1]}...") try: result = create_evaluation( @@ -507,6 +517,7 @@ def upload_command(args: argparse.Namespace) -> int: display_name=display_name or evaluator_id, description=description or f"Evaluator for {qualname}", force=force, + entry_point=entry_point, ) name = result.get("name", evaluator_id) if isinstance(result, dict) else evaluator_id diff --git a/eval_protocol/evaluation.py b/eval_protocol/evaluation.py index caedbf67..d059071f 100644 --- a/eval_protocol/evaluation.py +++ b/eval_protocol/evaluation.py @@ -161,6 +161,7 @@ def __init__( reward_function_mode: EvaluationMode = "pointwise", # New parameter for input processing mode account_id: Optional[str] = None, api_key: Optional[str] = None, + entry_point: Optional[str] = None, ): self.multi_metrics = multi_metrics self.remote_url = remote_url @@ -175,6 +176,8 @@ def __init__( self.api_base = os.environ.get("FIREWORKS_API_BASE", "https://api.fireworks.ai") # Optional requirements string for multi-metric mode (when loaded differently) self._loaded_multi_metric_requirements_str: Optional[str] = None + # Optional entry point metadata (module::function or path::function) + self.entry_point: Optional[str] = entry_point if self.ts_mode_config: python_code = self.ts_mode_config.get("python_code") @@ -530,6 +533,10 @@ def create(self, evaluator_id, display_name=None, description=None, force=False) "evaluatorId": evaluator_id, } + # Include optional entry point when provided + if self.entry_point: + payload_data["evaluator"]["entryPoint"] = self.entry_point + if "dev.api.fireworks.ai" in self.api_base and account_id == "fireworks": account_id = "pyroworks-dev" @@ -919,6 +926,7 @@ def create_evaluation( reward_function_mode: EvaluationMode = "pointwise", # Added account_id: Optional[str] = None, api_key: Optional[str] = None, + entry_point: Optional[str] = None, ): ts_mode_config = None if python_code_to_evaluate: @@ -938,6 +946,7 @@ def create_evaluation( reward_function_mode=reward_function_mode, account_id=account_id, api_key=api_key, + entry_point=entry_point, ) if remote_url: From 644eb790dee8502bc65211c446bb60a2625c1350 Mon Sep 17 00:00:00 2001 From: benjibc Date: Wed, 8 Oct 2025 05:38:40 +0000 Subject: [PATCH 2/3] more fixes --- eval_protocol/auth.py | 4 +- eval_protocol/cli.py | 7 + eval_protocol/cli_commands/upload.py | 129 ++++++++++++--- eval_protocol/evaluation.py | 8 + tests/test_upload_entrypoint.py | 227 +++++++++++++++++++++++++++ 5 files changed, 352 insertions(+), 23 deletions(-) create mode 100644 tests/test_upload_entrypoint.py diff --git a/eval_protocol/auth.py b/eval_protocol/auth.py index 63f939a9..a9840d96 100644 --- a/eval_protocol/auth.py +++ b/eval_protocol/auth.py @@ -186,9 +186,7 @@ def get_fireworks_account_id() -> Optional[str]: return account_id_from_file account_id = os.environ.get("FIREWORKS_ACCOUNT_ID") if account_id: - logger.debug( - "Using FIREWORKS_ACCOUNT_ID from environment variable (profile active but file missing)." - ) + logger.debug("Using FIREWORKS_ACCOUNT_ID from environment variable (profile active but file missing).") return account_id else: # Default behavior: env overrides file diff --git a/eval_protocol/cli.py b/eval_protocol/cli.py index da12144b..5916650c 100644 --- a/eval_protocol/cli.py +++ b/eval_protocol/cli.py @@ -425,21 +425,27 @@ def _extract_flag_value(argv_list, flag_name): if args.command == "preview": from .cli_commands.preview import preview_command + return preview_command(args) elif args.command == "deploy": from .cli_commands.deploy import deploy_command + return deploy_command(args) elif args.command == "deploy-mcp": from .cli_commands.deploy_mcp import deploy_mcp_command + return deploy_mcp_command(args) elif args.command == "agent-eval": from .cli_commands.agent_eval_cmd import agent_eval_command + return agent_eval_command(args) elif args.command == "logs": from .cli_commands.logs import logs_command + return logs_command(args) elif args.command == "upload": from .cli_commands.upload import upload_command + return upload_command(args) elif args.command == "run": # For the 'run' command, Hydra takes over argument parsing. @@ -496,6 +502,7 @@ def _extract_flag_value(argv_list, flag_name): try: from .cli_commands.run_eval_cmd import hydra_cli_entry_point + hydra_entry = cast(Any, hydra_cli_entry_point) hydra_entry() # type: ignore # pylint: disable=no-value-for-parameter return 0 diff --git a/eval_protocol/cli_commands/upload.py b/eval_protocol/cli_commands/upload.py index 74fd844e..c1f85827 100644 --- a/eval_protocol/cli_commands/upload.py +++ b/eval_protocol/cli_commands/upload.py @@ -179,22 +179,85 @@ def _discover_tests(root: str) -> list[DiscoveredTest]: return sorted(by_qual.values(), key=lambda x: (x.file_path, x.lineno or 0)) +def _to_pyargs_nodeid(file_path: str, func_name: str) -> str | None: + """Attempt to build a pytest nodeid suitable for `pytest `. + + Preference order: + 1) Dotted package module path with double-colon: pkg.subpkg.module::func + 2) Filesystem path with double-colon: path/to/module.py::func + + Returns dotted form when package root can be inferred (directory chain with __init__.py + leading up to a directory contained in sys.path). Returns None if no reasonable + nodeid can be created (should be rare). + """ + try: + abs_path = os.path.abspath(file_path) + dir_path, filename = os.path.split(abs_path) + module_base, ext = os.path.splitext(filename) + if ext != ".py": + # Not a python file + return None + + # Walk up while packages have __init__.py + segments: list[str] = [module_base] + current = dir_path + package_root = None + while True: + if os.path.isfile(os.path.join(current, "__init__.py")): + segments.insert(0, os.path.basename(current)) + parent = os.path.dirname(current) + # Stop if parent is not within current sys.path import roots + if parent == current: + break + current = parent + else: + package_root = current + break + + # If we found a package chain, check that the package_root is importable (in sys.path) + if package_root and any( + os.path.abspath(sp).rstrip(os.sep) == os.path.abspath(package_root).rstrip(os.sep) for sp in sys.path + ): + dotted = ".".join(segments) + return f"{dotted}::{func_name}" + + # Do not emit a dotted top-level module for non-packages; prefer path-based nodeid + + # Fallback to relative path (if under cwd) or absolute path + cwd = os.getcwd() + try: + rel = os.path.relpath(abs_path, cwd) + except Exception: + rel = abs_path + return f"{rel}::{func_name}" + except Exception: + return None + + def _parse_entry(entry: str, cwd: str) -> tuple[str, str]: - # Accept module:function or path::function + # Accept module::function, path::function, or legacy module:function entry = entry.strip() if "::" in entry: - path_part, func = entry.split("::", 1) - abs_path = os.path.abspath(os.path.join(cwd, path_part)) - module_name = Path(abs_path).stem - return abs_path, func + target, func = entry.split("::", 1) + # Determine if target looks like a filesystem path; otherwise treat as module path + looks_like_path = ( + "/" in target or "\\" in target or target.endswith(".py") or os.path.exists(os.path.join(cwd, target)) + ) + if looks_like_path: + abs_path = os.path.abspath(os.path.join(cwd, target)) + return abs_path, func + else: + # Treat as module path for --pyargs style + return target, func elif ":" in entry: + # Legacy support: module:function → convert to module path + function module, func = entry.split(":", 1) return module, func else: - raise ValueError("--entry must be in 'module:function' or 'path::function' format") + raise ValueError("--entry must be in 'module::function', 'path::function', or 'module:function' format") -def _generate_ts_mode_code_from_entry(entry: str, cwd: str) -> tuple[str, str, str]: +def _generate_ts_mode_code_from_entry(entry: str, cwd: str) -> tuple[str, str, str, str]: target, func = _parse_entry(entry, cwd) # Check if target looks like a file path @@ -217,10 +280,12 @@ def _generate_ts_mode_code_from_entry(entry: str, cwd: str) -> tuple[str, str, s sys.modules[spec.name] = module spec.loader.exec_module(module) # type: ignore[attr-defined] module_name = spec.name + source_file_path = target else: # Treat as module path (e.g., "my_package.my_module") module_name = target module = importlib.import_module(module_name) + source_file_path = getattr(module, "__file__", "") or "" if not hasattr(module, func): raise ValueError(f"Function '{func}' not found in module '{module_name}'") @@ -238,7 +303,7 @@ def _generate_ts_mode_code_from_entry(entry: str, cwd: str) -> tuple[str, str, s nodeids=[], ) ) - return code, file_name, qualname + return code, file_name, qualname, os.path.abspath(source_file_path) if source_file_path else "" def _generate_ts_mode_code(test: DiscoveredTest) -> tuple[str, str]: @@ -440,10 +505,8 @@ def upload_command(args: argparse.Namespace) -> int: entries = [e.strip() for e in re.split(r"[,\s]+", entries_arg) if e.strip()] selected_specs: list[tuple[str, str, str, str]] = [] for e in entries: - code, file_name, qualname = _generate_ts_mode_code_from_entry(e, root) - # For --entry mode, extract file path from the entry - file_path = e.split("::")[0] if "::" in e else "" - selected_specs.append((code, file_name, qualname, file_path)) + code, file_name, qualname, resolved_path = _generate_ts_mode_code_from_entry(e, root) + selected_specs.append((code, file_name, qualname, resolved_path)) else: print("Scanning for evaluation tests...") tests = _discover_tests(root) @@ -496,15 +559,21 @@ def upload_command(args: argparse.Namespace) -> int: # Normalize the evaluator ID to meet Fireworks requirements evaluator_id = _normalize_evaluator_id(evaluator_id) - # Compute entry point metadata for backend: prefer module:function, fallback to path::function + # Compute entry point metadata for backend as a pytest nodeid usable with `pytest ` + # Always prefer a path-based nodeid to work in plain pytest environments (server may not use --pyargs) func_name = qualname.split(".")[-1] - module_part = qualname.rsplit(".", 1)[0] if "." in qualname else "" - # Use pytest pyargs style: package.module:function - if module_part and "." in module_part: - entry_point = f"{module_part}:{func_name}" + entry_point = None + if source_file_path: + # Use path relative to current working directory if possible + abs_path = os.path.abspath(source_file_path) + try: + rel = os.path.relpath(abs_path, root) + except Exception: + rel = abs_path + entry_point = f"{rel}::{func_name}" else: - # If we cannot derive a dotted module path, don't set entry point - entry_point = None + # Fallback: use filename from qualname only (rare) + entry_point = f"{func_name}.py::{func_name}" print(f"\nUploading evaluator '{evaluator_id}' for {qualname.split('.')[-1]}...") try: @@ -524,7 +593,27 @@ def upload_command(args: argparse.Namespace) -> int: # Print success message with Fireworks dashboard link print(f"\n✅ Successfully uploaded evaluator: {evaluator_id}") print("📊 View in Fireworks Dashboard:") - print(f" https://app.fireworks.ai/dashboard/evaluators/{evaluator_id}") + # Map API base to app host (e.g., dev.api.fireworks.ai -> dev.app.fireworks.ai) + from urllib.parse import urlparse + + api_base = os.environ.get("FIREWORKS_API_BASE", "https://api.fireworks.ai") + try: + parsed = urlparse(api_base) + host = parsed.netloc or parsed.path # handle cases where scheme may be missing + # Mapping rules: + # - dev.api.fireworks.ai → dev.fireworks.ai + # - *.api.fireworks.ai → *.app.fireworks.ai (default) + if host.startswith("dev.api.fireworks.ai"): + app_host = "dev.fireworks.ai" + elif host.startswith("api."): + app_host = host.replace("api.", "app.", 1) + else: + app_host = host + scheme = parsed.scheme or "https" + dashboard_url = f"{scheme}://{app_host}/dashboard/evaluators/{evaluator_id}" + except Exception: + dashboard_url = f"https://app.fireworks.ai/dashboard/evaluators/{evaluator_id}" + print(f" {dashboard_url}") print() except Exception as e: print(f"Failed to upload {qualname}: {e}") diff --git a/eval_protocol/evaluation.py b/eval_protocol/evaluation.py index d059071f..9d75a22a 100644 --- a/eval_protocol/evaluation.py +++ b/eval_protocol/evaluation.py @@ -536,6 +536,14 @@ def create(self, evaluator_id, display_name=None, description=None, force=False) # Include optional entry point when provided if self.entry_point: payload_data["evaluator"]["entryPoint"] = self.entry_point + logger.info(f"Including entryPoint in payload: {self.entry_point}") + + # Debug log the create payload structure (without sample data) + try: + logger.info(f"Create API Request Payload: {json.dumps(payload_data, indent=2)}") + except Exception: + # If serialization fails for any reason, skip debug dump + pass if "dev.api.fireworks.ai" in self.api_base and account_id == "fireworks": account_id = "pyroworks-dev" diff --git a/tests/test_upload_entrypoint.py b/tests/test_upload_entrypoint.py new file mode 100644 index 00000000..2ae23024 --- /dev/null +++ b/tests/test_upload_entrypoint.py @@ -0,0 +1,227 @@ +import os +import sys +from types import SimpleNamespace + + +def _write_file(path: str, content: str) -> None: + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, "w", encoding="utf-8") as f: + f.write(content) + + +def test_entrypoint_from_path_is_path_based_nodeid(tmp_path, monkeypatch): + # Arrange: create a simple module file in a temp project root + project_root = tmp_path + quickstart_path = project_root / "quickstart.py" + _write_file( + str(quickstart_path), + """ +def test_llm_judge(row=None): + return 1 +""".lstrip(), + ) + + # Capture arguments passed to create_evaluation + captured = {} + + from eval_protocol.cli_commands import upload as upload_mod + + def fake_create_evaluation(**kwargs): + captured.update(kwargs) + # Simulate API response + return {"name": kwargs.get("evaluator_id", "eval")} + + monkeypatch.setattr(upload_mod, "create_evaluation", fake_create_evaluation) + + # Act: call upload_command with --entry as path::function + args = SimpleNamespace( + path=str(project_root), + entry=f"{quickstart_path.name}::test_llm_judge", + id=None, + display_name=None, + description=None, + force=False, + yes=True, + ) + + exit_code = upload_mod.upload_command(args) + + # Assert + assert exit_code == 0 + assert captured.get("entry_point") == f"{quickstart_path.name}::test_llm_judge" + + +def test_entrypoint_from_module_is_path_based_nodeid(tmp_path, monkeypatch): + # Arrange: create a package with a module + project_root = tmp_path + pkg_dir = project_root / "mypkg" + _write_file(str(pkg_dir / "__init__.py"), "") + _write_file( + str(pkg_dir / "quickstart.py"), + """ +def test_llm_judge(row=None): + return 1 +""".lstrip(), + ) + + # Ensure importable + sys.path.insert(0, str(project_root)) + + captured = {} + from eval_protocol.cli_commands import upload as upload_mod + + def fake_create_evaluation(**kwargs): + captured.update(kwargs) + return {"name": kwargs.get("evaluator_id", "eval")} + + monkeypatch.setattr(upload_mod, "create_evaluation", fake_create_evaluation) + + # Act: use module::function + args = SimpleNamespace( + path=str(project_root), + entry="mypkg.quickstart::test_llm_judge", + id=None, + display_name=None, + description=None, + force=False, + yes=True, + ) + + try: + exit_code = upload_mod.upload_command(args) + finally: + # Cleanup path + if str(project_root) in sys.path: + sys.path.remove(str(project_root)) + + # Assert: path-based nodeid relative to project root + assert exit_code == 0 + assert captured.get("entry_point") == "mypkg/quickstart.py::test_llm_judge" + + +def test_entrypoint_from_legacy_module_colon_is_path_based_nodeid(tmp_path, monkeypatch): + # Arrange: create a package and module + project_root = tmp_path + pkg_dir = project_root / "pkg" + _write_file(str(pkg_dir / "__init__.py"), "") + _write_file( + str(pkg_dir / "quickstart.py"), + """ +def test_llm_judge(row=None): + return 1 +""".lstrip(), + ) + + sys.path.insert(0, str(project_root)) + + captured = {} + from eval_protocol.cli_commands import upload as upload_mod + + def fake_create_evaluation(**kwargs): + captured.update(kwargs) + return {"name": kwargs.get("evaluator_id", "eval")} + + monkeypatch.setattr(upload_mod, "create_evaluation", fake_create_evaluation) + + # Act: use legacy module:function + args = SimpleNamespace( + path=str(project_root), + entry="pkg.quickstart:test_llm_judge", + id=None, + display_name=None, + description=None, + force=False, + yes=True, + ) + + try: + exit_code = upload_mod.upload_command(args) + finally: + if str(project_root) in sys.path: + sys.path.remove(str(project_root)) + + # Assert + assert exit_code == 0 + assert captured.get("entry_point") == "pkg/quickstart.py::test_llm_judge" + + +def test_dashboard_url_mapping_dev_host(tmp_path, monkeypatch, capsys): + # Arrange: minimal file and capture printed URL + project_root = tmp_path + quickstart_path = project_root / "quickstart.py" + _write_file( + str(quickstart_path), + """ +def test_llm_judge(row=None): + return 1 +""".lstrip(), + ) + + from eval_protocol.cli_commands import upload as upload_mod + + # Force API base to dev.api and account to fireworks→pyroworks-dev mapping path + monkeypatch.setenv("FIREWORKS_API_BASE", "https://dev.api.fireworks.ai") + + def fake_create_evaluation(**kwargs): + # Simulate creation result with evaluator name + return {"name": kwargs.get("evaluator_id", "eval")} + + monkeypatch.setattr(upload_mod, "create_evaluation", fake_create_evaluation) + + args = SimpleNamespace( + path=str(project_root), + entry=f"{quickstart_path.name}::test_llm_judge", + id="quickstart-test-llm-judge", + display_name=None, + description=None, + force=True, + yes=True, + ) + + # Act + exit_code = upload_mod.upload_command(args) + out = capsys.readouterr().out + + # Assert + assert exit_code == 0 + assert "https://dev.fireworks.ai/dashboard/evaluators/quickstart-test-llm-judge" in out + + +def test_dashboard_url_mapping_prod_host(tmp_path, monkeypatch, capsys): + # Arrange: minimal file + project_root = tmp_path + quickstart_path = project_root / "quickstart.py" + _write_file( + str(quickstart_path), + """ +def test_llm_judge(row=None): + return 1 +""".lstrip(), + ) + + from eval_protocol.cli_commands import upload as upload_mod + + monkeypatch.setenv("FIREWORKS_API_BASE", "https://api.fireworks.ai") + + def fake_create_evaluation(**kwargs): + return {"name": kwargs.get("evaluator_id", "eval")} + + monkeypatch.setattr(upload_mod, "create_evaluation", fake_create_evaluation) + + args = SimpleNamespace( + path=str(project_root), + entry=f"{quickstart_path.name}::test_llm_judge", + id="quickstart-test-llm-judge", + display_name=None, + description=None, + force=False, + yes=True, + ) + + # Act + exit_code = upload_mod.upload_command(args) + out = capsys.readouterr().out + + # Assert + assert exit_code == 0 + assert "https://app.fireworks.ai/dashboard/evaluators/quickstart-test-llm-judge" in out From c2d839f38a1b07082f87620a64b88437356adf75 Mon Sep 17 00:00:00 2001 From: benjibc Date: Wed, 8 Oct 2025 05:54:12 +0000 Subject: [PATCH 3/3] fix tests --- eval_protocol/cli.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/eval_protocol/cli.py b/eval_protocol/cli.py index 5916650c..01cfb8c5 100644 --- a/eval_protocol/cli.py +++ b/eval_protocol/cli.py @@ -14,6 +14,23 @@ from .cli_commands.common import setup_logging +# Re-export deploy_command for backward compatibility with tests importing from eval_protocol.cli +try: # pragma: no cover - import-time alias for tests + from .cli_commands import deploy as _deploy_mod + + deploy_command = _deploy_mod.deploy_command # type: ignore[attr-defined] +except Exception: # pragma: no cover + # If import fails in constrained environments, tests that import it will surface the issue + deploy_command = None # type: ignore[assignment] + +# Re-export preview_command for backward compatibility with tests importing from eval_protocol.cli +try: # pragma: no cover - import-time alias for tests + from .cli_commands import preview as _preview_mod + + preview_command = _preview_mod.preview_command # type: ignore[attr-defined] +except Exception: # pragma: no cover + preview_command = None # type: ignore[assignment] + def parse_args(args=None): """Parse command line arguments""" @@ -424,12 +441,12 @@ def _extract_flag_value(argv_list, flag_name): setup_logging(args.verbose, getattr(args, "debug", False)) if args.command == "preview": - from .cli_commands.preview import preview_command - + if preview_command is None: + raise ImportError("preview_command is unavailable") return preview_command(args) elif args.command == "deploy": - from .cli_commands.deploy import deploy_command - + if deploy_command is None: + raise ImportError("deploy_command is unavailable") return deploy_command(args) elif args.command == "deploy-mcp": from .cli_commands.deploy_mcp import deploy_mcp_command