more fixes

benjibc · benjibc · commit 644eb790dee8 · 2025-10-08T05:38:40.000Z
diff --git a/eval_protocol/auth.py b/eval_protocol/auth.py
@@ -186,9 +186,7 @@ def get_fireworks_account_id() -> Optional[str]:
             return account_id_from_file
         account_id = os.environ.get("FIREWORKS_ACCOUNT_ID")
         if account_id:
-            logger.debug(
-                "Using FIREWORKS_ACCOUNT_ID from environment variable (profile active but file missing)."
-            )
+            logger.debug("Using FIREWORKS_ACCOUNT_ID from environment variable (profile active but file missing).")
             return account_id
     else:
         # Default behavior: env overrides file
diff --git a/eval_protocol/cli.py b/eval_protocol/cli.py
@@ -425,21 +425,27 @@ def _extract_flag_value(argv_list, flag_name):
 
     if args.command == "preview":
         from .cli_commands.preview import preview_command
+
         return preview_command(args)
     elif args.command == "deploy":
         from .cli_commands.deploy import deploy_command
+
         return deploy_command(args)
     elif args.command == "deploy-mcp":
         from .cli_commands.deploy_mcp import deploy_mcp_command
+
         return deploy_mcp_command(args)
     elif args.command == "agent-eval":
         from .cli_commands.agent_eval_cmd import agent_eval_command
+
         return agent_eval_command(args)
     elif args.command == "logs":
         from .cli_commands.logs import logs_command
+
         return logs_command(args)
     elif args.command == "upload":
         from .cli_commands.upload import upload_command
+
         return upload_command(args)
     elif args.command == "run":
         # For the 'run' command, Hydra takes over argument parsing.
@@ -496,6 +502,7 @@ def _extract_flag_value(argv_list, flag_name):
 
         try:
             from .cli_commands.run_eval_cmd import hydra_cli_entry_point
+
             hydra_entry = cast(Any, hydra_cli_entry_point)
             hydra_entry()  # type: ignore  # pylint: disable=no-value-for-parameter
             return 0
diff --git a/eval_protocol/cli_commands/upload.py b/eval_protocol/cli_commands/upload.py
@@ -179,22 +179,85 @@ def _discover_tests(root: str) -> list[DiscoveredTest]:
     return sorted(by_qual.values(), key=lambda x: (x.file_path, x.lineno or 0))
 
 
+def _to_pyargs_nodeid(file_path: str, func_name: str) -> str | None:
+    """Attempt to build a pytest nodeid suitable for `pytest <nodeid>`.
+
+    Preference order:
+    1) Dotted package module path with double-colon: pkg.subpkg.module::func
+    2) Filesystem path with double-colon: path/to/module.py::func
+
+    Returns dotted form when package root can be inferred (directory chain with __init__.py
+    leading up to a directory contained in sys.path). Returns None if no reasonable
+    nodeid can be created (should be rare).
+    """
+    try:
+        abs_path = os.path.abspath(file_path)
+        dir_path, filename = os.path.split(abs_path)
+        module_base, ext = os.path.splitext(filename)
+        if ext != ".py":
+            # Not a python file
+            return None
+
+        # Walk up while packages have __init__.py
+        segments: list[str] = [module_base]
+        current = dir_path
+        package_root = None
+        while True:
+            if os.path.isfile(os.path.join(current, "__init__.py")):
+                segments.insert(0, os.path.basename(current))
+                parent = os.path.dirname(current)
+                # Stop if parent is not within current sys.path import roots
+                if parent == current:
+                    break
+                current = parent
+            else:
+                package_root = current
+                break
+
+        # If we found a package chain, check that the package_root is importable (in sys.path)
+        if package_root and any(
+            os.path.abspath(sp).rstrip(os.sep) == os.path.abspath(package_root).rstrip(os.sep) for sp in sys.path
+        ):
+            dotted = ".".join(segments)
+            return f"{dotted}::{func_name}"
+
+        # Do not emit a dotted top-level module for non-packages; prefer path-based nodeid
+
+        # Fallback to relative path (if under cwd) or absolute path
+        cwd = os.getcwd()
+        try:
+            rel = os.path.relpath(abs_path, cwd)
+        except Exception:
+            rel = abs_path
+        return f"{rel}::{func_name}"
+    except Exception:
+        return None
+
+
 def _parse_entry(entry: str, cwd: str) -> tuple[str, str]:
-    # Accept module:function or path::function
+    # Accept module::function, path::function, or legacy module:function
     entry = entry.strip()
     if "::" in entry:
-        path_part, func = entry.split("::", 1)
-        abs_path = os.path.abspath(os.path.join(cwd, path_part))
-        module_name = Path(abs_path).stem
-        return abs_path, func
+        target, func = entry.split("::", 1)
+        # Determine if target looks like a filesystem path; otherwise treat as module path
+        looks_like_path = (
+            "/" in target or "\\" in target or target.endswith(".py") or os.path.exists(os.path.join(cwd, target))
+        )
+        if looks_like_path:
+            abs_path = os.path.abspath(os.path.join(cwd, target))
+            return abs_path, func
+        else:
+            # Treat as module path for --pyargs style
+            return target, func
     elif ":" in entry:
+        # Legacy support: module:function → convert to module path + function
         module, func = entry.split(":", 1)
         return module, func
     else:
-        raise ValueError("--entry must be in 'module:function' or 'path::function' format")
+        raise ValueError("--entry must be in 'module::function', 'path::function', or 'module:function' format")
 
 
-def _generate_ts_mode_code_from_entry(entry: str, cwd: str) -> tuple[str, str, str]:
+def _generate_ts_mode_code_from_entry(entry: str, cwd: str) -> tuple[str, str, str, str]:
     target, func = _parse_entry(entry, cwd)
 
     # Check if target looks like a file path
@@ -217,10 +280,12 @@ def _generate_ts_mode_code_from_entry(entry: str, cwd: str) -> tuple[str, str, s
         sys.modules[spec.name] = module
         spec.loader.exec_module(module)  # type: ignore[attr-defined]
         module_name = spec.name
+        source_file_path = target
     else:
         # Treat as module path (e.g., "my_package.my_module")
         module_name = target
         module = importlib.import_module(module_name)
+        source_file_path = getattr(module, "__file__", "") or ""
 
     if not hasattr(module, func):
         raise ValueError(f"Function '{func}' not found in module '{module_name}'")
@@ -238,7 +303,7 @@ def _generate_ts_mode_code_from_entry(entry: str, cwd: str) -> tuple[str, str, s
             nodeids=[],
         )
     )
-    return code, file_name, qualname
+    return code, file_name, qualname, os.path.abspath(source_file_path) if source_file_path else ""
 
 
 def _generate_ts_mode_code(test: DiscoveredTest) -> tuple[str, str]:
@@ -440,10 +505,8 @@ def upload_command(args: argparse.Namespace) -> int:
         entries = [e.strip() for e in re.split(r"[,\s]+", entries_arg) if e.strip()]
         selected_specs: list[tuple[str, str, str, str]] = []
         for e in entries:
-            code, file_name, qualname = _generate_ts_mode_code_from_entry(e, root)
-            # For --entry mode, extract file path from the entry
-            file_path = e.split("::")[0] if "::" in e else ""
-            selected_specs.append((code, file_name, qualname, file_path))
+            code, file_name, qualname, resolved_path = _generate_ts_mode_code_from_entry(e, root)
+            selected_specs.append((code, file_name, qualname, resolved_path))
     else:
         print("Scanning for evaluation tests...")
         tests = _discover_tests(root)
@@ -496,15 +559,21 @@ def upload_command(args: argparse.Namespace) -> int:
         # Normalize the evaluator ID to meet Fireworks requirements
         evaluator_id = _normalize_evaluator_id(evaluator_id)
 
-        # Compute entry point metadata for backend: prefer module:function, fallback to path::function
+        # Compute entry point metadata for backend as a pytest nodeid usable with `pytest <entrypoint>`
+        # Always prefer a path-based nodeid to work in plain pytest environments (server may not use --pyargs)
         func_name = qualname.split(".")[-1]
-        module_part = qualname.rsplit(".", 1)[0] if "." in qualname else ""
-        # Use pytest pyargs style: package.module:function
-        if module_part and "." in module_part:
-            entry_point = f"{module_part}:{func_name}"
+        entry_point = None
+        if source_file_path:
+            # Use path relative to current working directory if possible
+            abs_path = os.path.abspath(source_file_path)
+            try:
+                rel = os.path.relpath(abs_path, root)
+            except Exception:
+                rel = abs_path
+            entry_point = f"{rel}::{func_name}"
         else:
-            # If we cannot derive a dotted module path, don't set entry point
-            entry_point = None
+            # Fallback: use filename from qualname only (rare)
+            entry_point = f"{func_name}.py::{func_name}"
 
         print(f"\nUploading evaluator '{evaluator_id}' for {qualname.split('.')[-1]}...")
         try:
@@ -524,7 +593,27 @@ def upload_command(args: argparse.Namespace) -> int:
             # Print success message with Fireworks dashboard link
             print(f"\n✅ Successfully uploaded evaluator: {evaluator_id}")
             print("📊 View in Fireworks Dashboard:")
-            print(f"   https://app.fireworks.ai/dashboard/evaluators/{evaluator_id}")
+            # Map API base to app host (e.g., dev.api.fireworks.ai -> dev.app.fireworks.ai)
+            from urllib.parse import urlparse
+
+            api_base = os.environ.get("FIREWORKS_API_BASE", "https://api.fireworks.ai")
+            try:
+                parsed = urlparse(api_base)
+                host = parsed.netloc or parsed.path  # handle cases where scheme may be missing
+                # Mapping rules:
+                # - dev.api.fireworks.ai → dev.fireworks.ai
+                # - *.api.fireworks.ai → *.app.fireworks.ai (default)
+                if host.startswith("dev.api.fireworks.ai"):
+                    app_host = "dev.fireworks.ai"
+                elif host.startswith("api."):
+                    app_host = host.replace("api.", "app.", 1)
+                else:
+                    app_host = host
+                scheme = parsed.scheme or "https"
+                dashboard_url = f"{scheme}://{app_host}/dashboard/evaluators/{evaluator_id}"
+            except Exception:
+                dashboard_url = f"https://app.fireworks.ai/dashboard/evaluators/{evaluator_id}"
+            print(f"   {dashboard_url}")
             print()
         except Exception as e:
             print(f"Failed to upload {qualname}: {e}")
diff --git a/eval_protocol/evaluation.py b/eval_protocol/evaluation.py
@@ -536,6 +536,14 @@ def create(self, evaluator_id, display_name=None, description=None, force=False)
         # Include optional entry point when provided
         if self.entry_point:
             payload_data["evaluator"]["entryPoint"] = self.entry_point
+            logger.info(f"Including entryPoint in payload: {self.entry_point}")
+
+        # Debug log the create payload structure (without sample data)
+        try:
+            logger.info(f"Create API Request Payload: {json.dumps(payload_data, indent=2)}")
+        except Exception:
+            # If serialization fails for any reason, skip debug dump
+            pass
 
         if "dev.api.fireworks.ai" in self.api_base and account_id == "fireworks":
             account_id = "pyroworks-dev"
diff --git a/tests/test_upload_entrypoint.py b/tests/test_upload_entrypoint.py