From 59ebb4a5e0446e4a96e3e3fda506406d1ca75fa2 Mon Sep 17 00:00:00 2001
From: Shrey Modi <shrey@fireworks.ai>
Date: Tue, 21 Oct 2025 16:27:16 -0700
Subject: [PATCH 1/3] zip uploadf

---
 eval_protocol/evaluation.py | 58 ++++++++++++++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 4 deletions(-)

diff --git a/eval_protocol/evaluation.py b/eval_protocol/evaluation.py
index 58e0beea..691fba6b 100644
--- a/eval_protocol/evaluation.py
+++ b/eval_protocol/evaluation.py
@@ -509,6 +509,54 @@ def _simulated_preview(self, samples):
         preview_result.total_runtime_ms = max(1, int((end_time - start_time) * 1000))
         return preview_result
 
+    def _build_minimal_criteria(self) -> List[Dict[str, str]]:
+        """Build minimal criteria (name, type, description) without code snippets."""
+
+        # Remote URL mode
+        if self.remote_url:
+            return [
+                {
+                    "name": "remote_eval_proxy",
+                    "type": "CODE_SNIPPETS",
+                    "description": f"Proxies evaluation to remote URL: {self.remote_url}",
+                }
+            ]
+
+        # TS mode (direct code snippet)
+        elif self.ts_mode_config:
+            criterion_name = self.ts_mode_config.get("criterion_name", "default_code_criterion")
+            description = self.ts_mode_config.get("description", "Python code execution")
+            return [
+                {
+                    "name": criterion_name,
+                    "type": "CODE_SNIPPETS",
+                    "description": description,
+                }
+            ]
+
+        # Multi-metrics mode
+        elif self.multi_metrics:
+            return [
+                {
+                    "name": "eval",
+                    "type": "CODE_SNIPPETS",
+                    "description": self.description or "Multi-metric evaluation",
+                }
+            ]
+
+        # Single metric folders
+        else:
+            criteria = []
+            for metric_name in self.metric_folders:
+                criteria.append(
+                    {
+                        "name": metric_name,
+                        "type": "CODE_SNIPPETS",
+                        "description": self.description or f"Evaluation metric: {metric_name}",
+                    }
+                )
+            return criteria
+
     def create(self, evaluator_id, display_name=None, description=None, force=False):
         if not self.remote_url and not self.ts_mode_config and not self.code_files:
             raise ValueError("No code files loaded. Load metric folder(s) or provide ts_mode_config/remote_url first.")
@@ -524,18 +572,19 @@ def create(self, evaluator_id, display_name=None, description=None, force=False)
 
         self.display_name = display_name or evaluator_id
         self.description = description or f"Evaluator created from {evaluator_id}"
-
+        account_id = "pyroworks-dev"
         # Keep multiMetrics/rollupSettings for backward compatibility with tests
         payload_multi_metrics = True
         payload_rollup_settings = {"skipRollup": True}
-
+        parent = f"accounts/{account_id}"
         payload_data = {
+            "parent": parent,
             "evaluator": {
                 "displayName": self.display_name,
                 "description": self.description,
                 "multiMetrics": payload_multi_metrics,
                 # "rewardFunctionMode": self.reward_function_mode,  # How input is processed by user func
-                "criteria": self._construct_criteria(criteria_data={}),
+                "criteria": self._build_minimal_criteria(),
                 "requirements": "",
                 "rollupSettings": payload_rollup_settings,
             },
@@ -557,10 +606,11 @@ def create(self, evaluator_id, display_name=None, description=None, force=False)
         if "dev.api.fireworks.ai" in self.api_base and account_id == "fireworks":
             account_id = "pyroworks-dev"
 
-        base_url = f"{self.api_base}/v1/accounts/{account_id}/evaluators"
+        base_url = f"{self.api_base}/v1/{parent}/evaluators"
         headers = {
             "Authorization": f"Bearer {auth_token}",
             "Content-Type": "application/json",
+            "X-Fireworks-Gateway-Secret": "8b8a823c-0b29-41f4-8537-4c9f650a113c",
         }
         logger.info(f"Creating evaluator '{evaluator_id}' for account '{account_id}'...")
 

From 750a3e5ddd2ecfaa554a9e60d8104ce35ddddc19 Mon Sep 17 00:00:00 2001
From: Yinghan Ma <yinghan.ma@fireworks.ai>
Date: Tue, 21 Oct 2025 17:14:39 -0700
Subject: [PATCH 2/3] update

---
 eval_protocol/cli_commands/upload.py | 58 ++++++++++++++--------------
 eval_protocol/evaluation.py          |  8 +++-
 2 files changed, 36 insertions(+), 30 deletions(-)

diff --git a/eval_protocol/cli_commands/upload.py b/eval_protocol/cli_commands/upload.py
index 8683c872..f0980d52 100644
--- a/eval_protocol/cli_commands/upload.py
+++ b/eval_protocol/cli_commands/upload.py
@@ -519,35 +519,35 @@ def upload_command(args: argparse.Namespace) -> int:
     description = getattr(args, "description", None)
     force = bool(getattr(args, "force", False))
 
-    # Ensure FIREWORKS_API_KEY is available to the remote by storing it as a Fireworks secret
-    try:
-        fw_account_id = get_fireworks_account_id()
-        fw_api_key_value = get_fireworks_api_key()
-        if not fw_account_id and fw_api_key_value:
-            # Attempt to verify and resolve account id from server headers
-            resolved = verify_api_key_and_get_account_id(api_key=fw_api_key_value, api_base=get_fireworks_api_base())
-            if resolved:
-                fw_account_id = resolved
-                # Propagate to environment so downstream calls use it if needed
-                os.environ["FIREWORKS_ACCOUNT_ID"] = fw_account_id
-                print(f"Resolved FIREWORKS_ACCOUNT_ID via API verification: {fw_account_id}")
-        if fw_account_id and fw_api_key_value:
-            print("Ensuring FIREWORKS_API_KEY is registered as a secret on Fireworks for rollout...")
-            if create_or_update_fireworks_secret(
-                account_id=fw_account_id,
-                key_name="FIREWORKS_API_KEY",
-                secret_value=fw_api_key_value,
-            ):
-                print("✓ FIREWORKS_API_KEY secret created/updated on Fireworks.")
-            else:
-                print("Warning: Failed to create/update FIREWORKS_API_KEY secret on Fireworks.")
-        else:
-            if not fw_account_id:
-                print("Warning: FIREWORKS_ACCOUNT_ID not found; cannot register FIREWORKS_API_KEY secret.")
-            if not fw_api_key_value:
-                print("Warning: FIREWORKS_API_KEY not found locally; cannot register secret.")
-    except Exception as e:
-        print(f"Warning: Skipped Fireworks secret registration due to error: {e}")
+    # # Ensure FIREWORKS_API_KEY is available to the remote by storing it as a Fireworks secret
+    # try:
+    #     fw_account_id = get_fireworks_account_id()
+    #     fw_api_key_value = get_fireworks_api_key()
+    #     if not fw_account_id and fw_api_key_value:
+    #         # Attempt to verify and resolve account id from server headers
+    #         resolved = verify_api_key_and_get_account_id(api_key=fw_api_key_value, api_base=get_fireworks_api_base())
+    #         if resolved:
+    #             fw_account_id = resolved
+    #             # Propagate to environment so downstream calls use it if needed
+    #             os.environ["FIREWORKS_ACCOUNT_ID"] = fw_account_id
+    #             print(f"Resolved FIREWORKS_ACCOUNT_ID via API verification: {fw_account_id}")
+    #     if fw_account_id and fw_api_key_value:
+    #         print("Ensuring FIREWORKS_API_KEY is registered as a secret on Fireworks for rollout...")
+    #         if create_or_update_fireworks_secret(
+    #             account_id=fw_account_id,
+    #             key_name="FIREWORKS_API_KEY",
+    #             secret_value=fw_api_key_value,
+    #         ):
+    #             print("✓ FIREWORKS_API_KEY secret created/updated on Fireworks.")
+    #         else:
+    #             print("Warning: Failed to create/update FIREWORKS_API_KEY secret on Fireworks.")
+    #     else:
+    #         if not fw_account_id:
+    #             print("Warning: FIREWORKS_ACCOUNT_ID not found; cannot register FIREWORKS_API_KEY secret.")
+    #         if not fw_api_key_value:
+    #             print("Warning: FIREWORKS_API_KEY not found locally; cannot register secret.")
+    # except Exception as e:
+    #     print(f"Warning: Skipped Fireworks secret registration due to error: {e}")
 
     exit_code = 0
     for i, (qualname, source_file_path) in enumerate(selected_specs):
diff --git a/eval_protocol/evaluation.py b/eval_protocol/evaluation.py
index 691fba6b..46762a53 100644
--- a/eval_protocol/evaluation.py
+++ b/eval_protocol/evaluation.py
@@ -606,18 +606,21 @@ def create(self, evaluator_id, display_name=None, description=None, force=False)
         if "dev.api.fireworks.ai" in self.api_base and account_id == "fireworks":
             account_id = "pyroworks-dev"
 
-        base_url = f"{self.api_base}/v1/{parent}/evaluators"
+        base_url = f"{self.api_base}/v1/{parent}/evaluatorsV2"
         headers = {
             "Authorization": f"Bearer {auth_token}",
+            "x-api-key": f"{auth_token}",
             "Content-Type": "application/json",
             "X-Fireworks-Gateway-Secret": "8b8a823c-0b29-41f4-8537-4c9f650a113c",
         }
+        
         logger.info(f"Creating evaluator '{evaluator_id}' for account '{account_id}'...")
 
         try:
             if force:
                 check_url = f"{base_url}/{evaluator_id}"
                 try:
+                    logger.info(f"check_url: {check_url}, headers: {headers}")
                     check_response = requests.get(check_url, headers=headers)
                     if check_response.status_code == 200:
                         logger.info(f"Evaluator '{evaluator_id}' already exists, deleting and recreating...")
@@ -632,12 +635,15 @@ def create(self, evaluator_id, display_name=None, description=None, force=False)
                                 )
                         except Exception as e_del:
                             logger.warning(f"Error deleting evaluator: {str(e_del)}")
+                        logger.info(f"base_url: {base_url}, payload_data: {payload_data}, headers: {headers}")
                         response = requests.post(base_url, json=payload_data, headers=headers)
                     else:
+                        print(f"base_url: {base_url}, payload_data: {payload_data}, headers: {headers}")
                         response = requests.post(base_url, json=payload_data, headers=headers)
                 except requests.exceptions.RequestException:
                     response = requests.post(base_url, json=payload_data, headers=headers)
             else:
+                logger.info(f"check_url: {base_url}, headers: {headers}, payload_data: {payload_data}")
                 response = requests.post(base_url, json=payload_data, headers=headers)
 
             response.raise_for_status()

From 665bc56fe46ea768574784b45b86b485103a8718 Mon Sep 17 00:00:00 2001
From: Shrey Modi <shrey@fireworks.ai>
Date: Wed, 22 Oct 2025 17:15:10 -0700
Subject: [PATCH 3/3] gcs support

---
 eval_protocol/cli_commands/upload.py | 200 +++++++++++++++++----------
 eval_protocol/evaluation.py          | 184 ++++++++++++++++++++++--
 2 files changed, 303 insertions(+), 81 deletions(-)

diff --git a/eval_protocol/cli_commands/upload.py b/eval_protocol/cli_commands/upload.py
index f0980d52..68284b76 100644
--- a/eval_protocol/cli_commands/upload.py
+++ b/eval_protocol/cli_commands/upload.py
@@ -81,6 +81,12 @@ def _is_eval_protocol_test(obj: Any) -> bool:
         return False
     # Must have pytest marks from evaluation_test
     marks = getattr(obj, "pytestmark", [])
+    # Handle pytest proxy objects (APIRemovedInV1Proxy)
+    if not isinstance(marks, (list, tuple)):
+        try:
+            marks = list(marks) if marks else []
+        except (TypeError, AttributeError):
+            return False
     return len(marks) > 0
 
 
@@ -131,51 +137,103 @@ def _discover_tests(root: str) -> list[DiscoveredTest]:
 
     discovered: list[DiscoveredTest] = []
 
-    # Collect all test functions from Python files
-    for file_path in _iter_python_files(root):
+    class CollectionPlugin:
+        """Plugin to capture collected items without running code."""
+
+        def __init__(self):
+            self.items = []
+
+        def pytest_ignore_collect(self, collection_path, config):
+            """Ignore problematic files before pytest tries to import them."""
+            # Ignore specific files
+            ignored_files = ["setup.py", "versioneer.py", "conf.py", "__main__.py"]
+            if collection_path.name in ignored_files:
+                return True
+
+            # Ignore hidden files (starting with .)
+            if collection_path.name.startswith("."):
+                return True
+
+            # Ignore test_discovery files
+            if collection_path.name.startswith("test_discovery"):
+                return True
+
+            return None
+
+        def pytest_collection_modifyitems(self, items):
+            """Hook called after collection is done."""
+            self.items = items
+
+    plugin = CollectionPlugin()
+
+    # Run pytest collection only (--collect-only prevents code execution)
+    # Override python_files to collect from ANY .py file
+    args = [
+        abs_root,
+        "--collect-only",
+        "-q",
+        "--pythonwarnings=ignore",
+        "-o",
+        "python_files=*.py",  # Override to collect all .py files
+    ]
+
+    try:
+        # Suppress pytest output
+        import io
+        import contextlib
+
+        with contextlib.redirect_stdout(io.StringIO()), contextlib.redirect_stderr(io.StringIO()):
+            pytest.main(args, plugins=[plugin])
+    except Exception:
+        # If pytest collection fails, fall back to empty list
+        return []
+
+    # Process collected items
+    for item in plugin.items:
+        if not hasattr(item, "obj"):
+            continue
+
+        obj = item.obj
+        if not _is_eval_protocol_test(obj):
+            continue
+
+        origin = getattr(obj, "_origin_func", obj)
         try:
-            unique_name = "ep_upload_" + re.sub(r"[^a-zA-Z0-9_]", "_", os.path.abspath(file_path))
-            spec = importlib.util.spec_from_file_location(unique_name, file_path)
-            if spec and spec.loader:
-                module = importlib.util.module_from_spec(spec)
-                sys.modules[spec.name] = module
-                spec.loader.exec_module(module)  # type: ignore[attr-defined]
-            else:
-                continue
+            src_file = inspect.getsourcefile(origin) or str(item.path)
+            _, lineno = inspect.getsourcelines(origin)
         except Exception:
-            continue
+            src_file, lineno = str(item.path), None
 
-        for name, obj in inspect.getmembers(module):
-            if _is_eval_protocol_test(obj):
-                origin = getattr(obj, "_origin_func", obj)
-                try:
-                    src_file = inspect.getsourcefile(origin) or file_path
-                    _, lineno = inspect.getsourcelines(origin)
-                except Exception:
-                    src_file, lineno = file_path, None
-
-                # Extract parametrization info from marks
-                has_parametrize, param_count, param_ids = _extract_param_info_from_marks(obj)
-
-                # Generate synthetic nodeids for display
-                base_nodeid = f"{os.path.basename(file_path)}::{name}"
-                if has_parametrize and param_ids:
-                    nodeids = [f"{base_nodeid}[{pid}]" for pid in param_ids]
-                else:
-                    nodeids = [base_nodeid]
-
-                discovered.append(
-                    DiscoveredTest(
-                        module_path=module.__name__,
-                        module_name=module.__name__,
-                        qualname=f"{module.__name__}.{name}",
-                        file_path=os.path.abspath(src_file),
-                        lineno=lineno,
-                        has_parametrize=has_parametrize,
-                        param_count=param_count,
-                        nodeids=nodeids,
-                    )
-                )
+        # Extract parametrization info from marks
+        has_parametrize, param_count, param_ids = _extract_param_info_from_marks(obj)
+
+        # Get module name and function name
+        module_name = (
+            item.module.__name__
+            if hasattr(item, "module")
+            else item.nodeid.split("::")[0].replace("/", ".").replace(".py", "")
+        )
+        func_name = item.name.split("[")[0] if "[" in item.name else item.name
+
+        # Generate nodeids
+        base_nodeid = f"{os.path.basename(src_file)}::{func_name}"
+        if param_ids:
+            nodeids = [f"{base_nodeid}[{pid}]" for pid in param_ids]
+        else:
+            nodeids = [base_nodeid]
+
+        discovered.append(
+            DiscoveredTest(
+                module_path=module_name,
+                module_name=module_name,
+                qualname=f"{module_name}.{func_name}",
+                file_path=os.path.abspath(src_file),
+                lineno=lineno,
+                has_parametrize=has_parametrize,
+                param_count=param_count,
+                nodeids=nodeids,
+            )
+        )
 
     # Deduplicate by qualname (in case same test appears multiple times)
     by_qual: dict[str, DiscoveredTest] = {}
@@ -519,35 +577,35 @@ def upload_command(args: argparse.Namespace) -> int:
     description = getattr(args, "description", None)
     force = bool(getattr(args, "force", False))
 
-    # # Ensure FIREWORKS_API_KEY is available to the remote by storing it as a Fireworks secret
-    # try:
-    #     fw_account_id = get_fireworks_account_id()
-    #     fw_api_key_value = get_fireworks_api_key()
-    #     if not fw_account_id and fw_api_key_value:
-    #         # Attempt to verify and resolve account id from server headers
-    #         resolved = verify_api_key_and_get_account_id(api_key=fw_api_key_value, api_base=get_fireworks_api_base())
-    #         if resolved:
-    #             fw_account_id = resolved
-    #             # Propagate to environment so downstream calls use it if needed
-    #             os.environ["FIREWORKS_ACCOUNT_ID"] = fw_account_id
-    #             print(f"Resolved FIREWORKS_ACCOUNT_ID via API verification: {fw_account_id}")
-    #     if fw_account_id and fw_api_key_value:
-    #         print("Ensuring FIREWORKS_API_KEY is registered as a secret on Fireworks for rollout...")
-    #         if create_or_update_fireworks_secret(
-    #             account_id=fw_account_id,
-    #             key_name="FIREWORKS_API_KEY",
-    #             secret_value=fw_api_key_value,
-    #         ):
-    #             print("✓ FIREWORKS_API_KEY secret created/updated on Fireworks.")
-    #         else:
-    #             print("Warning: Failed to create/update FIREWORKS_API_KEY secret on Fireworks.")
-    #     else:
-    #         if not fw_account_id:
-    #             print("Warning: FIREWORKS_ACCOUNT_ID not found; cannot register FIREWORKS_API_KEY secret.")
-    #         if not fw_api_key_value:
-    #             print("Warning: FIREWORKS_API_KEY not found locally; cannot register secret.")
-    # except Exception as e:
-    #     print(f"Warning: Skipped Fireworks secret registration due to error: {e}")
+    # Ensure FIREWORKS_API_KEY is available to the remote by storing it as a Fireworks secret
+    try:
+        fw_account_id = get_fireworks_account_id()
+        fw_api_key_value = get_fireworks_api_key()
+        if not fw_account_id and fw_api_key_value:
+            # Attempt to verify and resolve account id from server headers
+            resolved = verify_api_key_and_get_account_id(api_key=fw_api_key_value, api_base=get_fireworks_api_base())
+            if resolved:
+                fw_account_id = resolved
+                # Propagate to environment so downstream calls use it if needed
+                os.environ["FIREWORKS_ACCOUNT_ID"] = fw_account_id
+                print(f"Resolved FIREWORKS_ACCOUNT_ID via API verification: {fw_account_id}")
+        if fw_account_id and fw_api_key_value:
+            print("Ensuring FIREWORKS_API_KEY is registered as a secret on Fireworks for rollout...")
+            if create_or_update_fireworks_secret(
+                account_id=fw_account_id,
+                key_name="FIREWORKS_API_KEY",
+                secret_value=fw_api_key_value,
+            ):
+                print("✓ FIREWORKS_API_KEY secret created/updated on Fireworks.")
+            else:
+                print("Warning: Failed to create/update FIREWORKS_API_KEY secret on Fireworks.")
+        else:
+            if not fw_account_id:
+                print("Warning: FIREWORKS_ACCOUNT_ID not found; cannot register FIREWORKS_API_KEY secret.")
+            if not fw_api_key_value:
+                print("Warning: FIREWORKS_API_KEY not found locally; cannot register secret.")
+    except Exception as e:
+        print(f"Warning: Skipped Fireworks secret registration due to error: {e}")
 
     exit_code = 0
     for i, (qualname, source_file_path) in enumerate(selected_specs):
diff --git a/eval_protocol/evaluation.py b/eval_protocol/evaluation.py
index 46762a53..9e5c1e8f 100644
--- a/eval_protocol/evaluation.py
+++ b/eval_protocol/evaluation.py
@@ -22,6 +22,8 @@
 )
 from eval_protocol.typed_interface import EvaluationMode
 
+from eval_protocol.get_pep440_version import get_pep440_version
+
 logger = logging.getLogger(__name__)
 
 # Flag to track if the preview API was successfully used
@@ -557,6 +559,88 @@ def _build_minimal_criteria(self) -> List[Dict[str, str]]:
                 )
             return criteria
 
+    @staticmethod
+    def _parse_ignore_file(ignore_path: str) -> List[str]:
+        """Parse .gitignore or .dockerignore and return patterns."""
+        patterns = []
+        if not os.path.exists(ignore_path):
+            return patterns
+
+        try:
+            with open(ignore_path, "r") as f:
+                for line in f:
+                    line = line.strip()
+                    if line and not line.startswith("#"):
+                        patterns.append(line)
+        except Exception:
+            pass
+
+        return patterns
+
+    @staticmethod
+    def _ensure_requirements_present(source_dir: str) -> None:
+        req_path = os.path.join(source_dir, "requirements.txt")
+        pyproj_path = os.path.join(source_dir, "pyproject.toml")
+        if not (os.path.isfile(req_path) or os.path.isfile(pyproj_path)):
+            logger.error("Missing requirements.txt or pyproject.toml in upload directory: %s", source_dir)
+            raise ValueError(
+                "Upload requires either requirements.txt or pyproject.toml in the project root. "
+                "Please add one and re-run ep upload."
+            )
+
+    @staticmethod
+    def _should_ignore(path: str, ignore_patterns: List[str]) -> bool:
+        """Check if path matches any ignore pattern."""
+        from pathlib import Path
+        import fnmatch
+
+        default_ignores = [".git", "__pycache__", "*.pyc", ".venv", "venv", "node_modules", "*.egg-info"]
+        all_patterns = default_ignores + ignore_patterns
+
+        path_obj = Path(path)
+        for pattern in all_patterns:
+            if pattern.endswith("/"):
+                if path_obj.is_dir() and fnmatch.fnmatch(path_obj.name, pattern.rstrip("/")):
+                    return True
+            elif fnmatch.fnmatch(path_obj.name, pattern) or fnmatch.fnmatch(str(path_obj), pattern):
+                return True
+
+        return False
+
+    @staticmethod
+    def _create_tar_gz_with_ignores(output_path: str, source_dir: str) -> int:
+        """Create tar.gz of source_dir with parent directory included."""
+        import tarfile
+        from pathlib import Path
+
+        source_path = Path(source_dir)
+        gitignore_patterns = Evaluator._parse_ignore_file(str(source_path / ".gitignore"))
+        dockerignore_patterns = Evaluator._parse_ignore_file(str(source_path / ".dockerignore"))
+        all_ignore_patterns = gitignore_patterns + dockerignore_patterns
+
+        logger.info(f"Creating tar.gz with {len(all_ignore_patterns)} ignore patterns")
+
+        # Get directory name for the archive root
+        dir_name = os.path.basename(source_dir)
+        parent_dir = os.path.dirname(source_dir)
+
+        with tarfile.open(output_path, "w:gz") as tar:
+            for root, dirs, files in os.walk(source_dir):
+                dirs[:] = [d for d in dirs if not Evaluator._should_ignore(os.path.join(root, d), all_ignore_patterns)]
+
+                for file in files:
+                    file_path = os.path.join(root, file)
+                    if Evaluator._should_ignore(file_path, all_ignore_patterns):
+                        continue
+
+                    # Include parent directory in archive path
+                    rel_path = os.path.relpath(file_path, parent_dir)  # Relative to parent
+                    tar.add(file_path, arcname=rel_path)  # Keeps "python-sdk/..." structure
+
+        size_bytes = os.path.getsize(output_path)
+        logger.info(f"Created {output_path} ({size_bytes:,} bytes)")
+        return size_bytes
+
     def create(self, evaluator_id, display_name=None, description=None, force=False):
         if not self.remote_url and not self.ts_mode_config and not self.code_files:
             raise ValueError("No code files loaded. Load metric folder(s) or provide ts_mode_config/remote_url first.")
@@ -572,17 +656,26 @@ def create(self, evaluator_id, display_name=None, description=None, force=False)
 
         self.display_name = display_name or evaluator_id
         self.description = description or f"Evaluator created from {evaluator_id}"
-        account_id = "pyroworks-dev"
+
         # Keep multiMetrics/rollupSettings for backward compatibility with tests
         payload_multi_metrics = True
         payload_rollup_settings = {"skipRollup": True}
         parent = f"accounts/{account_id}"
+
+        version_str = None
+
+        try:
+            version_str = get_pep440_version()
+        except Exception:
+            version_str = None
+
         payload_data = {
             "parent": parent,
             "evaluator": {
                 "displayName": self.display_name,
                 "description": self.description,
                 "multiMetrics": payload_multi_metrics,
+                "commitHash": version_str,
                 # "rewardFunctionMode": self.reward_function_mode,  # How input is processed by user func
                 "criteria": self._build_minimal_criteria(),
                 "requirements": "",
@@ -609,22 +702,24 @@ def create(self, evaluator_id, display_name=None, description=None, force=False)
         base_url = f"{self.api_base}/v1/{parent}/evaluatorsV2"
         headers = {
             "Authorization": f"Bearer {auth_token}",
-            "x-api-key": f"{auth_token}",
             "Content-Type": "application/json",
-            "X-Fireworks-Gateway-Secret": "8b8a823c-0b29-41f4-8537-4c9f650a113c",
         }
-        
+
+        self._ensure_requirements_present(os.getcwd())
+
         logger.info(f"Creating evaluator '{evaluator_id}' for account '{account_id}'...")
 
         try:
             if force:
-                check_url = f"{base_url}/{evaluator_id}"
+                base_url_2 = f"{self.api_base}/v1/{parent}/evaluators"
+                check_url = f"{base_url_2}/{evaluator_id}"
                 try:
                     logger.info(f"check_url: {check_url}, headers: {headers}")
                     check_response = requests.get(check_url, headers=headers)
+
                     if check_response.status_code == 200:
                         logger.info(f"Evaluator '{evaluator_id}' already exists, deleting and recreating...")
-                        delete_url = f"{base_url}/{evaluator_id}"
+                        delete_url = f"{base_url_2}/{evaluator_id}"
                         try:
                             delete_response = requests.delete(delete_url, headers=headers)
                             if delete_response.status_code < 400:
@@ -635,13 +730,13 @@ def create(self, evaluator_id, display_name=None, description=None, force=False)
                                 )
                         except Exception as e_del:
                             logger.warning(f"Error deleting evaluator: {str(e_del)}")
-                        logger.info(f"base_url: {base_url}, payload_data: {payload_data}, headers: {headers}")
+                        logger.info(f"base_url: {base_url_2}, payload_data: {payload_data}, headers: {headers}")
                         response = requests.post(base_url, json=payload_data, headers=headers)
                     else:
-                        print(f"base_url: {base_url}, payload_data: {payload_data}, headers: {headers}")
+                        print(f"base_url: {base_url_2}, payload_data: {payload_data}, headers: {headers}")
                         response = requests.post(base_url, json=payload_data, headers=headers)
                 except requests.exceptions.RequestException:
-                    response = requests.post(base_url, json=payload_data, headers=headers)
+                    response = requests.post(base_url_2, json=payload_data, headers=headers)
             else:
                 logger.info(f"check_url: {base_url}, headers: {headers}, payload_data: {payload_data}")
                 response = requests.post(base_url, json=payload_data, headers=headers)
@@ -649,7 +744,76 @@ def create(self, evaluator_id, display_name=None, description=None, force=False)
             response.raise_for_status()
             result = response.json()
             logger.info(f"Successfully created evaluator '{evaluator_id}'")
-            return result
+
+            # Upload code as tar.gz to GCS
+            evaluator_name = result.get("name")  # e.g., "accounts/pyroworks/evaluators/test-123"
+
+            if evaluator_name:
+                try:
+                    # Create tar.gz of current directory
+                    cwd = os.getcwd()
+                    dir_name = os.path.basename(cwd)
+                    tar_filename = f"{dir_name}.tar.gz"
+                    tar_path = os.path.join(cwd, tar_filename)
+
+                    tar_size = self._create_tar_gz_with_ignores(tar_path, cwd)
+
+                    # Call GetEvaluatorUploadEndpoint
+
+                    upload_endpoint_url = f"{self.api_base}/v1/{evaluator_name}:getUploadEndpoint"
+                    upload_payload = {"name": evaluator_name, "filename_to_size": {tar_filename: tar_size}}
+
+                    logger.info(f"Requesting upload endpoint for {tar_filename}")
+                    upload_response = requests.post(upload_endpoint_url, json=upload_payload, headers=headers)
+
+                    upload_response.raise_for_status()
+
+                    signed_urls = upload_response.json().get("filenameToSignedUrls", {})
+                    signed_url = signed_urls.get(tar_filename)
+
+                    if signed_url:
+                        logger.info(f"Uploading {tar_filename} to GCS...")
+
+                        file_size = os.path.getsize(tar_path)
+
+                        with open(tar_path, "rb") as f:
+                            # Create request exactly like Golang
+                            req = requests.Request(
+                                "PUT",
+                                signed_url,
+                                data=f,
+                                headers={
+                                    "Content-Type": "application/octet-stream",
+                                    "X-Goog-Content-Length-Range": f"{file_size},{file_size}",
+                                },
+                            )
+                            prepared = req.prepare()
+
+                            # Don't let requests add extra headers
+                            session = requests.Session()
+                            gcs_response = session.send(prepared, timeout=600)
+                            gcs_response.raise_for_status()
+
+                        logger.info(f"Successfully uploaded {tar_filename}")
+
+                        # Step 3: Validate upload
+                        validate_url = f"{self.api_base}/v1/{evaluator_name}:validateUpload"
+                        validate_payload = {"name": evaluator_name}
+                        validate_response = requests.post(validate_url, json=validate_payload, headers=headers)
+                        validate_response.raise_for_status()
+
+                        logger.info("Upload validated successfully")
+
+                    # Clean up tar file
+                    if os.path.exists(tar_path):
+                        os.remove(tar_path)
+
+                except Exception as upload_error:
+                    logger.warning(f"Code upload failed (evaluator created but code not uploaded): {upload_error}")
+                    # Don't fail - evaluator is created, just code upload failed
+
+            # return result  # OLD: Direct return
+            return result  # Return after attempting upload
         except Exception as e:
             logger.error(f"Error creating evaluator: {str(e)}")
             if isinstance(e, requests.exceptions.HTTPError) and hasattr(e, "response"):