From b9c5d17cfd83f9cea68310a58a30488663b44064 Mon Sep 17 00:00:00 2001 From: Benny Chen Date: Sun, 23 Nov 2025 17:10:08 -0800 Subject: [PATCH 1/2] reduce collision --- eval_protocol/human_id/__init__.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/eval_protocol/human_id/__init__.py b/eval_protocol/human_id/__init__.py index eb0df9b4..4353dbd9 100644 --- a/eval_protocol/human_id/__init__.py +++ b/eval_protocol/human_id/__init__.py @@ -39,22 +39,22 @@ def generate_id( adjectives = dictionary.adjectives nouns = dictionary.nouns - # Calculate total combinations: adjectives * nouns * 100 (for 00-99) - total = len(adjectives) * len(nouns) * 100 + # Calculate total combinations: adjectives * nouns * 1000000 (for 000000-999999) + total = len(adjectives) * len(nouns) * 1000000 if index >= total: raise ValueError(f"index out of range. Received {index}, max allowed is {total - 1}") # Decompose index into adjective, noun, and number - number = index % 100 - remaining = index // 100 + number = index % 1000000 + remaining = index // 1000000 noun_idx = remaining % len(nouns) adj_idx = remaining // len(nouns) adjective = adjectives[adj_idx] noun = nouns[noun_idx] - return f"{adjective}{separator}{noun}{separator}{number:02d}" + return f"{adjective}{separator}{noun}{separator}{number:06d}" # Random generation random_obj = system_random @@ -63,15 +63,15 @@ def generate_id( adjective = random_obj.choice(dictionary.adjectives) noun = random_obj.choice(dictionary.nouns) - number = random_obj.randint(0, 99) + number = random_obj.randint(0, 999999) - return f"{adjective}{separator}{noun}{separator}{number:02d}" + return f"{adjective}{separator}{noun}{separator}{number:06d}" def num_combinations() -> int: """ Return the total number of unique IDs possible. - Format uses adjective-noun-NN, so total = adjectives * nouns * 100. + Format uses adjective-noun-NNNNNN, so total = adjectives * nouns * 1000000. """ - return len(dictionary.adjectives) * len(dictionary.nouns) * 100 + return len(dictionary.adjectives) * len(dictionary.nouns) * 1000000 From b844a72a1514592da2e4fd2b60c1b72c42b7e539 Mon Sep 17 00:00:00 2001 From: Benny Chen Date: Mon, 24 Nov 2025 16:03:52 -0800 Subject: [PATCH 2/2] fix tests --- tests/pytest/test_openenv_echo_hub.py | 8 ++++++- tests/test_human_id.py | 32 +++++++++++++-------------- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/tests/pytest/test_openenv_echo_hub.py b/tests/pytest/test_openenv_echo_hub.py index ae9c2cdc..a80e2ef9 100644 --- a/tests/pytest/test_openenv_echo_hub.py +++ b/tests/pytest/test_openenv_echo_hub.py @@ -10,7 +10,13 @@ # Preferred import when using the monolithic `openenv` package -from envs.echo_env import EchoEnv # type: ignore +# Preferred import when using the monolithic `openenv` package +try: + from envs.echo_env import EchoEnv # type: ignore +except ImportError: + # Define dummy class to satisfy OpenEnvRolloutProcessor validation during collection + class EchoEnv: # type: ignore + pass # Skip these integration-heavy tests on CI runners by default diff --git a/tests/test_human_id.py b/tests/test_human_id.py index ce35fe09..884de67f 100644 --- a/tests/test_human_id.py +++ b/tests/test_human_id.py @@ -7,14 +7,14 @@ def test_generate_id_basic_format(): """Test that generate_id produces the expected adjective-noun-NN format""" id_str = generate_id(index=0) - # Should match pattern: adjective-noun-NN where NN is 00-99 - assert re.match(r"^[a-z]+-[a-z]+-\d{2}$", id_str) + # Should match pattern: adjective-noun-NNNNNN where NNNNNN is 000000-999999 + assert re.match(r"^[a-z]+-[a-z]+-\d{6}$", id_str) # Test a few specific indices to ensure deterministic behavior - assert generate_id(index=0) == "other-time-00" - assert generate_id(index=1) == "other-time-01" - assert generate_id(index=99) == "other-time-99" - assert generate_id(index=100) == "other-year-00" + assert generate_id(index=0) == "other-time-000000" + assert generate_id(index=1) == "other-time-000001" + assert generate_id(index=99) == "other-time-000099" + assert generate_id(index=100) == "other-time-000100" def test_generate_id_index_mapping(): @@ -22,20 +22,20 @@ def test_generate_id_index_mapping(): # Test number cycling (0-99) for i in range(100): id_str = generate_id(index=i) - expected_num = f"{i:02d}" + expected_num = f"{i:06d}" assert id_str.endswith(f"-{expected_num}") assert id_str.startswith("other-time-") - # Test noun advancement after 100 numbers - id_100 = generate_id(index=100) - assert id_100.startswith("other-year-00") + # Test noun advancement after 1000000 numbers + id_1000000 = generate_id(index=1000000) + assert id_1000000.startswith("other-year-000000") - # Test adjective advancement (after all nouns * 100) + # Test adjective advancement (after all nouns * 1000000) # This will depend on dictionary size, so let's test the pattern from eval_protocol.human_id import dictionary nouns_count = len(dictionary.nouns) - adjective_boundary = nouns_count * 100 + adjective_boundary = nouns_count * 1000000 id_at_boundary = generate_id(index=adjective_boundary) # Should have advanced to the next adjective @@ -68,7 +68,7 @@ def test_generate_id_seed_stability(): # Without index, default produces separator '-' and at least 3 components c = generate_id() - assert re.match(r"^[a-z]+-[a-z]+-\d{2}$", c) + assert re.match(r"^[a-z]+-[a-z]+-\d{6}$", c) def test_generate_id_seed_with_index(): @@ -83,12 +83,12 @@ def test_generate_id_seed_with_index(): assert x != y # All should follow the correct format - assert re.match(r"^[a-z]+-[a-z]+-\d{2}$", x) - assert re.match(r"^[a-z]+-[a-z]+-\d{2}$", y) + assert re.match(r"^[a-z]+-[a-z]+-\d{6}$", x) + assert re.match(r"^[a-z]+-[a-z]+-\d{6}$", y) def test_generate_id_random_format(): """Test that random generation (no index) produces correct format""" for _ in range(10): id_str = generate_id() - assert re.match(r"^[a-z]+-[a-z]+-\d{2}$", id_str) + assert re.match(r"^[a-z]+-[a-z]+-\d{6}$", id_str)