Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[submodule "tests/Auto-GPT-test-cassettes"]
path = tests/Auto-GPT-test-cassettes
url = https://github.com/Significant-Gravitas/Auto-GPT-test-cassettes
branch = master
branch = master
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import pytest

from autogpt.agent import Agent
from tests.integration.challenges.challenge_decorator.challenge_decorator import (
challenge,
)
from tests.integration.challenges.utils import run_interaction_loop
from tests.utils import requires_api_key

Expand All @@ -9,11 +12,12 @@

@requires_api_key("OPENAI_API_KEY")
@pytest.mark.vcr
@challenge
def test_browse_website(
browser_agent: Agent,
patched_api_requestor: None,
monkeypatch: pytest.MonkeyPatch,
# config: Config,
level_to_run: int,
) -> None:
file_path = browser_agent.workspace.get_path("browse_website.txt")
run_interaction_loop(monkeypatch, browser_agent, CYCLE_COUNT)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file
from autogpt.config import Config
from tests.integration.challenges.challenge_decorator.challenge_decorator import (
challenge,
)
from tests.integration.challenges.utils import run_interaction_loop
from tests.utils import requires_api_key

Expand All @@ -11,11 +14,13 @@

@requires_api_key("OPENAI_API_KEY")
@pytest.mark.vcr
@challenge
def test_write_file(
writer_agent: Agent,
patched_api_requestor: None,
monkeypatch: pytest.MonkeyPatch,
config: Config,
level_to_run: int,
) -> None:
file_path = str(writer_agent.workspace.get_path("hello_world.txt"))
run_interaction_loop(monkeypatch, writer_agent, CYCLE_COUNT)
Expand Down
Empty file.
21 changes: 21 additions & 0 deletions tests/integration/challenges/challenge_decorator/challenge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from typing import Optional


class Challenge:
    """Record of a single challenge run: its identity, difficulty bounds,
    and the outcome of the most recent attempt."""

    # Class-wide mode switch: when True, the suite attempts levels above the
    # current record (set from the --beat-challenges CLI flag in conftest).
    BEAT_CHALLENGES = False

    def __init__(
        self,
        name: str,
        category: str,
        max_level: int,
        max_level_beaten: Optional[int],
        level_to_run: Optional[int] = None,
    ) -> None:
        # Identity of the challenge.
        self.name = name
        self.category = category
        # Difficulty bookkeeping from the scoreboard.
        self.max_level = max_level
        self.max_level_beaten = max_level_beaten
        self.level_to_run = level_to_run
        # Outcome flags, updated by the @challenge decorator after the run.
        self.succeeded = False
        self.skipped = False
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import contextlib
import os
from functools import wraps
from typing import Any, Callable, Optional

import pytest

from tests.integration.challenges.challenge_decorator.challenge import Challenge
from tests.integration.challenges.challenge_decorator.challenge_utils import (
create_challenge,
)
from tests.integration.challenges.challenge_decorator.score_utils import (
get_scores,
update_new_score,
)

MAX_LEVEL_TO_IMPROVE_ON = (
    1  # we will attempt to beat 1 level above the current level for now.
)


def challenge(func: Callable[..., Any]) -> Callable[..., None]:
    """Decorator for challenge tests.

    Wraps *func* so that:
      * the level to run is resolved from the scoreboard (or taken from an
        explicit ``level_to_run`` kwarg) before the test executes;
      * assertion failures inside the test are suppressed during the run so
        the score files can still be updated, then re-raised (or xfailed)
        afterwards;
      * on CI (``CI=true``) the per-process new-score file is rewritten with
        the outcome;
      * tests whose level cannot be resolved are skipped as locked.
    """

    @wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> None:
        # In beat mode we may retry to climb MAX_LEVEL_TO_IMPROVE_ON levels;
        # otherwise the test runs exactly once.
        run_remaining = MAX_LEVEL_TO_IMPROVE_ON if Challenge.BEAT_CHALLENGES else 1

        while run_remaining > 0:
            current_score, new_score, new_score_location = get_scores()
            # An explicit level (from the --level option) takes precedence
            # over the scoreboard.
            level_to_run = kwargs.get("level_to_run")
            # NOTE: renamed from `challenge` — the original local shadowed the
            # decorator's own name.
            challenge_obj = create_challenge(
                func, current_score, Challenge.BEAT_CHALLENGES, level_to_run
            )
            if challenge_obj.level_to_run is not None:
                kwargs["level_to_run"] = challenge_obj.level_to_run
                # Swallow assertion failures here so scores are still
                # written; the failure is surfaced below.
                with contextlib.suppress(AssertionError):
                    func(*args, **kwargs)
                    challenge_obj.succeeded = True
            else:
                challenge_obj.skipped = True
            if os.environ.get("CI") == "true":
                new_max_level_beaten = get_new_max_level_beaten(
                    challenge_obj, Challenge.BEAT_CHALLENGES
                )
                update_new_score(
                    new_score_location, new_score, challenge_obj, new_max_level_beaten
                )
            if challenge_obj.level_to_run is None:
                pytest.skip("This test has not been unlocked yet.")

            if not challenge_obj.succeeded:
                if Challenge.BEAT_CHALLENGES:
                    # In beat mode a failure is expected: mark as xfail.
                    pytest.xfail("Challenge failed")
                raise AssertionError("Challenge failed")
            run_remaining -= 1

    return wrapper


def get_new_max_level_beaten(
    challenge: Challenge, beat_challenges: bool
) -> Optional[int]:
    """Decide which max_level_beaten value to persist after a run.

    Success records the level that was just run; a skipped run preserves the
    previous record; an outright failure preserves it only in beat mode,
    otherwise the record is cleared (None).
    """
    if challenge.succeeded:
        return challenge.level_to_run
    if challenge.skipped:
        return challenge.max_level_beaten
    # The challenge ran and failed.
    if beat_challenges:
        return challenge.max_level_beaten
    return None
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import os
from typing import Any, Callable, Dict, Optional, Tuple

from tests.integration.challenges.challenge_decorator.challenge import Challenge

CHALLENGE_PREFIX = "test_"


def create_challenge(
    func: Callable[..., Any],
    current_score: Dict[str, Any],
    is_beat_challenges: bool,
    level_to_run: Optional[int] = None,
) -> Challenge:
    """Build a Challenge for *func* from the current scoreboard.

    Category and name are derived from the test's file location and function
    name; the difficulty bounds come from *current_score*; the level to run
    is resolved from those (or taken from *level_to_run* when given).
    """
    category, name = get_challenge_identifiers(func)

    max_level = get_max_level(current_score, category, name)
    max_level_beaten = get_max_level_beaten(current_score, category, name)
    resolved_level = get_level_to_run(
        is_beat_challenges, level_to_run, max_level, max_level_beaten
    )

    return Challenge(
        name=name,
        category=category,
        max_level=max_level,
        max_level_beaten=max_level_beaten,
        level_to_run=resolved_level,
    )


def get_level_to_run(
    is_beat_challenges: bool,
    level_to_run: Optional[int],
    max_level: int,
    max_level_beaten: Optional[int],
) -> Optional[int]:
    """Resolve which level the challenge should run at.

    Returns None when the challenge should be skipped.
    Raises ValueError when an explicit *level_to_run* exceeds *max_level*.
    """
    # An explicit level always wins, but must actually exist.
    if level_to_run is not None:
        if level_to_run > max_level:
            raise ValueError(
                f"Level to run ({level_to_run}) is greater than max level ({max_level})"
            )
        return level_to_run
    if not is_beat_challenges:
        # Regression mode: re-run the best level already beaten (None => skip).
        return max_level_beaten
    # Beat mode: try one level above the record, unless already maxed out.
    if max_level_beaten == max_level:
        return None
    if max_level_beaten is None:
        return 1
    return max_level_beaten + 1


def get_challenge_identifiers(func: Callable[..., Any]) -> Tuple[str, str]:
    """Derive (category, name) identifiers for a challenge test function.

    The category is the name of the directory containing the test file; the
    name is the function's name with the leading "test_" prefix stripped.
    """
    full_path = os.path.dirname(os.path.abspath(func.__code__.co_filename))
    challenge_category = os.path.basename(full_path)
    # Strip only a *leading* prefix: str.replace() would also drop any later
    # occurrence of "test_" inside the name (e.g. "test_write_test_file"
    # would have become "write_file" instead of "write_test_file").
    challenge_name = func.__name__
    if challenge_name.startswith(CHALLENGE_PREFIX):
        challenge_name = challenge_name[len(CHALLENGE_PREFIX):]
    return challenge_category, challenge_name


def get_max_level(
    current_score: Dict[str, Any],
    challenge_category: str,
    challenge_name: str,
) -> int:
    """Look up the challenge's maximum level in the scoreboard.

    Missing category/challenge entries default to 1.
    """
    category_scores = current_score.get(challenge_category, {})
    challenge_scores = category_scores.get(challenge_name, {})
    return challenge_scores.get("max_level", 1)


def get_max_level_beaten(
    current_score: Dict[str, Any],
    challenge_category: str,
    challenge_name: str,
) -> Optional[int]:
    """Look up the highest level already beaten for this challenge.

    Returns None when the challenge has no recorded win (or no entry at all).
    """
    category_scores = current_score.get(challenge_category, {})
    challenge_scores = category_scores.get(challenge_name, {})
    return challenge_scores.get("max_level_beaten")
59 changes: 59 additions & 0 deletions tests/integration/challenges/challenge_decorator/score_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import json
import os
from typing import Any, Dict, Optional, Tuple

from tests.integration.challenges.challenge_decorator.challenge import Challenge

CURRENT_SCORE_LOCATION = "../current_score"
NEW_SCORE_LOCATION = "../new_score"


def update_new_score(
    filename_new_score: str,
    new_score: Dict[str, Any],
    challenge: Challenge,
    new_max_level_beaten: Optional[int],
) -> None:
    """Record the challenge outcome in *new_score* and persist it to disk."""
    # write_new_score mutates new_score in place and returns it.
    updated_score = write_new_score(new_score, challenge, new_max_level_beaten)
    write_new_score_to_file(updated_score, filename_new_score)


def write_new_score(
    new_score: Dict[str, Any], challenge: Challenge, new_max_level_beaten: Optional[int]
) -> Dict[str, Any]:
    """Insert or overwrite this challenge's entry in *new_score*.

    Mutates *new_score* in place and also returns it for convenience.
    """
    category_scores = new_score.setdefault(challenge.category, {})
    category_scores[challenge.name] = {
        "max_level_beaten": new_max_level_beaten,
        "max_level": challenge.max_level,
    }
    return new_score


def write_new_score_to_file(new_score: Dict[str, Any], filename: str) -> None:
    """Serialize *new_score* to *filename* as pretty-printed (indent=4) JSON."""
    serialized = json.dumps(new_score, indent=4)
    with open(filename, "w") as score_file:
        score_file.write(serialized)


def get_scores() -> Tuple[Dict[str, Any], Dict[str, Any], str]:
    """Load the committed scoreboard and this process's new-score file.

    Returns (current_score, new_score, new_score_path); files that do not
    exist yet load as empty dicts.
    """
    current_path, new_path = get_score_locations()
    return load_json(current_path), load_json(new_path), new_path


def load_json(filename: str) -> Dict[str, Any]:
    """Parse *filename* as JSON, returning {} when the file does not exist."""
    if not os.path.isfile(filename):
        return {}
    with open(filename, "r") as json_file:
        return json.load(json_file)


def get_score_locations() -> Tuple[str, str]:
    """Build the paths of the shared current-score file and this process's
    private new-score file.

    The new-score path is suffixed with the PID so parallel pytest workers
    do not clobber each other's results.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    current_path = os.path.join(here, f"{CURRENT_SCORE_LOCATION}.json")
    new_path = os.path.join(here, f"{NEW_SCORE_LOCATION}_{os.getpid()}.json")
    return current_path, new_path
19 changes: 17 additions & 2 deletions tests/integration/challenges/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,33 @@
from _pytest.config.argparsing import Parser
from _pytest.fixtures import FixtureRequest

from tests.integration.challenges.challenge_decorator.challenge import Challenge


def pytest_addoption(parser: "Parser") -> None:
    """Register the challenge-related command-line options.

    --level: run a specific challenge level (int, default None).
    --beat-challenges: attempt to beat levels above the current record.
    """
    parser.addoption(
        "--level", action="store", default=None, type=int, help="Specify test level"
    )
    parser.addoption(
        "--beat-challenges",
        action="store_true",
        # Fixed typo in user-facing help text ("Spepcifies" -> "Specifies").
        help="Specifies whether the test suite should attempt to beat challenges",
    )


def pytest_configure(config: Config) -> None:
    """Copy the challenge CLI options onto config.option so fixtures can read
    them as plain attributes."""
    config.option.level = config.getoption("--level", default=None)
    config.option.beat_challenges = config.getoption("--beat-challenges", default=False)


@pytest.fixture
def level_to_run(request: FixtureRequest) -> int:
    """Expose the --level CLI option to challenge tests.

    Used by the goal-oriented challenge tests. The value is set on
    ``config.option`` in ``pytest_configure``; it is None when no --level
    was passed (the option defaults to None despite the int annotation).
    """
    return request.config.option.level


@pytest.fixture(autouse=True)
def check_beat_challenges(request: FixtureRequest) -> None:
    """Autouse fixture: propagate the --beat-challenges flag onto the
    Challenge class before each test, so the @challenge decorator sees the
    current mode."""
    Challenge.BEAT_CHALLENGES = request.config.getoption("--beat-challenges")
38 changes: 38 additions & 0 deletions tests/integration/challenges/current_score.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
"basic_abilities": {
"browse_website": {
"max_level": 1,
"max_level_beaten": 1
},
"write_file": {
"max_level": 1,
"max_level_beaten": 1
}
},
"information_retrieval": {
"information_retrieval_challenge_a": {
"max_level": 1,
"max_level_beaten": 1
}
},
"kubernetes": {
"kubernetes_template_challenge_a": {
"max_level": 1,
"max_level_beaten": null
}
},
"memory": {
"memory_challenge_a": {
"max_level": 3,
"max_level_beaten": 3
},
"memory_challenge_b": {
"max_level": 5,
"max_level_beaten": 1
},
"memory_challenge_c": {
"max_level": 5,
"max_level_beaten": 1
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

from autogpt.commands.file_operations import read_file
from autogpt.config import Config
from tests.integration.challenges.challenge_decorator.challenge_decorator import (
challenge,
)
from tests.integration.challenges.utils import run_interaction_loop
from tests.utils import requires_api_key

Expand All @@ -11,11 +14,13 @@

@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
@challenge
def test_information_retrieval_challenge_a(
get_company_revenue_agent: Agent,
monkeypatch: pytest.MonkeyPatch,
patched_api_requestor: None,
config: Config,
level_to_run: int,
) -> None:
"""
Test the challenge_a function in a given agent by mocking user inputs and checking the output file content.
Expand Down
Loading