diff --git a/.circleci/config.yml b/.circleci/config.yml index b22c4edc..f070c6be 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -3,6 +3,122 @@ version: 2.1 orbs: python: circleci/python@2.1 +# Executor for regression testing jobs +executors: + sebs-regression: + docker: + - image: cimg/python:3.11 + resource_class: large + environment: + RESOURCE_PREFIX: sebs-ci + +commands: + + restore-sebs-cache: + description: "Restore SeBS cache directory containing cloud resource metadata" + steps: + - restore_cache: + keys: + - sebs-cache-{{ .Branch }} + + save-caches: + description: "Persist SeBS cache and dependencies" + parameters: + language: + type: enum + enum: [python, nodejs, java, cpp] + steps: + - save_cache: + key: sebs-cache-{{ .Branch }} + paths: + - regression-cache/ + + install-sebs: + description: "Install SeBS with platform-specific dependencies" + parameters: + platform: + type: enum + enum: [aws, azure, gcp] + steps: + - run: + name: Install SeBS + command: pip install . 
+ + setup-cloud-credentials: + description: "Configure cloud authentication" + parameters: + platform: + type: enum + enum: [aws, azure, gcp] + steps: + - when: + condition: + equal: [gcp, << parameters.platform >>] + steps: + - run: + name: Setup GCP Credentials + command: | + echo "$GCP_SERVICE_ACCOUNT_JSON" > /tmp/gcp-credentials.json + echo 'export GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcp-credentials.json' >> $BASH_ENV + + run-regression-tests: + description: "Execute regression test suite" + parameters: + platform: + type: enum + enum: [aws, azure, gcp] + language: + type: enum + enum: [python, nodejs, java, cpp] + version: + type: string + steps: + - run: + name: Run Regression Tests + command: | + set -euo pipefail + sebs benchmark regression test \ + --config configs/example.json \ + --deployment << parameters.platform >> \ + --language << parameters.language >> \ + --language-version << parameters.version >> \ + --architecture x64 --selected-architecture \ + --resource-prefix sebs-ci + no_output_timeout: 5m + + save-results: + description: "Save benchmark results as artifacts" + steps: + - run: + name: Generate Test Summary + command: | + echo "Regression Test Summary" > test-summary.txt + echo "======================" >> test-summary.txt + if ls regression_*.json 1> /dev/null 2>&1; then + ls -1 regression_*.json | wc -l | xargs echo "Benchmarks tested:" >> test-summary.txt + echo "" >> test-summary.txt + echo "Results saved to artifacts/results/" >> test-summary.txt + else + echo "No benchmark results found" >> test-summary.txt + fi + when: always + - store_artifacts: + path: test-summary.txt + - run: + name: Collect regression results + command: | + mkdir -p results + if ls regression_*.json 1> /dev/null 2>&1; then + mv regression_*.json results/ || true + fi + when: always + - store_artifacts: + path: results + destination: results/ + - store_artifacts: + path: cache + destination: cache-snapshot/ + jobs: linting: executor: @@ -43,46 +159,37 @@ jobs: - 
store_artifacts: path: flake-reports destination: flake-reports - test-aws: - executor: python/default + + regression-aws-python311: + executor: sebs-regression steps: - checkout - - setup_remote_docker - - restore_cache: - key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }} - - run: - command: | - if [[ -d $HOME/docker ]]; - then - ls $HOME/docker/*.tar.gz | xargs -I {file} sh -c "zcat {file} | docker load"; - else - docker pull mcopik/serverless-benchmarks:build.aws.python.3.7 - docker pull mcopik/serverless-benchmarks:build.aws.nodejs.12.x - fi - name: Load Docker images - - run: - command: | - python3 install.py --aws - name: Install pip dependencies - - run: - command: | - mkdir -p $HOME/docker - docker images mcopik/serverless-benchmarks --filter='dangling=false' --format '{{.Repository}}:{{.Tag}} {{.ID}}' |\ - xargs -n 2 -t sh -c 'test -e $HOME/docker/$1.tar.gz || docker save $0 | gzip -2 > $HOME/docker/$1.tar.gz' - name: Save Docker images - - save_cache: - key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }} - paths: - - "sebs-virtualenv" - - $HOME/docker - - run: - command: | - . sebs-virtualenv/bin/activate - tests/test_runner.py --deployment aws - name: Execute AWS tests + - restore-sebs-cache + - setup_remote_docker: + version: 20.10.24 + - setup-cloud-credentials: + platform: aws + - install-sebs: + platform: aws + - run-regression-tests: + platform: aws + language: python + version: "3.11" + - save-results + - save-caches: + language: python workflows: main: jobs: - linting + regression-tests: + jobs: + # AWS jobs + - regression-aws-python311: + filters: + branches: + only: + - master + - /feature\/.*/ diff --git a/benchmarks/wrappers/aws/python/setup.py b/benchmarks/wrappers/aws/python/setup.py index c34245e4..51d9c5f8 100644 --- a/benchmarks/wrappers/aws/python/setup.py +++ b/benchmarks/wrappers/aws/python/setup.py @@ -1,14 +1,9 @@ # Copyright 2020-2025 ETH Zurich and the SeBS authors. All rights reserved. 
from distutils.core import setup from glob import glob -from pkg_resources import parse_requirements - -with open('requirements.txt') as f: - requirements = [str(r) for r in parse_requirements(f)] setup( name='function', - install_requires=requirements, packages=['function'], package_dir={'function': '.'}, package_data={'function': glob('**', recursive=True)}, diff --git a/sebs/cli.py b/sebs/cli.py index 4e1cc558..870d8603 100755 --- a/sebs/cli.py +++ b/sebs/cli.py @@ -11,6 +11,7 @@ import logging import functools import os +import sys import traceback from typing import cast, List, Optional @@ -455,33 +456,43 @@ def package( multiple=True, help="JSON configuration of deployed storage.", ) -@common_params -@click.option( - "--cache", - default=os.path.join(os.path.curdir, "regression-cache"), - help="Location of experiments cache.", -) @click.option( - "--output-dir", - default=os.path.join(os.path.curdir, "regression-output"), - help="Output directory for results.", + "--selected-architecture/--all-architectures", + type=bool, + default=False, + help="Skip non-selected CPU architectures.", ) -def regression(benchmark_input_size, benchmark_name, storage_configuration, **kwargs): +@common_params +def regression( + benchmark_input_size, benchmark_name, storage_configuration, selected_architecture, **kwargs +): """Run regression test suite across benchmarks.""" + # for regression, deployment client is initialized locally # disable default initialization + + from pathlib import Path + + if Path(kwargs["cache"]) == Path("cache"): + kwargs["cache"] = os.path.join(os.path.curdir, "regression-cache") + (config, output_dir, logging_filename, sebs_client, _) = parse_common_params( initialize_deployment=False, storage_configuration=storage_configuration, **kwargs, ) - regression_suite( + architecture = config["experiments"]["architecture"] if selected_architecture else None + has_failures = regression_suite( sebs_client, config["experiments"], 
set((config["deployment"]["name"],)), config, + kwargs["resource_prefix"], benchmark_name, + architecture, ) + # Exit with non-zero code if any tests failed + sys.exit(1 if has_failures else 0) @cli.group() diff --git a/sebs/regression.py b/sebs/regression.py index 53336a2a..da8a3e6e 100644 --- a/sebs/regression.py +++ b/sebs/regression.py @@ -80,6 +80,8 @@ # User-defined config passed during initialization, set in regression_suite() cloud_config: Optional[dict] = None +RESOURCE_PREFIX = "regr" + class TestSequenceMeta(type): """Metaclass for dynamically generating regression test cases. @@ -335,7 +337,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Synchronize resource initialization with a lock with AWSTestSequencePython.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -389,7 +391,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Synchronize resource initialization with a lock with AWSTestSequenceNodejs.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -432,7 +434,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): logging_filename=os.path.join(self.client.output_dir, f), ) with AWSTestSequenceCpp.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -483,7 +485,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): logging_filename=os.path.join(self.client.output_dir, f), ) with AWSTestSequenceJava.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -565,7 +567,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): 
deployment_client.system_resources.initialize_cli( cli=AzureTestSequencePython.cli, login=True ) - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -642,7 +644,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Initialize CLI and setup resources (no login needed - reuses Python session) deployment_client.system_resources.initialize_cli(cli=AzureTestSequenceNodejs.cli) - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -716,7 +718,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): deployment_client.system_resources.initialize_cli( cli=AzureTestSequenceJava.cli, login=needs_login ) - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -770,7 +772,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Synchronize resource initialization with a lock with GCPTestSequencePython.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -824,7 +826,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Synchronize resource initialization with a lock with GCPTestSequenceNodejs.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -878,7 +880,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Synchronize resource initialization with a lock with GCPTestSequenceJava.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -936,7 +938,7 @@ def get_deployment(self, benchmark_name, 
architecture, deployment_type): # Synchronize resource initialization with a lock with OpenWhiskTestSequencePython.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -994,7 +996,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Synchronize resource initialization with a lock with OpenWhiskTestSequenceNodejs.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -1048,7 +1050,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Synchronize resource initialization with a lock with OpenWhiskTestSequenceJava.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -1123,6 +1125,7 @@ def filter_out_benchmarks( language_version: str, architecture: str, deployment_type: str, + selected_architecture: str | None = None, ) -> bool: """Filter out benchmarks that are not supported on specific platforms. @@ -1142,6 +1145,10 @@ """ # fmt: off + # user can ask to use only a selected architecture + if selected_architecture is not None and selected_architecture != architecture: + return False + # Arm architecture currently not supported for C++ if (language == "cpp" and architecture == "arm64"): return False @@ -1174,7 +1181,9 @@ def regression_suite( experiment_config: dict, providers: Set[str], deployment_config: dict, + resource_prefix: str | None = None, benchmark_name: Optional[str] = None, + selected_architecture: str | None = None, ): """Create and run a regression test suite for specified cloud providers. 
@@ -1195,6 +1204,11 @@ def regression_suite( Raises: AssertionError: If a requested provider is not in the deployment config """ + + global RESOURCE_PREFIX + if resource_prefix is not None: + RESOURCE_PREFIX = resource_prefix + # Create the test suite suite = unittest.TestSuite() @@ -1279,6 +1293,7 @@ def regression_suite( language_version, test_architecture, test_deployment_type, + selected_architecture, ): print(f"Skip test {test_name} - not supported.") continue