From 424f6727c98eeefbedd3f020a74c8078d696b92d Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Tue, 17 Mar 2026 16:15:38 +0100 Subject: [PATCH 1/9] [regression] Multiple bug fixes Correctly get regression cache path, select architecture, and pass resource prefix --- sebs/cli.py | 28 ++++++++++++++++++---------- sebs/regression.py | 40 +++++++++++++++++++++++++++------------- 2 files changed, 45 insertions(+), 23 deletions(-) diff --git a/sebs/cli.py b/sebs/cli.py index 4e1cc558..ad312de3 100755 --- a/sebs/cli.py +++ b/sebs/cli.py @@ -455,32 +455,40 @@ def package( multiple=True, help="JSON configuration of deployed storage.", ) -@common_params -@click.option( - "--cache", - default=os.path.join(os.path.curdir, "regression-cache"), - help="Location of experiments cache.", -) @click.option( - "--output-dir", - default=os.path.join(os.path.curdir, "regression-output"), - help="Output directory for results.", + "--selected-architecture/--all-architectures", + type=bool, + default=False, + help="Skip non-selected CPU architectures.", ) -def regression(benchmark_input_size, benchmark_name, storage_configuration, **kwargs): +@common_params +def regression( + benchmark_input_size, benchmark_name, storage_configuration, selected_architecture, **kwargs +): """Run regression test suite across benchmarks.""" + # for regression, deployment client is initialized locally # disable default initialization + + from pathlib import Path + + if Path(kwargs["cache"]) == Path("cache"): + kwargs["cache"] = os.path.join(os.path.curdir, "regression-cache") + (config, output_dir, logging_filename, sebs_client, _) = parse_common_params( initialize_deployment=False, storage_configuration=storage_configuration, **kwargs, ) + architecture = config["experiments"]["architecture"] if selected_architecture else None regression_suite( sebs_client, config["experiments"], set((config["deployment"]["name"],)), config, + kwargs["resource_prefix"], benchmark_name, + architecture, ) diff --git 
a/sebs/regression.py b/sebs/regression.py index 53336a2a..6221df7f 100644 --- a/sebs/regression.py +++ b/sebs/regression.py @@ -80,6 +80,8 @@ # User-defined config passed during initialization, set in regression_suite() cloud_config: Optional[dict] = None +RESOURCE_PREFIX = "regr" + class TestSequenceMeta(type): """Metaclass for dynamically generating regression test cases. @@ -335,7 +337,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Synchronize resource initialization with a lock with AWSTestSequencePython.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -389,7 +391,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Synchronize resource initialization with a lock with AWSTestSequenceNodejs.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -432,7 +434,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): logging_filename=os.path.join(self.client.output_dir, f), ) with AWSTestSequenceCpp.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -483,7 +485,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): logging_filename=os.path.join(self.client.output_dir, f), ) with AWSTestSequenceJava.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -565,7 +567,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): deployment_client.system_resources.initialize_cli( cli=AzureTestSequencePython.cli, login=True ) - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return 
deployment_client @@ -642,7 +644,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Initialize CLI and setup resources (no login needed - reuses Python session) deployment_client.system_resources.initialize_cli(cli=AzureTestSequenceNodejs.cli) - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -716,7 +718,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): deployment_client.system_resources.initialize_cli( cli=AzureTestSequenceJava.cli, login=needs_login ) - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -770,7 +772,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Synchronize resource initialization with a lock with GCPTestSequencePython.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -824,7 +826,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Synchronize resource initialization with a lock with GCPTestSequenceNodejs.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -878,7 +880,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Synchronize resource initialization with a lock with GCPTestSequenceJava.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -936,7 +938,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Synchronize resource initialization with a lock with OpenWhiskTestSequencePython.lock: - deployment_client.initialize(resource_prefix="regr") + 
deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -994,7 +996,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Synchronize resource initialization with a lock with OpenWhiskTestSequenceNodejs.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -1048,7 +1050,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Synchronize resource initialization with a lock with OpenWhiskTestSequenceJava.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -1123,6 +1125,7 @@ def filter_out_benchmarks( language_version: str, architecture: str, deployment_type: str, + selected_architecture: str | None = None, ) -> bool: """Filter out benchmarks that are not supported on specific platforms. @@ -1142,6 +1145,10 @@ def filter_out_benchmarks( """ # fmt: off + # user can ask to use only a selected architecture + if selected_architecture is not None and selected_architecture != architecture: + return False + # Arm architecture currently not supported for C++ if (language == "cpp" and architecture == "arm64"): return False @@ -1174,7 +1181,9 @@ def regression_suite( experiment_config: dict, providers: Set[str], deployment_config: dict, + resource_prefix: str = "regr", benchmark_name: Optional[str] = None, + selected_architecture: str | None = None, ): """Create and run a regression test suite for specified cloud providers. 
@@ -1195,6 +1204,10 @@ def regression_suite( Raises: AssertionError: If a requested provider is not in the deployment config """ + + global RESOURCE_PREFIX + RESOURCE_PREFIX = resource_prefix + # Create the test suite suite = unittest.TestSuite() @@ -1279,6 +1292,7 @@ def regression_suite( language_version, test_architecture, test_deployment_type, + selected_architecture, ): print(f"Skip test {test_name} - not supported.") continue From 458d924057a65e0dcfda1c7c8d175c0d66e91701 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Tue, 17 Mar 2026 16:17:40 +0100 Subject: [PATCH 2/9] [ci] Update test job --- .circleci/config.yml | 135 ++++++++++++++++++++++++++++++++----------- 1 file changed, 102 insertions(+), 33 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index b22c4edc..e393dc3a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -3,6 +3,36 @@ version: 2.1 orbs: python: circleci/python@2.1 +# Executor for regression testing jobs +executors: + sebs-regression: + docker: + - image: cimg/python:3.11 + resource_class: large + environment: + RESOURCE_PREFIX: sebs-ci + +commands: + + restore-sebs-cache: + description: "Restore SeBS cache directory containing cloud resource metadata" + steps: + - restore_cache: + keys: + - sebs-cache-{{ .Branch }} + + save-caches: + description: "Persist SeBS cache and dependencies" + parameters: + language: + type: enum + enum: [python, nodejs, java, cpp] + steps: + - save_cache: + key: sebs-cache-{{ .Branch }} + paths: + - cache/ + jobs: linting: executor: @@ -43,46 +73,85 @@ jobs: - store_artifacts: path: flake-reports destination: flake-reports - test-aws: - executor: python/default + + install-sebs: + description: "Install SeBS with platform-specific dependencies" + parameters: + platform: + type: enum + enum: [aws, azure, gcp] steps: - - checkout - - setup_remote_docker - - restore_cache: - key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }} - - run: - command: | - if [[ -d $HOME/docker ]]; 
- then - ls $HOME/docker/*.tar.gz | xargs -I {file} sh -c "zcat {file} | docker load"; - else - docker pull mcopik/serverless-benchmarks:build.aws.python.3.7 - docker pull mcopik/serverless-benchmarks:build.aws.nodejs.12.x - fi - name: Load Docker images - run: - command: | - python3 install.py --aws - name: Install pip dependencies - - run: - command: | - mkdir -p $HOME/docker - docker images mcopik/serverless-benchmarks --filter='dangling=false' --format '{{.Repository}}:{{.Tag}} {{.ID}}' |\ - xargs -n 2 -t sh -c 'test -e $HOME/docker/$1.tar.gz || docker save $0 | gzip -2 > $HOME/docker/$1.tar.gz' - name: Save Docker images - - save_cache: - key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }} - paths: - - "sebs-virtualenv" - - $HOME/docker + name: Install SeBS + command: pip install . + + setup-cloud-credentials: + description: "Configure cloud authentication" + parameters: + platform: + type: enum + enum: [aws, azure, gcp] + steps: + - when: + condition: + equal: [gcp, << parameters.platform >>] + steps: + - run: + name: Setup GCP Credentials + command: | + echo "$GCP_SERVICE_ACCOUNT_JSON" > /tmp/gcp-credentials.json + echo 'export GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcp-credentials.json' >> $BASH_ENV + + run-regression-tests: + description: "Execute regression test suite" + parameters: + platform: + type: enum + enum: [aws, azure, gcp] + language: + type: enum + enum: [python, nodejs, java, cpp] + version: + type: string + steps: - run: + name: Run Regression Tests command: | - . 
sebs-virtualenv/bin/activate - tests/test_runner.py --deployment aws - name: Execute AWS tests + sebs benchmark regression \ + --config config/example.json \ + --deployment << parameters.platform >> \ + --language << parameters.language >> \ + --language-version << parameters.version >> + no_output_timeout: 5m + + regression-aws-python311: + executor: sebs-regression + steps: + - checkout + - restore-sebs-cache + - setup_remote_docker: + version: 20.10.24 + - setup-cloud-credentials: + platform: aws + - install-sebs: + platform: aws + - run-regression-tests: + platform: aws + language: python + version: "3.11" + - save-results + - save-caches: + language: python workflows: main: jobs: - linting + regression-tests: + jobs: + # AWS jobs + - regression-aws-python311: + filters: + branches: + only: master From 28108b2e36c15ec264a49542696f861f18062720 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Tue, 17 Mar 2026 16:24:05 +0100 Subject: [PATCH 3/9] [ci] Update test job --- .circleci/config.yml | 115 ++++++++++++++++++++++++++++--------------- 1 file changed, 74 insertions(+), 41 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index e393dc3a..2f9a24b4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -33,47 +33,6 @@ commands: paths: - cache/ -jobs: - linting: - executor: - name: 'python/default' - tag: '3.10' - steps: - - checkout - - restore_cache: - key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }} - - run: - command: | - sudo apt update && sudo apt install libcurl4-openssl-dev - name: Install curl-config from Ubuntu APT - - run: - command: | - python3 install.py --aws --azure --gcp --no-local - name: Install pip dependencies - - run: - command: | - . python-venv/bin/activate - black sebs --check --config .black.toml - name: Python code formatting with black - - run: - command: | - . 
python-venv/bin/activate - flake8 sebs --config=.flake8.cfg --tee --output-file flake-reports - name: Python code lint with flake8 - - run: - command: | - . python-venv/bin/activate - mypy sebs --config-file=.mypy.ini - name: Python static code verification with mypy - - run: - command: | - . python-venv/bin/activate - interrogate -v --fail-under 100 sebs - name: Check for Python documentation coverage - - store_artifacts: - path: flake-reports - destination: flake-reports - install-sebs: description: "Install SeBS with platform-specific dependencies" parameters: @@ -124,6 +83,80 @@ jobs: --language-version << parameters.version >> no_output_timeout: 5m + save-results: + description: "Save benchmark results as artifacts" + steps: + - run: + name: Generate Test Summary + command: | + echo "Regression Test Summary" > test-summary.txt + echo "======================" >> test-summary.txt + if ls regression_*.json 1> /dev/null 2>&1; then + ls -1 regression_*.json | wc -l | xargs echo "Benchmarks tested:" >> test-summary.txt + echo "" >> test-summary.txt + echo "Results saved to artifacts/results/" >> test-summary.txt + else + echo "No benchmark results found" >> test-summary.txt + fi + when: always + - store_artifacts: + path: test-summary.txt + - run: + name: Collect regression results + command: | + mkdir -p results + if ls regression_*.json 1> /dev/null 2>&1; then + mv regression_*.json results/ || true + fi + when: always + - store_artifacts: + path: results + destination: results/ + - store_artifacts: + path: cache + destination: cache-snapshot/ + +jobs: + linting: + executor: + name: 'python/default' + tag: '3.10' + steps: + - checkout + - restore_cache: + key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }} + - run: + command: | + sudo apt update && sudo apt install libcurl4-openssl-dev + name: Install curl-config from Ubuntu APT + - run: + command: | + python3 install.py --aws --azure --gcp --no-local + name: Install pip dependencies + - run: + command: | + 
. python-venv/bin/activate + black sebs --check --config .black.toml + name: Python code formatting with black + - run: + command: | + . python-venv/bin/activate + flake8 sebs --config=.flake8.cfg --tee --output-file flake-reports + name: Python code lint with flake8 + - run: + command: | + . python-venv/bin/activate + mypy sebs --config-file=.mypy.ini + name: Python static code verification with mypy + - run: + command: | + . python-venv/bin/activate + interrogate -v --fail-under 100 sebs + name: Check for Python documentation coverage + - store_artifacts: + path: flake-reports + destination: flake-reports + regression-aws-python311: executor: sebs-regression steps: From 30ccfcb85b62b448aa42ca4e409ac03d691f2f77 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Tue, 17 Mar 2026 16:28:31 +0100 Subject: [PATCH 4/9] [ci] Enable CI job on feature branches --- .circleci/config.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 2f9a24b4..69f7527e 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -187,4 +187,6 @@ workflows: - regression-aws-python311: filters: branches: - only: master + only: + - master + - /feature\/.*/ From a46d41047f4331a628b24b04c2d1b49682615c52 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Tue, 17 Mar 2026 16:32:50 +0100 Subject: [PATCH 5/9] [ci] Fix CI command --- .circleci/config.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 69f7527e..cc1438d8 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -76,11 +76,13 @@ commands: - run: name: Run Regression Tests command: | - sebs benchmark regression \ + sebs benchmark regression test \ --config config/example.json \ --deployment << parameters.platform >> \ --language << parameters.language >> \ - --language-version << parameters.version >> + --language-version << parameters.version >> \ + --architecture x64 
--selected-architecture \ + --resource-prefix sebs-ci no_output_timeout: 5m save-results: From 628e25270fc9a464a290cc021be065aecb928cf7 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Tue, 17 Mar 2026 16:36:37 +0100 Subject: [PATCH 6/9] [ci] Fix CI command --- .circleci/config.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index cc1438d8..bead91a6 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -76,8 +76,9 @@ commands: - run: name: Run Regression Tests command: | + set -euo pipefail sebs benchmark regression test \ - --config config/example.json \ + --config configs/example.json \ --deployment << parameters.platform >> \ --language << parameters.language >> \ --language-version << parameters.version >> \ From 635f31233b691572b9ceace162daa63d10bfb189 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Tue, 17 Mar 2026 17:15:46 +0100 Subject: [PATCH 7/9] [regression] Non-zero return from failed regression --- sebs/cli.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sebs/cli.py b/sebs/cli.py index ad312de3..870d8603 100755 --- a/sebs/cli.py +++ b/sebs/cli.py @@ -11,6 +11,7 @@ import logging import functools import os +import sys import traceback from typing import cast, List, Optional @@ -481,7 +482,7 @@ def regression( **kwargs, ) architecture = config["experiments"]["architecture"] if selected_architecture else None - regression_suite( + has_failures = regression_suite( sebs_client, config["experiments"], set((config["deployment"]["name"],)), @@ -490,6 +491,8 @@ def regression( benchmark_name, architecture, ) + # Exit with non-zero code if any tests failed + sys.exit(1 if has_failures else 0) @cli.group() From 0dd1e2a2b7462da847af973122ab27f4fb4d8508 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Tue, 17 Mar 2026 17:16:07 +0100 Subject: [PATCH 8/9] [aws] Remove unnecessary pkg_resources from container --- benchmarks/wrappers/aws/python/setup.py | 5 
----- 1 file changed, 5 deletions(-) diff --git a/benchmarks/wrappers/aws/python/setup.py b/benchmarks/wrappers/aws/python/setup.py index c34245e4..51d9c5f8 100644 --- a/benchmarks/wrappers/aws/python/setup.py +++ b/benchmarks/wrappers/aws/python/setup.py @@ -1,14 +1,9 @@ # Copyright 2020-2025 ETH Zurich and the SeBS authors. All rights reserved. from distutils.core import setup from glob import glob -from pkg_resources import parse_requirements - -with open('requirements.txt') as f: - requirements = [str(r) for r in parse_requirements(f)] setup( name='function', - install_requires=requirements, packages=['function'], package_dir={'function': '.'}, package_data={'function': glob('**', recursive=True)}, From 9f238ba15c4e0028791fcf3b7977c8b964c87260 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Tue, 17 Mar 2026 17:47:50 +0100 Subject: [PATCH 9/9] [ci] Fixes --- .circleci/config.yml | 2 +- sebs/regression.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index bead91a6..f070c6be 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -31,7 +31,7 @@ commands: - save_cache: key: sebs-cache-{{ .Branch }} paths: - - cache/ + - regression-cache/ install-sebs: description: "Install SeBS with platform-specific dependencies" diff --git a/sebs/regression.py b/sebs/regression.py index 6221df7f..da8a3e6e 100644 --- a/sebs/regression.py +++ b/sebs/regression.py @@ -1181,7 +1181,7 @@ def regression_suite( experiment_config: dict, providers: Set[str], deployment_config: dict, - resource_prefix: str = "regr", + resource_prefix: str | None = None, benchmark_name: Optional[str] = None, selected_architecture: str | None = None, ): @@ -1206,7 +1206,8 @@ def regression_suite( """ global RESOURCE_PREFIX - RESOURCE_PREFIX = resource_prefix + if resource_prefix is not None: + RESOURCE_PREFIX = resource_prefix # Create the test suite suite = unittest.TestSuite()