spcl · mcopik · Mar 17, 2026 · Mar 17, 2026 · Mar 17, 2026 · Mar 17, 2026
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -3,6 +3,122 @@ version: 2.1
 orbs:
   python: circleci/python@2.1
 
+# Shared executor for the regression jobs: the cimg/python:3.11 Docker image
+# on the "large" resource class.
+executors:
+  sebs-regression:
+    resource_class: large
+    environment:
+      # Exported to every step that runs on this executor.
+      RESOURCE_PREFIX: sebs-ci
+    docker:
+      - image: cimg/python:3.11
+
+commands:
+
+  restore-sebs-cache:
+    # Looks up a single cache key that is scoped to the current branch.
+    description: "Restore SeBS cache directory containing cloud resource metadata"
+    steps:
+      - restore_cache:
+          key: sebs-cache-{{ .Branch }}
+
+  save-caches:
+    description: "Persist SeBS cache and dependencies"
+    parameters:
+      # Accepted from callers; not referenced by the steps below.
+      language:
+        type: enum
+        enum: [python, nodejs, java, cpp]
+    steps:
+      - save_cache:
+          # CircleCI caches are immutable: once a key has been written, later
+          # saves to the same key are skipped, so a branch-only key would
+          # freeze the cache after the first run. The epoch suffix makes every
+          # run write a fresh entry; a restore key made of the branch-only
+          # prefix still matches, and the most recent entry is restored.
+          key: sebs-cache-{{ .Branch }}-{{ epoch }}
+          paths:
+            - regression-cache/
+
+  install-sebs:
+    description: "Install SeBS with platform-specific dependencies"
+    parameters:
+      # Accepted from callers; the install step below does not reference it.
+      platform:
+        enum: [aws, azure, gcp]
+        type: enum
+    steps:
+      # Installs the checked-out project from the working directory.
+      - run:
+          command: pip install .
+          name: Install SeBS
+
+  setup-cloud-credentials:
+    description: "Configure cloud authentication"
+    parameters:
+      platform:
+        type: enum
+        enum: [aws, azure, gcp]
+    steps:
+      # Only GCP needs a step here; for the other platforms this command
+      # expands to nothing.
+      - when:
+          condition:
+            equal: [gcp, << parameters.platform >>]
+          steps:
+            - run:
+                name: Setup GCP Credentials
+                command: |
+                  # Create the key file readable by the current user only.
+                  umask 077
+                  # printf writes the JSON verbatim; echo may expand backslash
+                  # escapes (such as those inside the private key) in some shells.
+                  printf '%s\n' "$GCP_SERVICE_ACCOUNT_JSON" > /tmp/gcp-credentials.json
+                  # Make the credentials path visible to all later steps.
+                  echo 'export GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcp-credentials.json' >> "$BASH_ENV"
+
+  run-regression-tests:
+    description: "Execute regression test suite"
+    parameters:
+      platform:
+        type: enum
+        enum: [aws, azure, gcp]
+      language:
+        type: enum
+        enum: [python, nodejs, java, cpp]
+      version:
+        type: string
+    steps:
+      # The resource prefix is read from RESOURCE_PREFIX (set by the
+      # sebs-regression executor) so it is defined in one place; the literal
+      # fallback keeps the previous value when the variable is not set.
+      - run:
+          name: Run Regression Tests
+          command: |
+            set -euo pipefail
+            sebs benchmark regression test \
+              --config configs/example.json \
+              --deployment << parameters.platform >> \
+              --language << parameters.language >> \
+              --language-version << parameters.version >> \
+              --architecture x64 --selected-architecture \
+              --resource-prefix "${RESOURCE_PREFIX:-sebs-ci}"
+          no_output_timeout: 5m
+
+  save-results:
+    description: "Save benchmark results as artifacts"
+    steps:
+      # Both run steps use "when: always" so results are collected even when
+      # an earlier step of the job has failed.
+      - run:
+          name: Generate Test Summary
+          command: |
+            echo "Regression Test Summary" > test-summary.txt
+            echo "======================" >> test-summary.txt
+            if ls regression_*.json 1> /dev/null 2>&1; then
+              ls -1 regression_*.json | wc -l | xargs echo "Benchmarks tested:" >> test-summary.txt
+              echo "" >> test-summary.txt
+              echo "Results saved to artifacts/results/" >> test-summary.txt
+            else
+              echo "No benchmark results found" >> test-summary.txt
+            fi
+          when: always
+      - store_artifacts:
+          path: test-summary.txt
+      - run:
+          name: Collect regression results
+          command: |
+            mkdir -p results
+            # Moving is best-effort: a failed move must not fail the job.
+            if ls regression_*.json 1> /dev/null 2>&1; then
+              mv regression_*.json results/ || true
+            fi
+          when: always
+      - store_artifacts:
+          path: results
+          destination: results/
+      # Single snapshot of the cache directory, using the same path that the
+      # save_cache step persists (regression-cache).
+      - store_artifacts:
+          path: regression-cache
+          destination: cache-snapshot/
+
 jobs:
   linting:
     executor:
@@ -43,46 +159,37 @@ jobs:
       - store_artifacts:
           path: flake-reports
           destination: flake-reports
-  test-aws:
-    executor: python/default
+
+  # Regression job: AWS deployment, Python 3.11, on the sebs-regression
+  # executor. The "-" lines below are the steps of the removed test-aws job.
+  regression-aws-python311:
+    executor: sebs-regression
     steps:
       - checkout
-      - setup_remote_docker
-      - restore_cache:
-          key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }}
-      - run:
-          command: |
-            if [[ -d $HOME/docker ]];
-            then
-              ls $HOME/docker/*.tar.gz | xargs -I {file} sh -c "zcat {file} | docker load";
-            else
-              docker pull mcopik/serverless-benchmarks:build.aws.python.3.7
-              docker pull mcopik/serverless-benchmarks:build.aws.nodejs.12.x
-            fi
-          name: Load Docker images
-      - run:
-          command: |
-            python3 install.py --aws
-          name: Install pip dependencies
-      - run:
-          command: |
-            mkdir -p $HOME/docker
-            docker images mcopik/serverless-benchmarks --filter='dangling=false' --format '{{.Repository}}:{{.Tag}} {{.ID}}' |\
-            xargs -n 2 -t sh -c 'test -e $HOME/docker/$1.tar.gz || docker save $0 | gzip -2 > $HOME/docker/$1.tar.gz'
-          name: Save Docker images
-      - save_cache:
-          key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }}
-          paths:
-            - "sebs-virtualenv"
-            - $HOME/docker
-      - run:
-          command: |
-            . sebs-virtualenv/bin/activate
-            tests/test_runner.py --deployment aws
-          name: Execute AWS tests
+      - restore-sebs-cache
+      - setup_remote_docker:
+          version: 20.10.24
+      - setup-cloud-credentials:
+          platform: aws
+      - install-sebs:
+          platform: aws
+      - run-regression-tests:
+          platform: aws
+          language: python
+          version: "3.11"
+      # Results are uploaded first; the cache is persisted as the last step.
+      - save-results
+      - save-caches:
+          language: python
 
 workflows:
   main:
     jobs:
       - linting
 
+  # Regression workflow: its jobs run only on master and on branches
+  # matching the feature/ regular expression below.
+  regression-tests:
+    jobs:
+      # AWS
+      - regression-aws-python311:
+          filters:
+            branches:
+              only:
+                - 'master'
+                - '/feature\/.*/'
diff --git a/benchmarks/wrappers/aws/python/setup.py b/benchmarks/wrappers/aws/python/setup.py
@@ -1,14 +1,9 @@
 # Copyright 2020-2025 ETH Zurich and the SeBS authors. All rights reserved.
 from distutils.core import setup
 from glob import glob
-from pkg_resources import parse_requirements
-
-with open('requirements.txt') as f:
-    requirements = [str(r) for r in parse_requirements(f)]
 
 setup(
     name='function',
-    install_requires=requirements,
     packages=['function'],
     package_dir={'function': '.'},
     package_data={'function': glob('**', recursive=True)},

diff --git a/sebs/cli.py b/sebs/cli.py
@@ -11,6 +11,7 @@
 import logging
 import functools
 import os
+import sys
 import traceback
 from typing import cast, List, Optional
 
@@ -455,33 +456,43 @@ def package(
     multiple=True,
     help="JSON configuration of deployed storage.",
 )
-@common_params
-@click.option(
-    "--cache",
-    default=os.path.join(os.path.curdir, "regression-cache"),
-    help="Location of experiments cache.",
-)
 @click.option(
-    "--output-dir",
-    default=os.path.join(os.path.curdir, "regression-output"),
-    help="Output directory for results.",
+    "--selected-architecture/--all-architectures",
+    type=bool,
+    default=False,
+    help="Skip non-selected CPU architectures.",
 )
-def regression(benchmark_input_size, benchmark_name, storage_configuration, **kwargs):
+@common_params
+def regression(
+    benchmark_input_size, benchmark_name, storage_configuration, selected_architecture, **kwargs
+):
     """Run regression test suite across benchmarks."""
+
     # for regression, deployment client is initialized locally
     # disable default initialization
+
+    from pathlib import Path
+
+    if Path(kwargs["cache"]) == Path("cache"):
+        kwargs["cache"] = os.path.join(os.path.curdir, "regression-cache")
+
     (config, output_dir, logging_filename, sebs_client, _) = parse_common_params(
         initialize_deployment=False,
         storage_configuration=storage_configuration,
         **kwargs,
     )
-    regression_suite(
+    architecture = config["experiments"]["architecture"] if selected_architecture else None
+    has_failures = regression_suite(
         sebs_client,
         config["experiments"],
         set((config["deployment"]["name"],)),
         config,
+        kwargs["resource_prefix"],
         benchmark_name,
+        architecture,
     )
+    # Exit with non-zero code if any tests failed
+    sys.exit(1 if has_failures else 0)
 
 
 @cli.group()