diff --git a/.circleci/config.yml b/.circleci/config.yml index b22c4edc..f070c6be 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -3,6 +3,122 @@ version: 2.1 orbs: python: circleci/python@2.1 +# Executor for regression testing jobs +executors: + sebs-regression: + docker: + - image: cimg/python:3.11 + resource_class: large + environment: + RESOURCE_PREFIX: sebs-ci + +commands: + + restore-sebs-cache: + description: "Restore SeBS cache directory containing cloud resource metadata" + steps: + - restore_cache: + keys: + - sebs-cache-{{ .Branch }} + + save-caches: + description: "Persist SeBS cache and dependencies" + parameters: + language: + type: enum + enum: [python, nodejs, java, cpp] + steps: + - save_cache: + key: sebs-cache-{{ .Branch }} + paths: + - regression-cache/ + + install-sebs: + description: "Install SeBS with platform-specific dependencies" + parameters: + platform: + type: enum + enum: [aws, azure, gcp] + steps: + - run: + name: Install SeBS + command: pip install . 
+ + setup-cloud-credentials: + description: "Configure cloud authentication" + parameters: + platform: + type: enum + enum: [aws, azure, gcp] + steps: + - when: + condition: + equal: [gcp, << parameters.platform >>] + steps: + - run: + name: Setup GCP Credentials + command: | + echo "$GCP_SERVICE_ACCOUNT_JSON" > /tmp/gcp-credentials.json + echo 'export GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcp-credentials.json' >> $BASH_ENV + + run-regression-tests: + description: "Execute regression test suite" + parameters: + platform: + type: enum + enum: [aws, azure, gcp] + language: + type: enum + enum: [python, nodejs, java, cpp] + version: + type: string + steps: + - run: + name: Run Regression Tests + command: | + set -euo pipefail + sebs benchmark regression test \ + --config configs/example.json \ + --deployment << parameters.platform >> \ + --language << parameters.language >> \ + --language-version << parameters.version >> \ + --architecture x64 --selected-architecture \ + --resource-prefix sebs-ci + no_output_timeout: 5m + + save-results: + description: "Save benchmark results as artifacts" + steps: + - run: + name: Generate Test Summary + command: | + echo "Regression Test Summary" > test-summary.txt + echo "======================" >> test-summary.txt + if ls regression_*.json 1> /dev/null 2>&1; then + ls -1 regression_*.json | wc -l | xargs echo "Benchmarks tested:" >> test-summary.txt + echo "" >> test-summary.txt + echo "Results saved to artifacts/results/" >> test-summary.txt + else + echo "No benchmark results found" >> test-summary.txt + fi + when: always + - store_artifacts: + path: test-summary.txt + - run: + name: Collect regression results + command: | + mkdir -p results + if ls regression_*.json 1> /dev/null 2>&1; then + mv regression_*.json results/ || true + fi + when: always + - store_artifacts: + path: results + destination: results/ + - store_artifacts: + path: cache + destination: cache-snapshot/ + jobs: linting: executor: @@ -43,46 +159,37 @@ jobs: - 
store_artifacts: path: flake-reports destination: flake-reports - test-aws: - executor: python/default + + regression-aws-python311: + executor: sebs-regression steps: - checkout - - setup_remote_docker - - restore_cache: - key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }} - - run: - command: | - if [[ -d $HOME/docker ]]; - then - ls $HOME/docker/*.tar.gz | xargs -I {file} sh -c "zcat {file} | docker load"; - else - docker pull mcopik/serverless-benchmarks:build.aws.python.3.7 - docker pull mcopik/serverless-benchmarks:build.aws.nodejs.12.x - fi - name: Load Docker images - - run: - command: | - python3 install.py --aws - name: Install pip dependencies - - run: - command: | - mkdir -p $HOME/docker - docker images mcopik/serverless-benchmarks --filter='dangling=false' --format '{{.Repository}}:{{.Tag}} {{.ID}}' |\ - xargs -n 2 -t sh -c 'test -e $HOME/docker/$1.tar.gz || docker save $0 | gzip -2 > $HOME/docker/$1.tar.gz' - name: Save Docker images - - save_cache: - key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }} - paths: - - "sebs-virtualenv" - - $HOME/docker - - run: - command: | - . sebs-virtualenv/bin/activate - tests/test_runner.py --deployment aws - name: Execute AWS tests + - restore-sebs-cache + - setup_remote_docker: + version: 20.10.24 + - setup-cloud-credentials: + platform: aws + - install-sebs: + platform: aws + - run-regression-tests: + platform: aws + language: python + version: "3.11" + - save-results + - save-caches: + language: python workflows: main: jobs: - linting + regression-tests: + jobs: + # AWS jobs + - regression-aws-python311: + filters: + branches: + only: + - master + - /feature\/.*/ diff --git a/benchmarks/wrappers/aws/python/setup.py b/benchmarks/wrappers/aws/python/setup.py index c34245e4..51d9c5f8 100644 --- a/benchmarks/wrappers/aws/python/setup.py +++ b/benchmarks/wrappers/aws/python/setup.py @@ -1,14 +1,9 @@ # Copyright 2020-2025 ETH Zurich and the SeBS authors. All rights reserved. 
from distutils.core import setup from glob import glob -from pkg_resources import parse_requirements - -with open('requirements.txt') as f: - requirements = [str(r) for r in parse_requirements(f)] setup( name='function', - install_requires=requirements, packages=['function'], package_dir={'function': '.'}, package_data={'function': glob('**', recursive=True)}, diff --git a/sebs/cli.py b/sebs/cli.py index 4e1cc558..870d8603 100755 --- a/sebs/cli.py +++ b/sebs/cli.py @@ -11,6 +11,7 @@ import logging import functools import os +import sys import traceback from typing import cast, List, Optional @@ -455,33 +456,43 @@ def package( multiple=True, help="JSON configuration of deployed storage.", ) -@common_params -@click.option( - "--cache", - default=os.path.join(os.path.curdir, "regression-cache"), - help="Location of experiments cache.", -) @click.option( - "--output-dir", - default=os.path.join(os.path.curdir, "regression-output"), - help="Output directory for results.", + "--selected-architecture/--all-architectures", + type=bool, + default=False, + help="Skip non-selected CPU architectures.", ) -def regression(benchmark_input_size, benchmark_name, storage_configuration, **kwargs): +@common_params +def regression( + benchmark_input_size, benchmark_name, storage_configuration, selected_architecture, **kwargs +): """Run regression test suite across benchmarks.""" + # for regression, deployment client is initialized locally # disable default initialization + + from pathlib import Path + + if Path(kwargs["cache"]) == Path("cache"): + kwargs["cache"] = os.path.join(os.path.curdir, "regression-cache") + (config, output_dir, logging_filename, sebs_client, _) = parse_common_params( initialize_deployment=False, storage_configuration=storage_configuration, **kwargs, ) - regression_suite( + architecture = config["experiments"]["architecture"] if selected_architecture else None + has_failures = regression_suite( sebs_client, config["experiments"], 
set((config["deployment"]["name"],)), config, + kwargs["resource_prefix"], benchmark_name, + architecture, ) + # Exit with non-zero code if any tests failed + sys.exit(1 if has_failures else 0) @cli.group() diff --git a/sebs/regression.py b/sebs/regression.py index 53336a2a..da8a3e6e 100644 --- a/sebs/regression.py +++ b/sebs/regression.py @@ -80,6 +80,8 @@ # User-defined config passed during initialization, set in regression_suite() cloud_config: Optional[dict] = None +RESOURCE_PREFIX = "regr" + class TestSequenceMeta(type): """Metaclass for dynamically generating regression test cases. @@ -335,7 +337,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Synchronize resource initialization with a lock with AWSTestSequencePython.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -389,7 +391,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Synchronize resource initialization with a lock with AWSTestSequenceNodejs.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -432,7 +434,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): logging_filename=os.path.join(self.client.output_dir, f), ) with AWSTestSequenceCpp.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -483,7 +485,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): logging_filename=os.path.join(self.client.output_dir, f), ) with AWSTestSequenceJava.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -565,7 +567,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): 
deployment_client.system_resources.initialize_cli( cli=AzureTestSequencePython.cli, login=True ) - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -642,7 +644,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Initialize CLI and setup resources (no login needed - reuses Python session) deployment_client.system_resources.initialize_cli(cli=AzureTestSequenceNodejs.cli) - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -716,7 +718,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): deployment_client.system_resources.initialize_cli( cli=AzureTestSequenceJava.cli, login=needs_login ) - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -770,7 +772,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Synchronize resource initialization with a lock with GCPTestSequencePython.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -824,7 +826,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Synchronize resource initialization with a lock with GCPTestSequenceNodejs.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -878,7 +880,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Synchronize resource initialization with a lock with GCPTestSequenceJava.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -936,7 +938,7 @@ def get_deployment(self, benchmark_name, 
architecture, deployment_type): # Synchronize resource initialization with a lock with OpenWhiskTestSequencePython.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -994,7 +996,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Synchronize resource initialization with a lock with OpenWhiskTestSequenceNodejs.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -1048,7 +1050,7 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Synchronize resource initialization with a lock with OpenWhiskTestSequenceJava.lock: - deployment_client.initialize(resource_prefix="regr") + deployment_client.initialize(resource_prefix=RESOURCE_PREFIX) return deployment_client @@ -1123,6 +1125,7 @@ def filter_out_benchmarks( language_version: str, architecture: str, deployment_type: str, + selected_architecture: str | None = None, ) -> bool: """Filter out benchmarks that are not supported on specific platforms. @@ -1142,6 +1145,10 @@ """ # fmt: off + # user can ask to use only a selected architecture + if selected_architecture is not None and selected_architecture != architecture: + return False + # Arm architecture currently not supported for C++ if (language == "cpp" and architecture == "arm64"): return False @@ -1174,7 +1181,9 @@ def regression_suite( experiment_config: dict, providers: Set[str], deployment_config: dict, + resource_prefix: str | None = None, benchmark_name: Optional[str] = None, + selected_architecture: str | None = None, ): """Create and run a regression test suite for specified cloud providers. 
@@ -1195,6 +1204,11 @@ def regression_suite( Raises: AssertionError: If a requested provider is not in the deployment config """ + + global RESOURCE_PREFIX + if resource_prefix is not None: + RESOURCE_PREFIX = resource_prefix + # Create the test suite suite = unittest.TestSuite() @@ -1279,6 +1293,7 @@ def regression_suite( language_version, test_architecture, test_deployment_type, + selected_architecture, ): print(f"Skip test {test_name} - not supported.") continue