From 3cbc3b04e64402420ff48d33a09edbed0d48f684 Mon Sep 17 00:00:00 2001 From: toooadi Date: Mon, 1 Dec 2025 14:14:56 +0100 Subject: [PATCH 01/31] Implement Pypy local & aws container deployments --- .../100.webapps/110.dynamic-html/config.json | 2 +- .../110.dynamic-html/pypy/function.py | 22 +++ .../100.webapps/110.dynamic-html/pypy/init.sh | 10 ++ .../110.dynamic-html/pypy/requirements.txt | 1 + .../pypy/templates/template.html | 26 ++++ benchmarks/wrappers/aws/pypy/handler.py | 71 ++++++++++ benchmarks/wrappers/aws/pypy/nosql.py | 121 ++++++++++++++++ benchmarks/wrappers/aws/pypy/setup.py | 15 ++ benchmarks/wrappers/aws/pypy/storage.py | 53 +++++++ benchmarks/wrappers/local/pypy/nosql.py | 131 ++++++++++++++++++ benchmarks/wrappers/local/pypy/storage.py | 58 ++++++++ config/systems.json | 50 +++++++ dockerfiles/aws/pypy/Dockerfile.function | 52 +++++++ dockerfiles/local/pypy/Dockerfile.build | 18 +++ dockerfiles/local/pypy/Dockerfile.run | 25 ++++ dockerfiles/local/pypy/analyzer-runner.py | 64 +++++++++ dockerfiles/local/pypy/config.py | 5 + dockerfiles/local/pypy/papi-runner.py | 104 ++++++++++++++ dockerfiles/local/pypy/run_server.sh | 3 + dockerfiles/local/pypy/runners.json | 7 + dockerfiles/local/pypy/server.py | 38 +++++ dockerfiles/local/pypy/time-in-proc.py | 59 ++++++++ dockerfiles/local/pypy/timeit.sh | 5 + dockerfiles/local/pypy/tools.py | 21 +++ dockerfiles/local/runner.py | 2 +- dockerfiles/pypy_installer.sh | 42 ++++++ sebs.py | 2 +- sebs/aws/aws.py | 23 ++- sebs/benchmark.py | 8 +- sebs/faas/function.py | 5 +- sebs/local/local.py | 1 + tools/build_docker_images.py | 2 +- 32 files changed, 1033 insertions(+), 13 deletions(-) create mode 100644 benchmarks/100.webapps/110.dynamic-html/pypy/function.py create mode 100755 benchmarks/100.webapps/110.dynamic-html/pypy/init.sh create mode 100644 benchmarks/100.webapps/110.dynamic-html/pypy/requirements.txt create mode 100644 benchmarks/100.webapps/110.dynamic-html/pypy/templates/template.html create mode 
100644 benchmarks/wrappers/aws/pypy/handler.py create mode 100644 benchmarks/wrappers/aws/pypy/nosql.py create mode 100644 benchmarks/wrappers/aws/pypy/setup.py create mode 100644 benchmarks/wrappers/aws/pypy/storage.py create mode 100644 benchmarks/wrappers/local/pypy/nosql.py create mode 100644 benchmarks/wrappers/local/pypy/storage.py create mode 100644 dockerfiles/aws/pypy/Dockerfile.function create mode 100755 dockerfiles/local/pypy/Dockerfile.build create mode 100755 dockerfiles/local/pypy/Dockerfile.run create mode 100644 dockerfiles/local/pypy/analyzer-runner.py create mode 100644 dockerfiles/local/pypy/config.py create mode 100644 dockerfiles/local/pypy/papi-runner.py create mode 100755 dockerfiles/local/pypy/run_server.sh create mode 100644 dockerfiles/local/pypy/runners.json create mode 100644 dockerfiles/local/pypy/server.py create mode 100644 dockerfiles/local/pypy/time-in-proc.py create mode 100755 dockerfiles/local/pypy/timeit.sh create mode 100644 dockerfiles/local/pypy/tools.py create mode 100644 dockerfiles/pypy_installer.sh diff --git a/benchmarks/100.webapps/110.dynamic-html/config.json b/benchmarks/100.webapps/110.dynamic-html/config.json index 25254c247..da7765197 100644 --- a/benchmarks/100.webapps/110.dynamic-html/config.json +++ b/benchmarks/100.webapps/110.dynamic-html/config.json @@ -1,6 +1,6 @@ { "timeout": 10, "memory": 128, - "languages": ["python", "nodejs"], + "languages": ["python", "nodejs", "pypy"], "modules": [] } diff --git a/benchmarks/100.webapps/110.dynamic-html/pypy/function.py b/benchmarks/100.webapps/110.dynamic-html/pypy/function.py new file mode 100644 index 000000000..7c990f4eb --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/pypy/function.py @@ -0,0 +1,22 @@ +from datetime import datetime +from random import sample +from os import path +from time import time +import os + +from jinja2 import Template + +SCRIPT_DIR = path.abspath(path.join(path.dirname(__file__))) + +def handler(event): + + # start timing + 
name = event.get('username') + size = event.get('random_len') + cur_time = datetime.now() + random_numbers = sample(range(0, 1000000), size) + template = Template( open(path.join(SCRIPT_DIR, 'templates', 'template.html'), 'r').read()) + html = template.render(username = name, cur_time = cur_time, random_numbers = random_numbers) + # end timing + # dump stats + return {'result': html} diff --git a/benchmarks/100.webapps/110.dynamic-html/pypy/init.sh b/benchmarks/100.webapps/110.dynamic-html/pypy/init.sh new file mode 100755 index 000000000..7b047bff1 --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/pypy/init.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +DIR=$1 +VERBOSE=$2 +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +path="${SCRIPT_DIR}/templates/" +if [ "$VERBOSE" = true ]; then + echo "Update ${DIR} with static templates ${path}" +fi +cp -r ${SCRIPT_DIR}/templates ${DIR} diff --git a/benchmarks/100.webapps/110.dynamic-html/pypy/requirements.txt b/benchmarks/100.webapps/110.dynamic-html/pypy/requirements.txt new file mode 100644 index 000000000..5ca569440 --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/pypy/requirements.txt @@ -0,0 +1 @@ +jinja2>=2.10.3 diff --git a/benchmarks/100.webapps/110.dynamic-html/pypy/templates/template.html b/benchmarks/100.webapps/110.dynamic-html/pypy/templates/template.html new file mode 100644 index 000000000..d4a11f019 --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/pypy/templates/template.html @@ -0,0 +1,26 @@ + + + + Randomly generated data. + + + + + +
+

Welcome {{username}}!

+

Data generated at: {{cur_time}}!

+

Requested random numbers:

+ +
+ + diff --git a/benchmarks/wrappers/aws/pypy/handler.py b/benchmarks/wrappers/aws/pypy/handler.py new file mode 100644 index 000000000..907b2c612 --- /dev/null +++ b/benchmarks/wrappers/aws/pypy/handler.py @@ -0,0 +1,71 @@ + +import datetime, io, json, os, sys, uuid + +# Add current directory to allow location of packages +sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) + +# TODO: usual trigger +# implement support for S3 and others +def handler(event, context): + + income_timestamp = datetime.datetime.now().timestamp() + + # HTTP trigger with API Gateaway + if 'body' in event: + event = json.loads(event['body']) + req_id = context.aws_request_id + event['request-id'] = req_id + event['income-timestamp'] = income_timestamp + begin = datetime.datetime.now() + from function import function + ret = function.handler(event) + end = datetime.datetime.now() + + log_data = { + 'output': ret['result'] + } + if 'measurement' in ret: + log_data['measurement'] = ret['measurement'] + if 'logs' in event: + log_data['time'] = (end - begin) / datetime.timedelta(microseconds=1) + results_begin = datetime.datetime.now() + from function import storage + storage_inst = storage.storage.get_instance() + b = event.get('logs').get('bucket') + storage_inst.upload_stream(b, '{}.json'.format(req_id), + io.BytesIO(json.dumps(log_data).encode('utf-8'))) + results_end = datetime.datetime.now() + results_time = (results_end - results_begin) / datetime.timedelta(microseconds=1) + else: + results_time = 0 + + # cold test + is_cold = False + fname = os.path.join('/tmp', 'cold_run') + if not os.path.exists(fname): + is_cold = True + container_id = str(uuid.uuid4())[0:8] + with open(fname, 'a') as f: + f.write(container_id) + else: + with open(fname, 'r') as f: + container_id = f.read() + + cold_start_var = "" + if "cold_start" in os.environ: + cold_start_var = os.environ["cold_start"] + + return { + 'statusCode': 200, + 'body': json.dumps({ + 'begin': 
begin.strftime('%s.%f'), + 'end': end.strftime('%s.%f'), + 'results_time': results_time, + 'is_cold': is_cold, + 'result': log_data, + 'request_id': context.aws_request_id, + 'cold_start_var': cold_start_var, + 'container_id': container_id, + }) + } + diff --git a/benchmarks/wrappers/aws/pypy/nosql.py b/benchmarks/wrappers/aws/pypy/nosql.py new file mode 100644 index 000000000..72bc2d9da --- /dev/null +++ b/benchmarks/wrappers/aws/pypy/nosql.py @@ -0,0 +1,121 @@ +from decimal import Decimal +from os import environ +from typing import List, Optional, Union, Tuple + +import boto3 + + +class nosql: + + instance: Optional["nosql"] = None + + def __init__(self): + self.client = boto3.resource("dynamodb") + self._tables = {} + + # Based on: https://github.com/boto/boto3/issues/369#issuecomment-157205696 + def _remove_decimals(self, data: dict) -> Union[dict, list, int, float]: + + if isinstance(data, list): + return [self._remove_decimals(x) for x in data] + elif isinstance(data, dict): + return {k: self._remove_decimals(v) for k, v in data.items()} + elif isinstance(data, Decimal): + if data.as_integer_ratio()[1] == 1: + return int(data) + else: + return float(data) + else: + return data + + def _get_table(self, table_name: str): + + if table_name not in self._tables: + + env_name = f"NOSQL_STORAGE_TABLE_{table_name}" + + if env_name in environ: + aws_name = environ[env_name] + self._tables[table_name] = self.client.Table(aws_name) + else: + raise RuntimeError( + f"Couldn't find an environment variable {env_name} for table {table_name}" + ) + + return self._tables[table_name] + + def insert( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + for key in (primary_key, secondary_key): + data[key[0]] = key[1] + + self._get_table(table_name).put_item(Item=data) + + def get( + self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str] + ) -> dict: + + data = {} + for key in (primary_key, 
secondary_key): + data[key[0]] = key[1] + + res = self._get_table(table_name).get_item(Key=data) + return self._remove_decimals(res["Item"]) + + def update( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + updates: dict, + ): + + key_data = {} + for key in (primary_key, secondary_key): + key_data[key[0]] = key[1] + + update_expression = "SET " + update_values = {} + update_names = {} + + # We use attribute names because DynamoDB reserves some keywords, like 'status' + for key, value in updates.items(): + + update_expression += f" #{key}_name = :{key}_value, " + update_values[f":{key}_value"] = value + update_names[f"#{key}_name"] = key + + update_expression = update_expression[:-2] + + self._get_table(table_name).update_item( + Key=key_data, + UpdateExpression=update_expression, + ExpressionAttributeValues=update_values, + ExpressionAttributeNames=update_names, + ) + + def query(self, table_name: str, primary_key: Tuple[str, str], _: str) -> List[dict]: + + res = self._get_table(table_name).query( + KeyConditionExpression=f"{primary_key[0]} = :keyvalue", + ExpressionAttributeValues={":keyvalue": primary_key[1]}, + )["Items"] + return self._remove_decimals(res) + + def delete(self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str]): + data = {} + for key in (primary_key, secondary_key): + data[key[0]] = key[1] + + self._get_table(table_name).delete_item(Key=data) + + @staticmethod + def get_instance(): + if nosql.instance is None: + nosql.instance = nosql() + return nosql.instance diff --git a/benchmarks/wrappers/aws/pypy/setup.py b/benchmarks/wrappers/aws/pypy/setup.py new file mode 100644 index 000000000..b3d878351 --- /dev/null +++ b/benchmarks/wrappers/aws/pypy/setup.py @@ -0,0 +1,15 @@ +from distutils.core import setup +from glob import glob +from pkg_resources import parse_requirements + +with open('requirements.txt') as f: + requirements = [str(r) for r in parse_requirements(f)] + 
+setup( + name='function', + install_requires=requirements, + packages=['function'], + package_dir={'function': '.'}, + package_data={'function': glob('**', recursive=True)}, +) + diff --git a/benchmarks/wrappers/aws/pypy/storage.py b/benchmarks/wrappers/aws/pypy/storage.py new file mode 100644 index 000000000..4be0025e8 --- /dev/null +++ b/benchmarks/wrappers/aws/pypy/storage.py @@ -0,0 +1,53 @@ +import io +import os +import uuid + +import boto3 + + +class storage: + instance = None + client = None + + def __init__(self): + self.client = boto3.client('s3') + + @staticmethod + def unique_name(name): + name, extension = os.path.splitext(name) + return '{name}.{random}{extension}'.format( + name=name, + extension=extension, + random=str(uuid.uuid4()).split('-')[0] + ) + + def upload(self, bucket, file, filepath): + key_name = storage.unique_name(file) + self.client.upload_file(filepath, bucket, key_name) + return key_name + + def download(self, bucket, file, filepath): + self.client.download_file(bucket, file, filepath) + + def download_directory(self, bucket, prefix, path): + objects = self.client.list_objects_v2(Bucket=bucket, Prefix=prefix) + for obj in objects['Contents']: + file_name = obj['Key'] + path_to_file = os.path.dirname(file_name) + os.makedirs(os.path.join(path, path_to_file), exist_ok=True) + self.download(bucket, file_name, os.path.join(path, file_name)) + + def upload_stream(self, bucket, file, data): + key_name = storage.unique_name(file) + self.client.upload_fileobj(data, bucket, key_name) + return key_name + + def download_stream(self, bucket, file): + data = io.BytesIO() + self.client.download_fileobj(bucket, file, data) + return data.getbuffer() + + def get_instance(): + if storage.instance is None: + storage.instance = storage() + return storage.instance diff --git a/benchmarks/wrappers/local/pypy/nosql.py b/benchmarks/wrappers/local/pypy/nosql.py new file mode 100644 index 000000000..0e816954c --- /dev/null +++ 
b/benchmarks/wrappers/local/pypy/nosql.py @@ -0,0 +1,131 @@ +from decimal import Decimal +from os import environ +from typing import List, Optional, Union, Tuple + +import boto3 + + +class nosql: + + instance: Optional["nosql"] = None + + def __init__(self): + + if environ["NOSQL_STORAGE_TYPE"] != "scylladb": + raise RuntimeError(f"Unsupported NoSQL storage type: {environ['NOSQL_STORAGE_TYPE']}!") + + self.client = boto3.resource( + "dynamodb", + region_name="None", + aws_access_key_id="None", + aws_secret_access_key="None", + endpoint_url=f"http://{environ['NOSQL_STORAGE_ENDPOINT']}", + ) + self._tables = {} + + # Based on: https://github.com/boto/boto3/issues/369#issuecomment-157205696 + def _remove_decimals(self, data: dict) -> Union[dict, list, int, float]: + + if isinstance(data, list): + return [self._remove_decimals(x) for x in data] + elif isinstance(data, dict): + return {k: self._remove_decimals(v) for k, v in data.items()} + elif isinstance(data, Decimal): + if data.as_integer_ratio()[1] == 1: + return int(data) + else: + return float(data) + else: + return data + + def _get_table(self, table_name: str): + + if table_name not in self._tables: + + env_name = f"NOSQL_STORAGE_TABLE_{table_name}" + + if env_name in environ: + aws_name = environ[env_name] + self._tables[table_name] = self.client.Table(aws_name) + else: + raise RuntimeError( + f"Couldn't find an environment variable {env_name} for table {table_name}" + ) + + return self._tables[table_name] + + def insert( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + for key in (primary_key, secondary_key): + data[key[0]] = key[1] + + self._get_table(table_name).put_item(Item=data) + + def get( + self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str] + ) -> dict: + + data = {} + for key in (primary_key, secondary_key): + data[key[0]] = key[1] + + res = self._get_table(table_name).get_item(Key=data) + return 
self._remove_decimals(res["Item"]) + + def update( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + updates: dict, + ): + + key_data = {} + for key in (primary_key, secondary_key): + key_data[key[0]] = key[1] + + update_expression = "SET " + update_values = {} + update_names = {} + + # We use attribute names because DynamoDB reserves some keywords, like 'status' + for key, value in updates.items(): + + update_expression += f" #{key}_name = :{key}_value, " + update_values[f":{key}_value"] = value + update_names[f"#{key}_name"] = key + + update_expression = update_expression[:-2] + + self._get_table(table_name).update_item( + Key=key_data, + UpdateExpression=update_expression, + ExpressionAttributeValues=update_values, + ExpressionAttributeNames=update_names, + ) + + def query(self, table_name: str, primary_key: Tuple[str, str], _: str) -> List[dict]: + + res = self._get_table(table_name).query( + KeyConditionExpression=f"{primary_key[0]} = :keyvalue", + ExpressionAttributeValues={":keyvalue": primary_key[1]}, + )["Items"] + return self._remove_decimals(res) + + def delete(self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str]): + data = {} + for key in (primary_key, secondary_key): + data[key[0]] = key[1] + + self._get_table(table_name).delete_item(Key=data) + + @staticmethod + def get_instance(): + if nosql.instance is None: + nosql.instance = nosql() + return nosql.instance diff --git a/benchmarks/wrappers/local/pypy/storage.py b/benchmarks/wrappers/local/pypy/storage.py new file mode 100644 index 000000000..b44968408 --- /dev/null +++ b/benchmarks/wrappers/local/pypy/storage.py @@ -0,0 +1,58 @@ +import io +import os +import uuid + +import minio + +class storage: + instance = None + client = None + + def __init__(self): + if 'MINIO_ADDRESS' in os.environ: + address = os.environ['MINIO_ADDRESS'] + access_key = os.environ['MINIO_ACCESS_KEY'] + secret_key = os.environ['MINIO_SECRET_KEY'] + 
self.client = minio.Minio( + address, + access_key=access_key, + secret_key=secret_key, + secure=False) + + @staticmethod + def unique_name(name): + name, extension = os.path.splitext(name) + return '{name}.{random}{extension}'.format( + name=name, + extension=extension, + random=str(uuid.uuid4()).split('-')[0] + ) + + def upload(self, bucket, file, filepath): + key_name = storage.unique_name(file) + self.client.fput_object(bucket, key_name, filepath) + return key_name + + def download(self, bucket, file, filepath): + self.client.fget_object(bucket, file, filepath) + + def download_directory(self, bucket, prefix, path): + objects = self.client.list_objects_v2(bucket, prefix, recursive=True) + for obj in objects: + file_name = obj.object_name + self.download(bucket, file_name, os.path.join(path, file_name)) + + def upload_stream(self, bucket, file, bytes_data): + key_name = storage.unique_name(file) + self.client.put_object(bucket, key_name, bytes_data, bytes_data.getbuffer().nbytes) + return key_name + + def download_stream(self, bucket, file): + data = self.client.get_object(bucket, file) + return data.read() + + def get_instance(): + if storage.instance is None: + storage.instance = storage() + return storage.instance + diff --git a/config/systems.json b/config/systems.json index 5a38b4965..0f37c5ef0 100644 --- a/config/systems.json +++ b/config/systems.json @@ -64,6 +64,33 @@ ], "packages": [] } + }, + "pypy": { + "base_images": { + "x64": { + "3.11": "pypy:latest" + }, + "arm64": { + "3.11": "pypy:latest" + } + }, + "images": [ + "run", + "build" + ], + "username": "docker_user", + "deployment": { + "files": [ + "storage.py", + "nosql.py" + ], + "packages": [], + "module_packages": { + "nosql": [ + "boto3==1.28.3" + ] + } + } } }, "architecture": ["x64"], @@ -121,6 +148,29 @@ "uuid": "3.4.0" } } + }, + "pypy": { + "base_images": { + "x64": { + "3.11": "amazon/aws-lambda-provided:al2-x86_64" + }, + "arm64": { + "3.11": "amazon/aws-lambda-provided:al2-arm64" + } 
+ }, + "images": [ + "build" + ], + "deployment": { + "files": [ + "handler.py", + "storage.py", + "nosql.py", + "setup.py" + ], + "packages": [], + "module_packages": {} + } } }, "architecture": ["x64", "arm64"], diff --git a/dockerfiles/aws/pypy/Dockerfile.function b/dockerfiles/aws/pypy/Dockerfile.function new file mode 100644 index 000000000..e2bf9f80b --- /dev/null +++ b/dockerfiles/aws/pypy/Dockerfile.function @@ -0,0 +1,52 @@ +ARG BASE_IMAGE +FROM $BASE_IMAGE +ARG VERSION +ENV PYTHON_VERSION=${VERSION} +ARG TARGET_ARCHITECTURE + +#SHELL ["/bin/bash", "-c"] + +COPY . function/ + +RUN yum install -y tar bzip2 gzip + +RUN curl -L -o pypy.tar.bz2 https://downloads.python.org/pypy/pypy3.11-v7.3.20-linux64.tar.bz2 \ + && tar -xjf pypy.tar.bz2 \ + && mv pypy3.11-v7.3.20-linux64 /opt/pypy \ + && rm pypy.tar.bz2 +RUN chmod -R +x /opt/pypy/bin +ENV PATH=/opt/pypy/bin:$PATH +RUN python -m ensurepip \ + && python -mpip install -U pip wheel + +ENV PLATFORM_ARG="" + +RUN touch function/__init__.py + # Probably remove this conditional, might break pypy builds, might lead to installation of CPython libraries +RUN if [ "${TARGET_ARCHITECTURE}" = "arm64" ]; then \ + export PLATFORM_ARG="--platform manylinux_2_17_aarch64 --only-binary=:all:"; \ + fi + +RUN ls -la function/ +RUN if [ "${TARGET_ARCHITECTURE}" = "arm64" ] && test -f "function/requirements.txt.arm.${PYTHON_VERSION}"; then \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . \ + -r function/requirements.txt \ + -r function/requirements.txt.arm.${PYTHON_VERSION} \ + function/ && \ + pip cache purge; \ + elif test -f "function/requirements.txt.${PYTHON_VERSION}"; then \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . \ + -r function/requirements.txt \ + -r function/requirements.txt.${PYTHON_VERSION} \ + function/ && \ + pip cache purge; \ + else \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . 
\ + -r function/requirements.txt \ + function/ && \ + pip cache purge; \ + fi + +RUN pip install awslambdaric +ENTRYPOINT ["/opt/pypy/bin/python", "-m", "awslambdaric"] +CMD ["function/handler.handler"] diff --git a/dockerfiles/local/pypy/Dockerfile.build b/dockerfiles/local/pypy/Dockerfile.build new file mode 100755 index 000000000..6edb0bbac --- /dev/null +++ b/dockerfiles/local/pypy/Dockerfile.build @@ -0,0 +1,18 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV PYTHON_VERSION=${VERSION} + +RUN apt-get update\ + && apt-get install -y --no-install-recommends gcc build-essential python3-dev libxml2 libxml2-dev zlib1g-dev gosu\ + && apt-get purge -y --auto-remove + +RUN mkdir -p /sebs/ +COPY dockerfiles/pypy_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/local/pypy/Dockerfile.run b/dockerfiles/local/pypy/Dockerfile.run new file mode 100755 index 000000000..25fa2ebdb --- /dev/null +++ b/dockerfiles/local/pypy/Dockerfile.run @@ -0,0 +1,25 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} + +RUN deps=''\ + && apt-get update\ + # for route and sudo + && apt-get install --no-install-recommends -y curl gosu net-tools sudo ${deps}\ + && apt-get purge -y --auto-remove ${deps}\ + && pip3 install cffi minio bottle + +RUN mkdir -p /sebs +COPY dockerfiles/local/run.sh /sebs/ +COPY dockerfiles/local/*.py /sebs/ +COPY dockerfiles/local/pypy/*.py /sebs/ +COPY dockerfiles/local/pypy/run_server.sh /sebs/ +COPY dockerfiles/local/pypy/timeit.sh /sebs/ +COPY dockerfiles/local/pypy/runners.json /sebs/ +ADD third-party/pypapi/pypapi /sebs/pypapi +ENV PYTHONPATH=/sebs/.python_packages/lib/site-packages:$PYTHONPATH + +COPY dockerfiles/local/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh +RUN 
chmod +x /sebs/run.sh + +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/local/pypy/analyzer-runner.py b/dockerfiles/local/pypy/analyzer-runner.py new file mode 100644 index 000000000..624459795 --- /dev/null +++ b/dockerfiles/local/pypy/analyzer-runner.py @@ -0,0 +1,64 @@ + +import datetime, json, sys, subprocess, os +ip_address = os.environ['DOCKER_HOST_IP'] +cfg = json.load(open(sys.argv[1], 'r')) +ret = subprocess.run(['curl', '-X', 'POST', + '{}:{}/start'.format(ip_address, cfg['benchmark']['analyzer']['analyzer_port']), + '-d', + '{{"uuid": "{}" }}'.format(sys.argv[2])], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) +if ret.returncode != 0: + import sys + print('Analyzer initialization failed!') + print(ret.stderr.decode('utf-8')) + sys.exit(100) + + +from utils import * +from tools import * +# imported function +from function import function + +repetitions = cfg['benchmark']['repetitions'] +disable_gc = cfg['benchmark']['disable_gc'] +input_data = cfg['input'] + +timedata = [0] * repetitions +try: + start = start_benchmarking(disable_gc) + for i in range(0, repetitions): + begin = datetime.datetime.now() + res = function.handler(input_data) + stop = datetime.datetime.now() + print(res, file = open( + get_result_prefix(LOGS_DIR, 'output', 'txt'), + 'w' + )) + timedata[i] = [begin, stop] + end = stop_benchmarking() + + ret = subprocess.run( + [ + 'curl', '-X', 'POST', + '{}:{}/stop'.format(ip_address, cfg['benchmark']['analyzer']['analyzer_port']), + '-d', + '{{"uuid": "{}" }}'.format(sys.argv[2]) + ], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if ret.returncode != 0: + import sys + print('Analyzer deinitialization failed!') + print(ret.stderr.decode('utf-8')) + sys.exit(101) + experiment_data = {} + experiment_data['repetitions'] = repetitions + experiment_data['timestamps'] = process_timestamps(timedata) + experiment_data['start'] = str(start) + experiment_data['end'] = str(end) + print(json.dumps({'experiment': experiment_data, 
'runtime': get_config()}, indent=2)) +except Exception as e: + print('Exception caught!') + print(e) + sys.exit(102) +sys.exit(0) diff --git a/dockerfiles/local/pypy/config.py b/dockerfiles/local/pypy/config.py new file mode 100644 index 000000000..e7115cc73 --- /dev/null +++ b/dockerfiles/local/pypy/config.py @@ -0,0 +1,5 @@ +import json + +from tools import * + +print(json.dumps(get_config())) diff --git a/dockerfiles/local/pypy/papi-runner.py b/dockerfiles/local/pypy/papi-runner.py new file mode 100644 index 000000000..0c82d476d --- /dev/null +++ b/dockerfiles/local/pypy/papi-runner.py @@ -0,0 +1,104 @@ + +import datetime, json, sys, traceback, csv + +from utils import * +from tools import * + +# imported function +from function import function + +import pypapi.exceptions + +class papi_benchmarker: + from pypapi import papi_low as papi + from pypapi import events as papi_events + + def __init__(self, papi_cfg): + self.events = [] + self.events_names = [] + self.count = 0 + + self.papi.library_init() + self.events = self.papi.create_eventset() + for event in papi_cfg['events']: + try: + self.papi.add_event(self.events, getattr(self.papi_events, event)) + except pypapi.exceptions.PapiInvalidValueError as err: + print('Adding event {event} failed!'.format(event=event)) + sys.exit(100) + + self.events_names = papi_cfg['events'] + self.count = len(papi_cfg['events']) + self.results = [] + + self.ins_granularity = papi_cfg['overflow_instruction_granularity'] + self.buffer_size = papi_cfg['overflow_buffer_size'] + self.start_time = datetime.datetime.now() + + self.papi.overflow_sampling(self.events, self.papi_events.PAPI_TOT_INS, + int(self.ins_granularity), int(self.buffer_size)) + + def start_overflow(self): + self.papi.start(self.events) + + def stop_overflow(self): + self.papi.stop(self.events) + + def get_results(self): + data = self.papi.overflow_sampling_results(self.events) + for vals in data: + for i in range(0, len(vals), self.count + 1): + chunks = 
vals[i:i+self.count+1] + measurement_time = datetime.datetime.fromtimestamp(chunks[0]/1e6) + time = (measurement_time - self.start_time) / datetime.timedelta(microseconds = 1) + self.results.append([measurement_time.strftime("%s.%f"), time] + list(chunks[1:])) + + def finish(self): + self.papi.cleanup_eventset(self.events) + self.papi.destroy_eventset(self.events) + + +cfg = json.load(open(sys.argv[1], 'r')) +repetitions = cfg['benchmark']['repetitions'] +disable_gc = cfg['benchmark']['disable_gc'] +input_data = cfg['input'] +papi_experiments = papi_benchmarker(cfg['benchmark']['papi']) + +timedata = [0] * repetitions +try: + start = start_benchmarking(disable_gc) + for i in range(0, repetitions): + begin = datetime.datetime.now() + papi_experiments.start_overflow() + res = function.handler(input_data) + papi_experiments.stop_overflow() + stop = datetime.datetime.now() + print(res, file = open( + get_result_prefix(LOGS_DIR, 'output', 'txt'), + 'w' + )) + timedata[i] = [begin, stop] + end = stop_benchmarking() +except Exception as e: + print('Exception caught!') + print(e) + traceback.print_exc() + + +papi_experiments.get_results() +papi_experiments.finish() +result = get_result_prefix(RESULTS_DIR, cfg['benchmark']['name'], 'csv') +with open(result, 'w') as f: + csv_writer = csv.writer(f) + csv_writer.writerow( + ['Time','RelativeTime'] + papi_experiments.events_names + ) + for val in papi_experiments.results: + csv_writer.writerow(val) + +experiment_data = {} +experiment_data['repetitions'] = repetitions +experiment_data['timestamps'] = process_timestamps(timedata) +experiment_data['start'] = str(start) +experiment_data['end'] = str(end) +print(json.dumps({'experiment': experiment_data, 'runtime': get_config()}, indent=2)) diff --git a/dockerfiles/local/pypy/run_server.sh b/dockerfiles/local/pypy/run_server.sh new file mode 100755 index 000000000..ab09238e3 --- /dev/null +++ b/dockerfiles/local/pypy/run_server.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +pypy /sebs/server.py 
"$@" diff --git a/dockerfiles/local/pypy/runners.json b/dockerfiles/local/pypy/runners.json new file mode 100644 index 000000000..3ef17d1f4 --- /dev/null +++ b/dockerfiles/local/pypy/runners.json @@ -0,0 +1,7 @@ +{ + "papi": "papi-runner.py", + "time" : {"warm" : "time-in-proc.py", "cold" : "time-out-proc.py"}, + "memory": "analyzer-runner.py", + "disk-io": "analyzer-runner.py", + "config": ["pypy", "config.py"] +} diff --git a/dockerfiles/local/pypy/server.py b/dockerfiles/local/pypy/server.py new file mode 100644 index 000000000..4ed1314f2 --- /dev/null +++ b/dockerfiles/local/pypy/server.py @@ -0,0 +1,38 @@ +import datetime +import os +import sys +import uuid + +import bottle +from bottle import route, run, template, request + +CODE_LOCATION='/function' + +@route('/alive', method='GET') +def alive(): + return { + "result:" "ok" + } + +@route('/', method='POST') +def process_request(): + begin = datetime.datetime.now() + from function import function + end = datetime.datetime.now() + # FIXME: measurements? 
+ ret = function.handler(request.json) + + return { + 'begin': begin.strftime('%s.%f'), + 'end': end.strftime('%s.%f'), + "request_id": str(uuid.uuid4()), + "is_cold": False, + "result": { + "output": ret + } + } + +sys.path.append(os.path.join(CODE_LOCATION)) +sys.path.append(os.path.join(CODE_LOCATION, '.python_packages/lib/site-packages/')) +run(host='0.0.0.0', port=int(sys.argv[1]), debug=True) + diff --git a/dockerfiles/local/pypy/time-in-proc.py b/dockerfiles/local/pypy/time-in-proc.py new file mode 100644 index 000000000..962da527a --- /dev/null +++ b/dockerfiles/local/pypy/time-in-proc.py @@ -0,0 +1,59 @@ + +import datetime, json, sys, traceback, csv, resource + +from utils import * +from tools import * + +# imported function +from function import function + + +cfg = json.load(open(sys.argv[1], 'r')) +repetitions = cfg['benchmark']['repetitions'] +disable_gc = cfg['benchmark']['disable_gc'] +input_data = cfg['input'] + +timedata = [0] * repetitions +os_times = [0] * repetitions +try: + start = start_benchmarking(disable_gc) + for i in range(0, repetitions): + begin = datetime.datetime.now() + begin_times = resource.getrusage(resource.RUSAGE_SELF) + res = function.handler(input_data) + end_times = resource.getrusage(resource.RUSAGE_SELF) + stop = datetime.datetime.now() + print(res, file = open( + get_result_prefix(LOGS_DIR, 'output', 'txt'), + 'w' + )) + timedata[i] = [begin, stop] + os_times[i] = [begin_times, end_times] + end = stop_benchmarking() +except Exception as e: + print('Exception caught!') + print(e) + traceback.print_exc() + + +result = get_result_prefix(RESULTS_DIR, cfg['benchmark']['name'], 'csv') +with open(result, 'w') as f: + csv_writer = csv.writer(f) + csv_writer.writerow(['#Seconds from epoch.microseconds; CPU times are in microseconds']) + csv_writer.writerow(['Begin','End','Duration','User','Sys']) + for i in range(0, len(timedata)): + csv_writer.writerow([ + timedata[i][0].strftime('%s.%f'), + timedata[i][1].strftime('%s.%f'), + 
(timedata[i][1] - timedata[i][0]) / + datetime.timedelta(microseconds=1), + (os_times[i][1].ru_utime - os_times[i][0].ru_utime) * 1e6, + (os_times[i][1].ru_stime - os_times[i][0].ru_stime) * 1e6 + ]) + +experiment_data = {} +experiment_data['repetitions'] = repetitions +experiment_data['timestamps'] = process_timestamps(timedata) +experiment_data['start'] = str(start) +experiment_data['end'] = str(end) +print(json.dumps({'experiment': experiment_data, 'runtime': get_config()}, indent=2)) diff --git a/dockerfiles/local/pypy/timeit.sh b/dockerfiles/local/pypy/timeit.sh new file mode 100755 index 000000000..73e6e5eaf --- /dev/null +++ b/dockerfiles/local/pypy/timeit.sh @@ -0,0 +1,5 @@ +#!/bin/bash +#ts=$(date +%s%N); +export TIMEFORMAT='%3R,%3U,%3S' +time pypy -c "from json import load; from function import function; print(function.handler(load(open('input.json', 'r'))))" > $1 +#tt=$((($(date +%s%N) - $ts)/1000)) ; echo $tt diff --git a/dockerfiles/local/pypy/tools.py b/dockerfiles/local/pypy/tools.py new file mode 100644 index 000000000..0413489e3 --- /dev/null +++ b/dockerfiles/local/pypy/tools.py @@ -0,0 +1,21 @@ + +import datetime, gc, platform, os, sys + +def start_benchmarking(disable_gc): + if disable_gc: + gc.disable() + return datetime.datetime.now() + +def stop_benchmarking(): + end = datetime.datetime.now() + gc.enable() + return end + +def get_config(): + # get currently loaded modules + # https://stackoverflow.com/questions/4858100/how-to-list-imported-modules + modulenames = set(sys.modules) & set(globals()) + allmodules = [sys.modules[name] for name in modulenames] + return {'name': 'pypy', + 'version': platform.python_version(), + 'modules': str(allmodules)} diff --git a/dockerfiles/local/runner.py b/dockerfiles/local/runner.py index 96261fc33..b1d0ca423 100644 --- a/dockerfiles/local/runner.py +++ b/dockerfiles/local/runner.py @@ -4,7 +4,7 @@ from utils import * def get_language(lang): - languages = {'python': 'python3', 'nodejs': 'nodejs'} + 
languages = {'python': 'python3', 'nodejs': 'nodejs', 'pypy': 'pypy'} return languages[lang] def get_runner(experiment, options=None): diff --git a/dockerfiles/pypy_installer.sh b/dockerfiles/pypy_installer.sh new file mode 100644 index 000000000..303087d6a --- /dev/null +++ b/dockerfiles/pypy_installer.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +cd /mnt/function + +#TODO: If the base image OS is not centOS based, change to apt +yum install -y tar bzip2 gzip + +#TODO: make version configurable +curl -L -o pypy.tar.bz2 https://downloads.python.org/pypy/pypy3.11-v7.3.20-linux64.tar.bz2 +tar -xjf pypy.tar.bz2 +mv pypy3.11-v7.3.20-linux64 /opt/pypy +rm pypy.tar.bz2 +chmod -R +x /opt/pypy/bin +export PATH=/opt/pypy/bin:$PATH +python -m ensurepip +python -mpip install -U pip wheel + +#Probably remove this conditional, might break pypy builds, might lead to installation of CPython libraries +PLATFORM_ARG="" +if [[ "${TARGET_ARCHITECTURE}" == "arm64" ]]; then + PLATFORM_ARG="--platform manylinux_2_17_aarch64 --only-binary=:all:" +fi + +if [[ "${TARGET_ARCHITECTURE}" == "arm64" ]] && [[ -f "requirements.txt.arm.${PYTHON_VERSION}" ]]; then + + pip3 -q install ${PLATFORM_ARG} -r requirements.txt.arm.${PYTHON_VERSION} -t .python_packages/lib/site-packages + +elif [[ -f "requirements.txt.${PYTHON_VERSION}" ]]; then + + pip3 -q install ${PLATFORM_ARG} -r requirements.txt.${PYTHON_VERSION} -t .python_packages/lib/site-packages + +else + + pip3 -q install ${PLATFORM_ARG} -r requirements.txt -t .python_packages/lib/site-packages + +fi + +if [[ -f "${SCRIPT_FILE}" ]]; then + /bin/bash ${SCRIPT_FILE} .python_packages/lib/site-packages +fi + + diff --git a/sebs.py b/sebs.py index 80fb11ed3..2881f40c6 100755 --- a/sebs.py +++ b/sebs.py @@ -64,7 +64,7 @@ def simplified_common_params(func): @click.option( "--language", default=None, - type=click.Choice(["python", "nodejs"]), + type=click.Choice(["python", "nodejs", "pypy"]), help="Benchmark language", ) @click.option("--language-version", 
default=None, type=str, help="Benchmark language version") diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 243a6f0f9..75de79421 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -136,6 +136,7 @@ def package_code( CONFIG_FILES = { "python": ["handler.py", "requirements.txt", ".python_packages"], + "pypy": ["handler.py", "requirements.txt", ".python_packages"], "nodejs": ["handler.js", "package.json", "node_modules"], } package_config = CONFIG_FILES[language_name] @@ -172,8 +173,13 @@ def _map_language_runtime(self, language: str, runtime: str): # AWS uses different naming scheme for Node.js versions # For example, it's 12.x instead of 12. + # We use a OS-only runtime for PyPy if language == "nodejs": - return f"{runtime}.x" + return f"{language}{runtime}.x" + elif language == "python": + return f"{language}{runtime}" + elif language == "pypy": + return "provided.al2" return runtime def create_function( @@ -251,9 +257,7 @@ def create_function( "S3Key": code_prefix, } - create_function_params["Runtime"] = "{}{}".format( - language, self._map_language_runtime(language, language_runtime) - ) + create_function_params["Runtime"] = self._map_language_runtime(language, language_runtime) create_function_params["Handler"] = "handler.handler" create_function_params = { @@ -401,15 +405,26 @@ def update_function_configuration( self.wait_function_updated(function) self.logging.info(f"Updated configuration of {function.name} function. 
") + def get_real_language_name(self, language_name: str) -> str: + LANGUAGE_NAMES = { + "python": "python", + "pypy": "python", + "nodejs": "nodejs", + } + return LANGUAGE_NAMES.get(language_name) + # @staticmethod def default_function_name( self, code_package: Benchmark, resources: Optional[Resources] = None ) -> str: # Create function name resource_id = resources.resources_id if resources else self.config.resources.resources_id + func_name = "sebs-{}-{}-{}-{}-{}".format( resource_id, code_package.benchmark, + # see which works + #self.get_real_language_name(code_package.language_name), code_package.language_name, code_package.language_version, code_package.architecture, diff --git a/sebs/benchmark.py b/sebs/benchmark.py index f159e820c..39b919ef7 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -252,8 +252,9 @@ def hash_directory(directory: str, deployment: str, language: str): FILES = { "python": ["*.py", "requirements.txt*"], "nodejs": ["*.js", "package.json"], + "pypy": ["*.py", "requirements.txt*"], } - WRAPPERS = {"python": "*.py", "nodejs": "*.js"} + WRAPPERS = {"python": "*.py", "nodejs": "*.js", "pypy": "*.py"} NON_LANG_FILES = ["*.sh", "*.json"] selected_files = FILES[language] + NON_LANG_FILES for file_type in selected_files: @@ -316,6 +317,7 @@ def copy_code(self, output_dir): FILES = { "python": ["*.py", "requirements.txt*"], "nodejs": ["*.js", "package.json"], + "pypy": ["*.py", "requirements.txt*"], } path = os.path.join(self.benchmark_path, self.language_name) for file_type in FILES[self.language_name]: @@ -402,7 +404,7 @@ def add_deployment_package_nodejs(self, output_dir): def add_deployment_package(self, output_dir): from sebs.faas.function import Language - if self.language == Language.PYTHON: + if self.language == Language.PYTHON or self.language == Language.PYPY: self.add_deployment_package_python(output_dir) elif self.language == Language.NODEJS: self.add_deployment_package_nodejs(output_dir) @@ -483,7 +485,7 @@ def 
ensure_image(name: str) -> None: } # run Docker container to install packages - PACKAGE_FILES = {"python": "requirements.txt", "nodejs": "package.json"} + PACKAGE_FILES = {"python": "requirements.txt", "nodejs": "package.json", "pypy": "requirements.txt"} file = os.path.join(output_dir, PACKAGE_FILES[self.language_name]) if os.path.exists(file): try: diff --git a/sebs/faas/function.py b/sebs/faas/function.py index 0fab7bcf4..ddb5117d0 100644 --- a/sebs/faas/function.py +++ b/sebs/faas/function.py @@ -263,6 +263,7 @@ def deserialize(cached_config: dict) -> "Trigger": class Language(Enum): PYTHON = "python" NODEJS = "nodejs" + PYPY = "pypy" # FIXME: 3.7+ python with future annotations @staticmethod @@ -270,7 +271,7 @@ def deserialize(val: str) -> Language: for member in Language: if member.value == val: return member - raise Exception(f"Unknown language type {member}") + raise Exception(f"Unknown language type {val}") class Architecture(Enum): @@ -299,7 +300,7 @@ def serialize(self) -> dict: @staticmethod def deserialize(config: dict) -> Runtime: - languages = {"python": Language.PYTHON, "nodejs": Language.NODEJS} + languages = {"python": Language.PYTHON, "nodejs": Language.NODEJS, "pypy": Language.PYPY} return Runtime(language=languages[config["language"]], version=config["version"]) diff --git a/sebs/local/local.py b/sebs/local/local.py index 32b9f9ffb..1f40016aa 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -124,6 +124,7 @@ def package_code( CONFIG_FILES = { "python": ["handler.py", "requirements.txt", ".python_packages"], "nodejs": ["handler.js", "package.json", "node_modules"], + "pypy": ["handler.py", "requirements.txt", ".python_packages"], } package_config = CONFIG_FILES[language_name] function_dir = os.path.join(directory, "function") diff --git a/tools/build_docker_images.py b/tools/build_docker_images.py index 5336fb485..6d6911aee 100755 --- a/tools/build_docker_images.py +++ b/tools/build_docker_images.py @@ -13,7 +13,7 @@ "--deployment", 
default=None, choices=["local", "aws", "azure", "gcp"], action="store" ) parser.add_argument("--type", default=None, choices=["build", "run", "manage"], action="store") -parser.add_argument("--language", default=None, choices=["python", "nodejs"], action="store") +parser.add_argument("--language", default=None, choices=["python", "nodejs", "pypy"], action="store") parser.add_argument("--language-version", default=None, type=str, action="store") args = parser.parse_args() config = json.load(open(os.path.join(PROJECT_DIR, "config", "systems.json"), "r")) From c5bf19dc40e710b7ad93606b1912e239fcbc25d8 Mon Sep 17 00:00:00 2001 From: toooadi Date: Mon, 1 Dec 2025 23:25:59 +0100 Subject: [PATCH 02/31] Upgrade to Amazon Linux 2023 --- config/systems.json | 4 ++-- dockerfiles/aws/pypy/Dockerfile.function | 15 ++++++++++----- sebs/aws/aws.py | 2 +- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/config/systems.json b/config/systems.json index 0f37c5ef0..6a48081b9 100644 --- a/config/systems.json +++ b/config/systems.json @@ -152,10 +152,10 @@ "pypy": { "base_images": { "x64": { - "3.11": "amazon/aws-lambda-provided:al2-x86_64" + "3.11": "amazon/aws-lambda-provided:al2023-x86_64" }, "arm64": { - "3.11": "amazon/aws-lambda-provided:al2-arm64" + "3.11": "amazon/aws-lambda-provided:al2023-arm64" } }, "images": [ diff --git a/dockerfiles/aws/pypy/Dockerfile.function b/dockerfiles/aws/pypy/Dockerfile.function index e2bf9f80b..d42190a97 100644 --- a/dockerfiles/aws/pypy/Dockerfile.function +++ b/dockerfiles/aws/pypy/Dockerfile.function @@ -4,15 +4,20 @@ ARG VERSION ENV PYTHON_VERSION=${VERSION} ARG TARGET_ARCHITECTURE -#SHELL ["/bin/bash", "-c"] - COPY . 
function/ -RUN yum install -y tar bzip2 gzip +RUN dnf install -y tar bzip2 gzip glibc-langpack-en +ENV LANG=en_US.UTF-8 +ENV LC_ALL=en_US.UTF-8 -RUN curl -L -o pypy.tar.bz2 https://downloads.python.org/pypy/pypy3.11-v7.3.20-linux64.tar.bz2 \ +RUN if [ "${TARGET_ARCHITECTURE}" = "arm64" ]; then \ + export PYPY_ARCH="aarch64"; \ + else \ + export PYPY_ARCH="linux64"; \ + fi \ + && curl -L -o pypy.tar.bz2 https://downloads.python.org/pypy/pypy3.11-v7.3.20-${PYPY_ARCH}.tar.bz2 \ && tar -xjf pypy.tar.bz2 \ - && mv pypy3.11-v7.3.20-linux64 /opt/pypy \ + && mv pypy3.11-v7.3.20-${PYPY_ARCH} /opt/pypy \ && rm pypy.tar.bz2 RUN chmod -R +x /opt/pypy/bin ENV PATH=/opt/pypy/bin:$PATH diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 75de79421..457bc3e13 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -179,7 +179,7 @@ def _map_language_runtime(self, language: str, runtime: str): elif language == "python": return f"{language}{runtime}" elif language == "pypy": - return "provided.al2" + return "provided.al2023" return runtime def create_function( From 5bf464cf23998b1fbd98d1fccd57aaf08434892b Mon Sep 17 00:00:00 2001 From: Alexander Schlieper Date: Sun, 7 Dec 2025 17:03:00 +0100 Subject: [PATCH 03/31] pypy azure --- .../000.microbenchmarks/010.sleep/config.json | 2 +- .../010.sleep/pypy/function.py | 9 + benchmarks/wrappers/azure/pypy/handler.py | 378 ++++++++++++++++++ benchmarks/wrappers/azure/pypy/nosql.py | 94 +++++ benchmarks/wrappers/azure/pypy/storage.py | 59 +++ config/systems.json | 27 ++ dockerfiles/azure/pypy/Dockerfile.build | 19 + dockerfiles/pypy_installer.sh | 49 +-- sebs/azure/azure.py | 31 +- tools/build_docker_images.py | 15 +- 10 files changed, 652 insertions(+), 31 deletions(-) create mode 100644 benchmarks/000.microbenchmarks/010.sleep/pypy/function.py create mode 100644 benchmarks/wrappers/azure/pypy/handler.py create mode 100644 benchmarks/wrappers/azure/pypy/nosql.py create mode 100644 benchmarks/wrappers/azure/pypy/storage.py create mode 100644 
dockerfiles/azure/pypy/Dockerfile.build diff --git a/benchmarks/000.microbenchmarks/010.sleep/config.json b/benchmarks/000.microbenchmarks/010.sleep/config.json index 93ce2f561..53f6349d6 100644 --- a/benchmarks/000.microbenchmarks/010.sleep/config.json +++ b/benchmarks/000.microbenchmarks/010.sleep/config.json @@ -1,6 +1,6 @@ { "timeout": 120, "memory": 128, - "languages": ["python", "nodejs"], + "languages": ["python", "nodejs", "pypy"], "modules": [] } diff --git a/benchmarks/000.microbenchmarks/010.sleep/pypy/function.py b/benchmarks/000.microbenchmarks/010.sleep/pypy/function.py new file mode 100644 index 000000000..7dda59a57 --- /dev/null +++ b/benchmarks/000.microbenchmarks/010.sleep/pypy/function.py @@ -0,0 +1,9 @@ + +from time import sleep + +def handler(event): + + # start timing + sleep_time = event.get('sleep') + sleep(sleep_time) + return { 'result': sleep_time } diff --git a/benchmarks/wrappers/azure/pypy/handler.py b/benchmarks/wrappers/azure/pypy/handler.py new file mode 100644 index 000000000..4b6e662da --- /dev/null +++ b/benchmarks/wrappers/azure/pypy/handler.py @@ -0,0 +1,378 @@ +import sys +import os +import json +import logging +from http.server import BaseHTTPRequestHandler, HTTPServer +import datetime +import uuid +import io + +# Add current directory and handler directory to path to find function modules +# Similar to AWS handler which uses: sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) +current_dir = os.getcwd() +sys.path.append(current_dir) +handler_path = os.path.join(current_dir, 'handler') +if os.path.exists(handler_path): + sys.path.insert(0, handler_path) +# Also add .python_packages like AWS does +python_packages_path = os.path.join(current_dir, '.python_packages', 'lib', 'site-packages') +if os.path.exists(python_packages_path): + sys.path.append(python_packages_path) + +# Initialize logging +logging.basicConfig(level=logging.INFO) + +# Initialize Storage/NoSQL if needed (Environment 
variables) +# Wrap in try-except to prevent crashes during initialization +try: + if 'NOSQL_STORAGE_DATABASE' in os.environ: + import nosql + nosql.nosql.get_instance( + os.environ['NOSQL_STORAGE_DATABASE'], + os.environ['NOSQL_STORAGE_URL'], + os.environ['NOSQL_STORAGE_CREDS'] + ) +except Exception as e: + logging.warning(f"Failed to initialize NoSQL: {e}") + +try: + if 'STORAGE_CONNECTION_STRING' in os.environ: + import storage + # Initialize storage instance + storage.storage.get_instance(os.environ['STORAGE_CONNECTION_STRING']) +except Exception as e: + logging.warning(f"Failed to initialize storage: {e}") + +class Handler(BaseHTTPRequestHandler): + def log_message(self, format, *args): + # Suppress default logging, we'll handle it ourselves + pass + + def handle_one_request(self): + """Override to ensure we always send a response""" + # Initialize response tracking before processing + self._response_sent = False + try: + super().handle_one_request() + except Exception as e: + logging.error(f"Unhandled exception in handle_one_request: {e}", exc_info=True) + # Try to send error response if not already sent + if not self._response_sent: + try: + error_body = json.dumps({'error': f'Unhandled exception: {str(e)}'}).encode('utf-8') + self.send_response(500) + self.send_header('Content-type', 'application/json') + self.send_header('Content-Length', str(len(error_body))) + self.end_headers() + self.wfile.write(error_body) + self.wfile.flush() + self._response_sent = True + except Exception as e2: + logging.error(f"Failed to send error response in handle_one_request: {e2}", exc_info=True) + # Last resort: try to write directly + try: + if not self._response_sent: + self.wfile.write(b'{"error":"Internal server error"}') + self.wfile.flush() + self._response_sent = True + except: + pass + + def send_json_response(self, status_code, data): + """Send a JSON response""" + # Ensure response tracking is initialized + if not hasattr(self, '_response_sent'): + 
self._response_sent = False + + # Don't send if already sent + if self._response_sent: + logging.warning("Attempted to send response when already sent") + return + + try: + response_body = json.dumps(data).encode('utf-8') + self.send_response(status_code) + self.send_header('Content-type', 'application/json') + self.send_header('Content-Length', str(len(response_body))) + self.end_headers() + self.wfile.write(response_body) + self.wfile.flush() + self._response_sent = True + except (BrokenPipeError, ConnectionResetError) as e: + # Client disconnected, can't send response + logging.warning(f"Client disconnected during response: {e}") + self._response_sent = True + except Exception as e: + logging.error(f"Error in send_json_response: {e}", exc_info=True) + # Try to send error response - check if headers were sent + try: + # Check if headers were already sent + if hasattr(self, '_headers_buffer') and self._headers_buffer: + # Headers were sent, try to write error to body + error_msg = json.dumps({'error': f'Error sending response: {str(e)}'}).encode('utf-8') + self.wfile.write(error_msg) + self.wfile.flush() + else: + # Headers not sent yet, send full error response + error_msg = json.dumps({'error': f'Error sending response: {str(e)}'}).encode('utf-8') + self.send_response(500) + self.send_header('Content-type', 'application/json') + self.send_header('Content-Length', str(len(error_msg))) + self.end_headers() + self.wfile.write(error_msg) + self.wfile.flush() + self._response_sent = True + except Exception as e2: + # Can't write to response, mark as sent to prevent double error handling + logging.error(f"Failed to send error response: {e2}", exc_info=True) + # Last resort: try minimal response + try: + if not self._response_sent: + self.wfile.write(b'{"error":"Internal server error"}') + self.wfile.flush() + self._response_sent = True + except: + self._response_sent = True + + def do_GET(self): + # Handle health checks and GET requests + self.send_json_response(200, 
{'status': 'ok'}) + + def do_POST(self): + # Initialize response tracking before processing + if not hasattr(self, '_response_sent'): + self._response_sent = False + # Wrap entire method to ensure we always send a response + try: + self._do_POST() + except Exception as e: + logging.error(f"Critical error in do_POST: {e}", exc_info=True) + # Last resort - try to send error response + if not self._response_sent: + try: + error_body = json.dumps({'error': f'Critical error: {str(e)}'}).encode('utf-8') + self.send_response(500) + self.send_header('Content-type', 'application/json') + self.send_header('Content-Length', str(len(error_body))) + self.end_headers() + self.wfile.write(error_body) + self.wfile.flush() + self._response_sent = True + except Exception as e2: + logging.error(f"Failed to send critical error response: {e2}", exc_info=True) + # Last resort: try to write minimal response + try: + if not self._response_sent: + self.wfile.write(b'{"error":"Internal server error"}') + self.wfile.flush() + self._response_sent = True + except: + pass + + def _do_POST(self): + # Initialize response tracking + self._response_sent = False + req_json = None + invocation_id = None + begin = None + + try: + logging.info(f"Received POST request to {self.path}") + content_length = int(self.headers.get('Content-Length', 0)) + if content_length > 0: + post_data = self.rfile.read(content_length) + else: + post_data = b'{}' + + try: + req_json = json.loads(post_data.decode('utf-8')) + except json.JSONDecodeError as e: + logging.error(f"JSON decode error: {e}, data: {post_data}") + self.send_json_response(400, {'error': f'Invalid JSON: {str(e)}'}) + return + + invocation_id = self.headers.get('X-Azure-Functions-InvocationId', str(uuid.uuid4())) + + # Update request with ID (consistent with python wrapper) + if isinstance(req_json, dict): + req_json['request-id'] = invocation_id + req_json['income-timestamp'] = datetime.datetime.now().timestamp() + + begin = datetime.datetime.now() + + 
# Import user function + # In Azure, function.py is in the handler directory + # AWS uses: from function import function (because AWS has function/function.py) + # Azure structure: handler/function.py, so we use: import function + function_module = None + try: + import function + function_module = function + except ImportError as e: + # Try AWS-style import as fallback + try: + from function import function as aws_function_module + function_module = aws_function_module + except ImportError as e2: + logging.error(f"Failed to import function: {e}") + logging.error(f"AWS-style import also failed: {e2}") + logging.error(f"sys.path: {sys.path}") + logging.error(f"Current directory: {os.getcwd()}") + logging.error(f"Handler path exists: {os.path.exists(os.path.join(os.getcwd(), 'handler'))}") + # List files in handler directory for debugging + handler_dir = os.path.join(os.getcwd(), 'handler') + if os.path.exists(handler_dir): + try: + files = os.listdir(handler_dir) + logging.error(f"Files in handler directory: {files}") + except Exception as list_err: + logging.error(f"Failed to list handler directory: {list_err}") + self.send_json_response(500, {'error': f'Failed to import function: {str(e)}'}) + return + + if function_module is None: + self.send_json_response(500, {'error': 'Function module is None after import'}) + return + + try: + # Call the user function - AWS uses: ret = function.handler(event) + ret = function_module.handler(req_json) + except Exception as e: + logging.error(f"Function handler error: {e}", exc_info=True) + self.send_json_response(500, {'error': str(e)}) + return + except Exception as e: + logging.error(f"Unexpected error in _do_POST: {e}", exc_info=True) + if not self._response_sent: + self.send_json_response(500, {'error': str(e)}) + return + + # Process response - wrap in try-except to ensure we always send a response + try: + end = datetime.datetime.now() + + # Logging and storage upload + # Handle case where ret might be None or not a dict 
+ if ret is None: + logging.error("Function handler returned None") + self.send_json_response(500, {'error': 'Function handler returned None'}) + return + + if not isinstance(ret, dict): + logging.warning(f"Function handler returned non-dict: {type(ret)}, value: {ret}") + ret = {'result': ret} + + log_data = { + 'output': ret.get('result', ret) if isinstance(ret, dict) else ret + } + if isinstance(ret, dict) and 'measurement' in ret: + log_data['measurement'] = ret['measurement'] + + if req_json is not None and isinstance(req_json, dict) and 'logs' in req_json: + log_data['time'] = (end - begin) / datetime.timedelta(microseconds=1) + results_begin = datetime.datetime.now() + try: + import storage + storage_inst = storage.storage.get_instance() + b = req_json.get('logs').get('bucket') + req_id = invocation_id + + storage_inst.upload_stream(b, '{}.json'.format(req_id), + io.BytesIO(json.dumps(log_data).encode('utf-8'))) + + results_end = datetime.datetime.now() + results_time = (results_end - results_begin) / datetime.timedelta(microseconds=1) + except Exception as e: + logging.warning(f"Failed to upload logs to storage: {e}") + results_time = 0 + else: + results_time = 0 + + # Cold start detection + is_cold = False + container_id = '' + try: + fname = os.path.join('/tmp','cold_run') + if not os.path.exists(fname): + is_cold = True + container_id = str(uuid.uuid4())[0:8] + with open(fname, 'a') as f: + f.write(container_id) + else: + with open(fname, 'r') as f: + container_id = f.read() + except Exception as e: + logging.warning(f"Failed to read/write cold_run file: {e}") + container_id = str(uuid.uuid4())[0:8] + + is_cold_worker = False + global cold_marker + try: + _ = cold_marker + except NameError: + cold_marker = True + is_cold_worker = True + + response_data = { + 'begin': begin.strftime('%s.%f'), + 'end': end.strftime('%s.%f'), + 'results_time': results_time, + 'result': log_data, + 'is_cold': is_cold, + 'is_cold_worker': is_cold_worker, + 'container_id': 
container_id, + 'environ_container_id': os.environ.get('CONTAINER_NAME', ''), + 'request_id': invocation_id + } + + # Send response + self.send_json_response(200, response_data) + except Exception as e: + logging.error(f"Error processing response: {e}", exc_info=True) + # Try to send error response + try: + self.send_json_response(500, {'error': f'Error processing response: {str(e)}'}) + except Exception as send_error: + logging.error(f"Failed to send error response: {send_error}", exc_info=True) + # Last resort - try to send minimal response + try: + self.send_response(500) + self.end_headers() + self.wfile.write(json.dumps({'error': 'Internal server error'}).encode('utf-8')) + self.wfile.flush() + except: + pass + +def run(server_class=HTTPServer, handler_class=Handler): + try: + # Azure sets FUNCTIONS_CUSTOMHANDLER_PORT + port = int(os.environ.get('FUNCTIONS_CUSTOMHANDLER_PORT', 8080)) + server_address = ('', port) + httpd = server_class(server_address, handler_class) + logging.info(f"Starting httpd on port {port}...") + logging.info(f"Current directory: {os.getcwd()}") + logging.info(f"Handler path: {os.path.join(os.getcwd(), 'handler')}") + logging.info(f"Handler path exists: {os.path.exists(os.path.join(os.getcwd(), 'handler'))}") + logging.info(f"sys.path: {sys.path}") + # List files in current directory for debugging + try: + files = os.listdir(os.getcwd()) + logging.info(f"Files in current directory: {files}") + except Exception as e: + logging.warning(f"Failed to list current directory: {e}") + httpd.serve_forever() + except Exception as e: + logging.error(f"Failed to start server: {e}", exc_info=True) + # Don't raise - try to log and exit gracefully + sys.exit(1) + +if __name__ == "__main__": + try: + run() + except KeyboardInterrupt: + logging.info("Server interrupted by user") + sys.exit(0) + except Exception as e: + logging.error(f"Fatal error: {e}", exc_info=True) + sys.exit(1) + diff --git a/benchmarks/wrappers/azure/pypy/nosql.py 
b/benchmarks/wrappers/azure/pypy/nosql.py new file mode 100644 index 000000000..f7dd94851 --- /dev/null +++ b/benchmarks/wrappers/azure/pypy/nosql.py @@ -0,0 +1,94 @@ +from typing import Dict, List, Optional, Tuple + +from azure.cosmos import CosmosClient, ContainerProxy + + +class nosql: + instance = None + client = None + + def __init__(self, url: str, credential: str, database: str): + self._client = CosmosClient(url=url, credential=credential) + self._db_client = self._client.get_database_client(database) + self._containers: Dict[str, ContainerProxy] = {} + + def _get_table(self, table_name: str): + + if table_name not in self._containers: + self._containers[table_name] = self._db_client.get_container_client(table_name) + + return self._containers[table_name] + + def insert( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + + data[primary_key[0]] = primary_key[1] + # secondary key must have that name in CosmosDB + data["id"] = secondary_key[1] + + self._get_table(table_name).upsert_item(data) + + def get( + self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str] + ) -> dict: + res = self._get_table(table_name).read_item( + item=secondary_key[1], partition_key=primary_key[1] + ) + res[secondary_key[0]] = secondary_key[1] + + return res + + def update( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + updates: dict, + ): + + ops = [] + for key, value in updates.items(): + ops.append({"op": "add", "path": f"/{key}", "value": value}) + + self._get_table(table_name).patch_item( + item=secondary_key[1], partition_key=primary_key[1], patch_operations=ops + ) + + """ + This query must involve partition key - it does not scan across partitions. 
+ """ + + def query( + self, table_name: str, primary_key: Tuple[str, str], secondary_key_name: str + ) -> List[dict]: + + res = list( + self._get_table(table_name).query_items( + f"SELECT * FROM c WHERE c.{primary_key[0]} = '{primary_key[1]}'", + enable_cross_partition_query=False, + ) + ) + + # Emulate the kind key + for item in res: + item[secondary_key_name] = item["id"] + + return res + + def delete(self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str]): + + self._get_table(table_name).delete_item(item=secondary_key[1], partition_key=primary_key[1]) + + @staticmethod + def get_instance( + database: Optional[str] = None, url: Optional[str] = None, credential: Optional[str] = None + ): + if nosql.instance is None: + assert database is not None and url is not None and credential is not None + nosql.instance = nosql(url, credential, database) + return nosql.instance diff --git a/benchmarks/wrappers/azure/pypy/storage.py b/benchmarks/wrappers/azure/pypy/storage.py new file mode 100644 index 000000000..42b129c89 --- /dev/null +++ b/benchmarks/wrappers/azure/pypy/storage.py @@ -0,0 +1,59 @@ + +import os +import uuid +from typing import Optional + +from azure.storage.blob import BlobServiceClient + +class storage: + instance = None + client = None + + def __init__(self, connection_string: str): + self.client = BlobServiceClient.from_connection_string(connection_string) + + @staticmethod + def unique_name(name): + name, extension = os.path.splitext(name) + return '{name}.{random}{extension}'.format( + name=name, + extension=extension, + random=str(uuid.uuid4()).split('-')[0] + ) + + def upload(self, container, file, filepath): + with open(filepath, 'rb') as data: + return self.upload_stream(container, file, data) + + def download(self, container, file, filepath): + with open(filepath, 'wb') as download_file: + download_file.write( self.download_stream(container, file) ) + + def download_directory(self, container, prefix, path): + client = 
self.client.get_container_client(container=container) + objects = client.list_blobs(name_starts_with=prefix) + for obj in objects: + file_name = obj.name + path_to_file = os.path.dirname(file_name) + os.makedirs(os.path.join(path, path_to_file), exist_ok=True) + self.download(container, file_name, os.path.join(path, file_name)) + + def upload_stream(self, container, file, data): + key_name = storage.unique_name(file) + client = self.client.get_blob_client( + container=container, + blob=key_name + ) + client.upload_blob(data) + return key_name + + def download_stream(self, container, file): + client = self.client.get_blob_client(container=container, blob=file) + return client.download_blob().readall() + + @staticmethod + def get_instance(connection_string: Optional[str] = None): + if storage.instance is None: + assert connection_string is not None + storage.instance = storage(connection_string) + return storage.instance diff --git a/config/systems.json b/config/systems.json index 0f37c5ef0..ff6f872be 100644 --- a/config/systems.json +++ b/config/systems.json @@ -231,6 +231,33 @@ "uuid": "3.4.0" } } + }, + "pypy": { + "base_images": { + "x64": { + "3.11": "ubuntu:22.04" + } + }, + "images": [ + "build" + ], + "username": "docker_user", + "deployment": { + "files": [ + "handler.py", + "storage.py", + "nosql.py" + ], + "packages": [], + "module_packages": { + "storage": [ + "azure-storage-blob" + ], + "nosql": [ + "azure-cosmos" + ] + } + } } }, "images": { diff --git a/dockerfiles/azure/pypy/Dockerfile.build b/dockerfiles/azure/pypy/Dockerfile.build new file mode 100644 index 000000000..ea0ed7e78 --- /dev/null +++ b/dockerfiles/azure/pypy/Dockerfile.build @@ -0,0 +1,19 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV PYTHON_VERSION=${VERSION} + +RUN apt-get update && apt-get install -y gosu curl tar bzip2 gzip libc6 libgcc-s1 && \ + mkdir -p /lib64 && \ + (ln -sf /lib/x86_64-linux-gnu/ld-linux-x86-64.so.2 /lib64/ld-linux-x86-64.so.2 || true) + +RUN mkdir -p 
/sebs/ +COPY dockerfiles/pypy_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV PATH=/usr/sbin:$PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/pypy_installer.sh b/dockerfiles/pypy_installer.sh index 303087d6a..114e6390e 100644 --- a/dockerfiles/pypy_installer.sh +++ b/dockerfiles/pypy_installer.sh @@ -1,42 +1,43 @@ #!/bin/bash +set -euo pipefail cd /mnt/function -#TODO: If the base image OS is not centOS based, change to apt -yum install -y tar bzip2 gzip - -#TODO: make version configurable +# Download and unpack PyPy curl -L -o pypy.tar.bz2 https://downloads.python.org/pypy/pypy3.11-v7.3.20-linux64.tar.bz2 -tar -xjf pypy.tar.bz2 -mv pypy3.11-v7.3.20-linux64 /opt/pypy +tar -xjf pypy.tar.bz2 +mv pypy3.11-v7.3.20-linux64 pypy rm pypy.tar.bz2 -chmod -R +x /opt/pypy/bin -export PATH=/opt/pypy/bin:$PATH +chmod -R +x pypy/bin +export PATH=/mnt/function/pypy/bin:$PATH + +# Ensure pip is available python -m ensurepip python -mpip install -U pip wheel -#Probably remove this conditional, might break pypy builds, might lead to installation of CPython libraries +# Where to place dependencies for Azure/AWS +REQ_TARGET=".python_packages/lib/site-packages" +mkdir -p "${REQ_TARGET}" + +# Platform pin for arm64 if needed PLATFORM_ARG="" -if [[ "${TARGET_ARCHITECTURE}" == "arm64" ]]; then +if [[ "${TARGET_ARCHITECTURE:-}" == "arm64" ]]; then PLATFORM_ARG="--platform manylinux_2_17_aarch64 --only-binary=:all:" fi -if [[ "${TARGET_ARCHITECTURE}" == "arm64" ]] && [[ -f "requirements.txt.arm.${PYTHON_VERSION}" ]]; then - - pip3 -q install ${PLATFORM_ARG} -r requirements.txt.arm.${PYTHON_VERSION} -t .python_packages/lib/site-packages - +# Pick the best matching requirements file +if [[ "${TARGET_ARCHITECTURE:-}" == "arm64" && -f 
"requirements.txt.arm.${PYTHON_VERSION}" ]]; then + REQ_FILE="requirements.txt.arm.${PYTHON_VERSION}" elif [[ -f "requirements.txt.${PYTHON_VERSION}" ]]; then - - pip3 -q install ${PLATFORM_ARG} -r requirements.txt.${PYTHON_VERSION} -t .python_packages/lib/site-packages - + REQ_FILE="requirements.txt.${PYTHON_VERSION}" else - - pip3 -q install ${PLATFORM_ARG} -r requirements.txt -t .python_packages/lib/site-packages - -fi - -if [[ -f "${SCRIPT_FILE}" ]]; then - /bin/bash ${SCRIPT_FILE} .python_packages/lib/site-packages + REQ_FILE="requirements.txt" fi +# Install benchmark deps into the target directory +python -mpip install ${PLATFORM_ARG} -r "${REQ_FILE}" -t "${REQ_TARGET}" +# Run optional benchmark packaging hook +if [[ -f "${SCRIPT_FILE:-}" ]]; then + /bin/bash "${SCRIPT_FILE}" "${REQ_TARGET}" +fi \ No newline at end of file diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index d848d724a..ae51f6d2a 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -33,7 +33,7 @@ class Azure(System): _config: AzureConfig # runtime mapping - AZURE_RUNTIMES = {"python": "python", "nodejs": "node"} + AZURE_RUNTIMES = {"python": "python", "nodejs": "node", "pypy": "custom"} @staticmethod def name(): @@ -133,25 +133,30 @@ def package_code( # In previous step we ran a Docker container which installed packages # Python packages are in .python_packages because this is expected by Azure - EXEC_FILES = {"python": "handler.py", "nodejs": "handler.js"} + EXEC_FILES = {"python": "handler.py", "nodejs": "handler.js", "pypy": "handler.py"} CONFIG_FILES = { "python": ["requirements.txt", ".python_packages"], "nodejs": ["package.json", "node_modules"], + # Keep .python_packages at the root so custom handler can import deps. 
+ "pypy": ["requirements.txt", ".python_packages", "pypy"], } package_config = CONFIG_FILES[language_name] handler_dir = os.path.join(directory, "handler") os.makedirs(handler_dir) # move all files to 'handler' except package config + # For pypy custom handlers, handler.py must stay at root level + files_to_exclude = package_config.copy() + if language_name == "pypy": + files_to_exclude.append(EXEC_FILES[language_name]) for f in os.listdir(directory): - if f not in package_config: + if f not in files_to_exclude: source_file = os.path.join(directory, f) shutil.move(source_file, handler_dir) # generate function.json # TODO: extension to other triggers than HTTP default_function_json = { - "scriptFile": EXEC_FILES[language_name], "bindings": [ { "authLevel": "anonymous", @@ -163,6 +168,9 @@ def package_code( {"type": "http", "direction": "out", "name": "$return"}, ], } + if language_name != "pypy": + default_function_json["scriptFile"] = EXEC_FILES[language_name] + json_out = os.path.join(directory, "handler", "function.json") json.dump(default_function_json, open(json_out, "w"), indent=2) @@ -174,6 +182,14 @@ def package_code( "version": "[4.0.0, 5.0.0)", }, } + if language_name == "pypy": + default_host_json["customHandler"] = { + "description": { + "defaultExecutablePath": "pypy/bin/pypy", + "arguments": ["handler.py"], + }, + "enableForwardingHttpRequest": True, + } json.dump(default_host_json, open(os.path.join(directory, "host.json"), "w"), indent=2) code_size = Benchmark.directory_size(directory) @@ -457,11 +473,16 @@ def create_function( while True: try: # create function app + # Custom runtime doesn't support --runtime-version parameter + runtime_version_param = "" + if config["runtime"] != "custom": + runtime_version_param = " --runtime-version {runtime_version} " + self.cli_instance.execute( ( " az functionapp create --resource-group {resource_group} " " --os-type Linux --consumption-plan-location {region} " - " --runtime {runtime} --runtime-version 
{runtime_version} " + " --runtime {runtime}" + runtime_version_param + " --name {func_name} --storage-account {storage_account}" " --functions-version 4 " ).format(**config) diff --git a/tools/build_docker_images.py b/tools/build_docker_images.py index 6d6911aee..365792fc3 100755 --- a/tools/build_docker_images.py +++ b/tools/build_docker_images.py @@ -15,9 +15,13 @@ parser.add_argument("--type", default=None, choices=["build", "run", "manage"], action="store") parser.add_argument("--language", default=None, choices=["python", "nodejs", "pypy"], action="store") parser.add_argument("--language-version", default=None, type=str, action="store") +# Optional: force build platform (e.g., linux/amd64 on Apple Silicon) +parser.add_argument("--platform", default=None, type=str, action="store") args = parser.parse_args() config = json.load(open(os.path.join(PROJECT_DIR, "config", "systems.json"), "r")) client = docker.from_env() +# Prefer explicit CLI platform, otherwise fall back to environment +PLATFORM = args.platform or os.environ.get("DOCKER_DEFAULT_PLATFORM") def build(image_type, system, language=None, version=None, version_name=None): @@ -51,8 +55,17 @@ def build(image_type, system, language=None, version=None, version_name=None): target, PROJECT_DIR, dockerfile, buildargs ) ) + build_kwargs = { + "path": PROJECT_DIR, + "dockerfile": dockerfile, + "buildargs": buildargs, + "tag": target, + } + if PLATFORM: + build_kwargs["platform"] = PLATFORM + try: - client.images.build(path=PROJECT_DIR, dockerfile=dockerfile, buildargs=buildargs, tag=target) + client.images.build(**build_kwargs) except docker.errors.BuildError as exc: print("Error! Build failed!") print(exc) From 9db20b984cd6bbdb13768a01f58c64a7356c8d75 Mon Sep 17 00:00:00 2001 From: toooadi <75420344+toooadi@users.noreply.github.com> Date: Mon, 8 Dec 2025 10:43:49 +0100 Subject: [PATCH 04/31] Update config/systems.json Yes, correct. Oversaw this. 
Co-authored-by: Mark --- config/systems.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/systems.json b/config/systems.json index 6a48081b9..5b5c17028 100644 --- a/config/systems.json +++ b/config/systems.json @@ -68,10 +68,10 @@ "pypy": { "base_images": { "x64": { - "3.11": "pypy:latest" + "3.11": "pypy:3.11" }, "arm64": { - "3.11": "pypy:latest" + "3.11": "pypy:3.11" } }, "images": [ From cdd999841d5a5c43620025b0727c5fd5ec3660f8 Mon Sep 17 00:00:00 2001 From: Alexander Schlieper Date: Wed, 10 Dec 2025 21:35:40 +0100 Subject: [PATCH 05/31] added pypy support for other benchmarks --- .../020.network-benchmark/config.json | 4 +- .../020.network-benchmark/pypy/function.py | 58 ++++++++++ .../030.clock-synchronization/config.json | 4 +- .../pypy/function.py | 74 +++++++++++++ .../030.clock-synchronization/pypy/init.sh | 6 + .../040.server-reply/config.json | 2 +- .../040.server-reply/pypy/function.py | 13 +++ .../100.webapps/120.uploader/config.json | 2 +- .../100.webapps/120.uploader/pypy/function.py | 48 ++++++++ .../120.uploader/pypy/requirements.txt | 0 .../100.webapps/130.crud-api/config.json | 3 +- .../100.webapps/130.crud-api/pypy/function.py | 67 ++++++++++++ .../210.thumbnailer/config.json | 2 +- .../210.thumbnailer/pypy/function.py | 70 ++++++++++++ .../210.thumbnailer/pypy/requirements.txt | 1 + .../220.video-processing/config.json | 2 +- .../220.video-processing/pypy/function.py | 103 ++++++++++++++++++ .../220.video-processing/pypy/init.sh | 10 ++ .../pypy/requirements.txt | 0 .../300.utilities/311.compression/config.json | 2 +- .../311.compression/pypy/function.py | 58 ++++++++++ .../311.compression/pypy/requirements.txt | 0 .../501.graph-pagerank/config.json | 2 +- .../501.graph-pagerank/pypy/function.py | 29 +++++ .../501.graph-pagerank/pypy/requirements.txt | 1 + .../500.scientific/502.graph-mst/config.json | 2 +- .../502.graph-mst/pypy/function.py | 29 +++++ .../502.graph-mst/pypy/requirements.txt | 1 + 
.../500.scientific/503.graph-bfs/config.json | 2 +- .../503.graph-bfs/pypy/function.py | 29 +++++ .../503.graph-bfs/pypy/requirements.txt | 1 + config/systems.json | 2 +- sebs/benchmark.py | 4 + tools/build_docker_images.py | 9 ++ 34 files changed, 626 insertions(+), 14 deletions(-) create mode 100644 benchmarks/000.microbenchmarks/020.network-benchmark/pypy/function.py create mode 100644 benchmarks/000.microbenchmarks/030.clock-synchronization/pypy/function.py create mode 100755 benchmarks/000.microbenchmarks/030.clock-synchronization/pypy/init.sh create mode 100644 benchmarks/000.microbenchmarks/040.server-reply/pypy/function.py create mode 100644 benchmarks/100.webapps/120.uploader/pypy/function.py create mode 100644 benchmarks/100.webapps/120.uploader/pypy/requirements.txt create mode 100644 benchmarks/100.webapps/130.crud-api/pypy/function.py create mode 100644 benchmarks/200.multimedia/210.thumbnailer/pypy/function.py create mode 100644 benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt create mode 100644 benchmarks/200.multimedia/220.video-processing/pypy/function.py create mode 100755 benchmarks/200.multimedia/220.video-processing/pypy/init.sh create mode 100644 benchmarks/200.multimedia/220.video-processing/pypy/requirements.txt create mode 100644 benchmarks/300.utilities/311.compression/pypy/function.py create mode 100644 benchmarks/300.utilities/311.compression/pypy/requirements.txt create mode 100644 benchmarks/500.scientific/501.graph-pagerank/pypy/function.py create mode 100644 benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt create mode 100644 benchmarks/500.scientific/502.graph-mst/pypy/function.py create mode 100644 benchmarks/500.scientific/502.graph-mst/pypy/requirements.txt create mode 100644 benchmarks/500.scientific/503.graph-bfs/pypy/function.py create mode 100644 benchmarks/500.scientific/503.graph-bfs/pypy/requirements.txt diff --git a/benchmarks/000.microbenchmarks/020.network-benchmark/config.json 
b/benchmarks/000.microbenchmarks/020.network-benchmark/config.json index c3c2c73b1..4011ea075 100644 --- a/benchmarks/000.microbenchmarks/020.network-benchmark/config.json +++ b/benchmarks/000.microbenchmarks/020.network-benchmark/config.json @@ -1,6 +1,6 @@ { "timeout": 30, "memory": 128, - "languages": ["python"], - "modules": [] + "languages": ["python", "pypy"], + "modules": ["storage"] } diff --git a/benchmarks/000.microbenchmarks/020.network-benchmark/pypy/function.py b/benchmarks/000.microbenchmarks/020.network-benchmark/pypy/function.py new file mode 100644 index 000000000..44398b7bb --- /dev/null +++ b/benchmarks/000.microbenchmarks/020.network-benchmark/pypy/function.py @@ -0,0 +1,58 @@ +import csv +import json +import os.path +import socket +from datetime import datetime +from time import sleep + +import storage + +def handler(event): + + request_id = event['request-id'] + address = event['server-address'] + port = event['server-port'] + repetitions = event['repetitions'] + output_bucket = event.get('bucket').get('bucket') + output_prefix = event.get('bucket').get('output') + times = [] + i = 0 + socket.setdefaulttimeout(3) + server_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server_socket.bind(('', 0)) + message = request_id.encode('utf-8') + adr = (address, port) + consecutive_failures = 0 + while i < repetitions + 1: + try: + send_begin = datetime.now().timestamp() + server_socket.sendto(message, adr) + msg, addr = server_socket.recvfrom(1024) + recv_end = datetime.now().timestamp() + except socket.timeout: + i += 1 + consecutive_failures += 1 + if consecutive_failures == 5: + print("Can't setup the connection") + break + continue + if i > 0: + times.append([i, send_begin, recv_end]) + i += 1 + consecutive_failures = 0 + server_socket.settimeout(2) + server_socket.close() + + if consecutive_failures != 5: + with open('/tmp/data.csv', 'w', newline='') as csvfile: + 
writer = csv.writer(csvfile, delimiter=',') + writer.writerow(["id", "client_send", "client_rcv"]) + for row in times: + writer.writerow(row) + + client = storage.storage.get_instance() + filename = 'results-{}.csv'.format(request_id) + key = client.upload(output_bucket, os.path.join(output_prefix, filename), '/tmp/data.csv') + + return { 'result': key } diff --git a/benchmarks/000.microbenchmarks/030.clock-synchronization/config.json b/benchmarks/000.microbenchmarks/030.clock-synchronization/config.json index c3c2c73b1..4011ea075 100644 --- a/benchmarks/000.microbenchmarks/030.clock-synchronization/config.json +++ b/benchmarks/000.microbenchmarks/030.clock-synchronization/config.json @@ -1,6 +1,6 @@ { "timeout": 30, "memory": 128, - "languages": ["python"], - "modules": [] + "languages": ["python", "pypy"], + "modules": ["storage"] } diff --git a/benchmarks/000.microbenchmarks/030.clock-synchronization/pypy/function.py b/benchmarks/000.microbenchmarks/030.clock-synchronization/pypy/function.py new file mode 100644 index 000000000..c3f3f3934 --- /dev/null +++ b/benchmarks/000.microbenchmarks/030.clock-synchronization/pypy/function.py @@ -0,0 +1,74 @@ +import csv +import json +import os +import socket +from datetime import datetime +from time import sleep + +import storage + +def handler(event): + + request_id = event['request-id'] + address = event['server-address'] + port = event['server-port'] + repetitions = event['repetitions'] + output_bucket = event.get('bucket').get('bucket') + output_prefix = event.get('bucket').get('output') + times = [] + print("Starting communication with {}:{}".format(address, port)) + i = 0 + socket.setdefaulttimeout(4) + server_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server_socket.bind(('', 0)) + message = request_id.encode('utf-8') + adr = (address, port) + consecutive_failures = 0 + measurements_not_smaller = 0 + cur_min = 0 + while i < 1000: + 
try: + send_begin = datetime.now().timestamp() + server_socket.sendto(message, adr) + msg, addr = server_socket.recvfrom(1024) + recv_end = datetime.now().timestamp() + except socket.timeout: + i += 1 + consecutive_failures += 1 + if consecutive_failures == 7: + print("Can't setup the connection") + break + continue + if i > 0: + times.append([i, send_begin, recv_end]) + cur_time = recv_end - send_begin + print("Time {} Min Time {} NotSmaller {}".format(cur_time, cur_min, measurements_not_smaller)) + if cur_time > cur_min and cur_min > 0: + measurements_not_smaller += 1 + if measurements_not_smaller == repetitions: + message = "stop".encode('utf-8') + server_socket.sendto(message, adr) + break + else: + cur_min = cur_time + measurements_not_smaller = 0 + i += 1 + consecutive_failures = 0 + server_socket.settimeout(4) + server_socket.close() + + if consecutive_failures != 5: + with open('/tmp/data.csv', 'w', newline='') as csvfile: + writer = csv.writer(csvfile, delimiter=',') + writer.writerow(["id", "client_send", "client_rcv"]) + for row in times: + writer.writerow(row) + + client = storage.storage.get_instance() + filename = 'results-{}.csv'.format(request_id) + key = client.upload(output_bucket, os.path.join(output_prefix, filename), '/tmp/data.csv') + else: + key = None + + return { 'result': {'bucket-key': key, 'timestamp': event['income-timestamp']} } diff --git a/benchmarks/000.microbenchmarks/030.clock-synchronization/pypy/init.sh b/benchmarks/000.microbenchmarks/030.clock-synchronization/pypy/init.sh new file mode 100755 index 000000000..ecf0aff65 --- /dev/null +++ b/benchmarks/000.microbenchmarks/030.clock-synchronization/pypy/init.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +DIR=$1 +VERBOSE=$2 +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +cp ${SCRIPT_DIR}/file ${DIR} diff --git a/benchmarks/000.microbenchmarks/040.server-reply/config.json b/benchmarks/000.microbenchmarks/040.server-reply/config.json index 93ce2f561..53f6349d6 
100644 --- a/benchmarks/000.microbenchmarks/040.server-reply/config.json +++ b/benchmarks/000.microbenchmarks/040.server-reply/config.json @@ -1,6 +1,6 @@ { "timeout": 120, "memory": 128, - "languages": ["python", "nodejs"], + "languages": ["python", "nodejs", "pypy"], "modules": [] } diff --git a/benchmarks/000.microbenchmarks/040.server-reply/pypy/function.py b/benchmarks/000.microbenchmarks/040.server-reply/pypy/function.py new file mode 100644 index 000000000..fb5b57aa3 --- /dev/null +++ b/benchmarks/000.microbenchmarks/040.server-reply/pypy/function.py @@ -0,0 +1,13 @@ + +import socket +from time import sleep + +def handler(event): + + # start timing + addr = (event.get('ip-address'), event.get('port')) + socket.setdefaulttimeout(20) + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.connect(addr) + msg = s.recv(1024).decode() + return {"result": msg} diff --git a/benchmarks/100.webapps/120.uploader/config.json b/benchmarks/100.webapps/120.uploader/config.json index cbc635670..90bf42e3b 100644 --- a/benchmarks/100.webapps/120.uploader/config.json +++ b/benchmarks/100.webapps/120.uploader/config.json @@ -1,6 +1,6 @@ { "timeout": 30, "memory": 128, - "languages": ["python", "nodejs"], + "languages": ["python", "nodejs", "pypy"], "modules": ["storage"] } diff --git a/benchmarks/100.webapps/120.uploader/pypy/function.py b/benchmarks/100.webapps/120.uploader/pypy/function.py new file mode 100644 index 000000000..e7c55aa35 --- /dev/null +++ b/benchmarks/100.webapps/120.uploader/pypy/function.py @@ -0,0 +1,48 @@ + +import datetime +import os + +import urllib.request + +import storage +client = storage.storage.get_instance() + +SEBS_USER_AGENT = "SeBS/1.2 (https://github.com/spcl/serverless-benchmarks) SeBS Benchmark Suite/1.2" + +def handler(event): + + bucket = event.get('bucket').get('bucket') + output_prefix = event.get('bucket').get('output') + url = event.get('object').get('url') + name = os.path.basename(url) + download_path = '/tmp/{}'.format(name) + 
+ process_begin = datetime.datetime.now() + req = urllib.request.Request(url) + req.add_header('User-Agent', SEBS_USER_AGENT) + with open(download_path, 'wb') as f: + with urllib.request.urlopen(req) as response: + f.write(response.read()) + size = os.path.getsize(download_path) + process_end = datetime.datetime.now() + + upload_begin = datetime.datetime.now() + key_name = client.upload(bucket, os.path.join(output_prefix, name), download_path) + upload_end = datetime.datetime.now() + + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1) + return { + 'result': { + 'bucket': bucket, + 'url': url, + 'key': key_name + }, + 'measurement': { + 'download_time': 0, + 'download_size': 0, + 'upload_time': upload_time, + 'upload_size': size, + 'compute_time': process_time + } + } diff --git a/benchmarks/100.webapps/120.uploader/pypy/requirements.txt b/benchmarks/100.webapps/120.uploader/pypy/requirements.txt new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/100.webapps/130.crud-api/config.json b/benchmarks/100.webapps/130.crud-api/config.json index 25c6cb05e..ba846d706 100644 --- a/benchmarks/100.webapps/130.crud-api/config.json +++ b/benchmarks/100.webapps/130.crud-api/config.json @@ -3,7 +3,8 @@ "memory": 128, "languages": [ "python", - "nodejs" + "nodejs", + "pypy" ], "modules": [ "nosql" diff --git a/benchmarks/100.webapps/130.crud-api/pypy/function.py b/benchmarks/100.webapps/130.crud-api/pypy/function.py new file mode 100644 index 000000000..0b5e0e8c0 --- /dev/null +++ b/benchmarks/100.webapps/130.crud-api/pypy/function.py @@ -0,0 +1,67 @@ +from . 
import nosql + +nosql_client = nosql.nosql.get_instance() + +nosql_table_name = "shopping_cart" + + +def add_product(cart_id: str, product_id: str, product_name: str, price: float, quantity: int): + + nosql_client.insert( + nosql_table_name, + ("cart_id", cart_id), + ("product_id", product_id), + {"price": price, "quantity": quantity, "name": product_name}, + ) + + +def get_products(cart_id: str, product_id: str): + return nosql_client.get(nosql_table_name, ("cart_id", cart_id), ("product_id", product_id)) + + +def query_products(cart_id: str): + + res = nosql_client.query( + nosql_table_name, + ("cart_id", cart_id), + "product_id", + ) + + products = [] + price_sum = 0 + quantity_sum = 0 + for product in res: + + products.append(product["name"]) + price_sum += product["price"] + quantity_sum += product["quantity"] + + avg_price = price_sum / quantity_sum if quantity_sum > 0 else 0.0 + + return {"products": products, "total_cost": price_sum, "avg_price": avg_price} + + +def handler(event): + + results = [] + + for request in event["requests"]: + + route = request["route"] + body = request["body"] + + if route == "PUT /cart": + add_product( + body["cart"], body["product_id"], body["name"], body["price"], body["quantity"] + ) + res = {} + elif route == "GET /cart/{id}": + res = get_products(body["cart"], request["path"]["id"]) + elif route == "GET /cart": + res = query_products(body["cart"]) + else: + raise RuntimeError(f"Unknown request route: {route}") + + results.append(res) + + return {"result": results} diff --git a/benchmarks/200.multimedia/210.thumbnailer/config.json b/benchmarks/200.multimedia/210.thumbnailer/config.json index 8edb99e52..c250fe15b 100644 --- a/benchmarks/200.multimedia/210.thumbnailer/config.json +++ b/benchmarks/200.multimedia/210.thumbnailer/config.json @@ -1,6 +1,6 @@ { "timeout": 60, "memory": 256, - "languages": ["python", "nodejs"], + "languages": ["python", "nodejs", "pypy"], "modules": ["storage"] } diff --git 
a/benchmarks/200.multimedia/210.thumbnailer/pypy/function.py b/benchmarks/200.multimedia/210.thumbnailer/pypy/function.py new file mode 100644 index 000000000..ba24f9176 --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/function.py @@ -0,0 +1,70 @@ +import datetime +import io +import os +import sys +import uuid +from urllib.parse import unquote_plus +from PIL import Image + +import storage +client = storage.storage.get_instance() + +# Disk-based solution +#def resize_image(image_path, resized_path, w, h): +# with Image.open(image_path) as image: +# image.thumbnail((w,h)) +# image.save(resized_path) + +# Memory-based solution +def resize_image(image_bytes, w, h): + with Image.open(io.BytesIO(image_bytes)) as image: + image.thumbnail((w,h)) + out = io.BytesIO() + image.save(out, format='jpeg') + # necessary to rewind to the beginning of the buffer + out.seek(0) + return out + +def handler(event): + + bucket = event.get('bucket').get('bucket') + input_prefix = event.get('bucket').get('input') + output_prefix = event.get('bucket').get('output') + key = unquote_plus(event.get('object').get('key')) + width = event.get('object').get('width') + height = event.get('object').get('height') + # UUID to handle multiple calls + #download_path = '/tmp/{}-{}'.format(uuid.uuid4(), key) + #upload_path = '/tmp/resized-{}'.format(key) + #client.download(input_bucket, key, download_path) + #resize_image(download_path, upload_path, width, height) + #client.upload(output_bucket, key, upload_path) + download_begin = datetime.datetime.now() + img = client.download_stream(bucket, os.path.join(input_prefix, key)) + download_end = datetime.datetime.now() + + process_begin = datetime.datetime.now() + resized = resize_image(img, width, height) + resized_size = resized.getbuffer().nbytes + process_end = datetime.datetime.now() + + upload_begin = datetime.datetime.now() + key_name = client.upload_stream(bucket, os.path.join(output_prefix, key), resized) + upload_end = 
datetime.datetime.now() + + download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1) + upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + return { + 'result': { + 'bucket': bucket, + 'key': key_name + }, + 'measurement': { + 'download_time': download_time, + 'download_size': len(img), + 'upload_time': upload_time, + 'upload_size': resized_size, + 'compute_time': process_time + } + } diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt new file mode 100644 index 000000000..f29e80646 --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt @@ -0,0 +1 @@ +Pillow>=8.0.0 diff --git a/benchmarks/200.multimedia/220.video-processing/config.json b/benchmarks/200.multimedia/220.video-processing/config.json index 94ede7925..8e00a88e2 100644 --- a/benchmarks/200.multimedia/220.video-processing/config.json +++ b/benchmarks/200.multimedia/220.video-processing/config.json @@ -1,6 +1,6 @@ { "timeout": 60, "memory": 512, - "languages": ["python"], + "languages": ["python", "pypy"], "modules": ["storage"] } diff --git a/benchmarks/200.multimedia/220.video-processing/pypy/function.py b/benchmarks/200.multimedia/220.video-processing/pypy/function.py new file mode 100644 index 000000000..af5c09a4d --- /dev/null +++ b/benchmarks/200.multimedia/220.video-processing/pypy/function.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python + +import datetime +import os +import stat +import subprocess + + +import storage +client = storage.storage.get_instance() + +SCRIPT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__))) + +def call_ffmpeg(args): + ret = subprocess.run([os.path.join(SCRIPT_DIR, 'ffmpeg', 'ffmpeg'), '-y'] + args, + #subprocess might inherit Lambda's input for some reason + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, 
stderr=subprocess.STDOUT + ) + if ret.returncode != 0: + print('Invocation of ffmpeg failed!') + print('Out: ', ret.stdout.decode('utf-8')) + raise RuntimeError() + +# https://superuser.com/questions/556029/how-do-i-convert-a-video-to-gif-using-ffmpeg-with-reasonable-quality +def to_gif(video, duration, event): + output = '/tmp/processed-{}.gif'.format(os.path.basename(video)) + call_ffmpeg(["-i", video, + "-t", + "{0}".format(duration), + "-vf", + "fps=10,scale=320:-1:flags=lanczos,split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse", + "-loop", "0", + output]) + return output + +# https://devopstar.com/2019/01/28/serverless-watermark-using-aws-lambda-layers-ffmpeg/ +def watermark(video, duration, event): + output = '/tmp/processed-{}'.format(os.path.basename(video)) + watermark_file = os.path.dirname(os.path.realpath(__file__)) + call_ffmpeg([ + "-i", video, + "-i", os.path.join(watermark_file, os.path.join('resources', 'watermark.png')), + "-t", "{0}".format(duration), + "-filter_complex", "overlay=main_w/2-overlay_w/2:main_h/2-overlay_h/2", + output]) + return output + +def transcode_mp3(video, duration, event): + pass + +operations = { 'transcode' : transcode_mp3, 'extract-gif' : to_gif, 'watermark' : watermark } + +def handler(event): + + bucket = event.get('bucket').get('bucket') + input_prefix = event.get('bucket').get('input') + output_prefix = event.get('bucket').get('output') + key = event.get('object').get('key') + duration = event.get('object').get('duration') + op = event.get('object').get('op') + download_path = '/tmp/{}'.format(key) + + # Restore executable permission + ffmpeg_binary = os.path.join(SCRIPT_DIR, 'ffmpeg', 'ffmpeg') + # needed on Azure but read-only filesystem on AWS + try: + st = os.stat(ffmpeg_binary) + os.chmod(ffmpeg_binary, st.st_mode | stat.S_IEXEC) + except OSError: + pass + + download_begin = datetime.datetime.now() + client.download(bucket, os.path.join(input_prefix, key), download_path) + download_size = 
os.path.getsize(download_path) + download_stop = datetime.datetime.now() + + process_begin = datetime.datetime.now() + upload_path = operations[op](download_path, duration, event) + process_end = datetime.datetime.now() + + upload_begin = datetime.datetime.now() + filename = os.path.basename(upload_path) + upload_size = os.path.getsize(upload_path) + upload_key = client.upload(bucket, os.path.join(output_prefix, filename), upload_path) + upload_stop = datetime.datetime.now() + + download_time = (download_stop - download_begin) / datetime.timedelta(microseconds=1) + upload_time = (upload_stop - upload_begin) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + return { + 'result': { + 'bucket': bucket, + 'key': upload_key + }, + 'measurement': { + 'download_time': download_time, + 'download_size': download_size, + 'upload_time': upload_time, + 'upload_size': upload_size, + 'compute_time': process_time + } + } diff --git a/benchmarks/200.multimedia/220.video-processing/pypy/init.sh b/benchmarks/200.multimedia/220.video-processing/pypy/init.sh new file mode 100755 index 000000000..de6048a36 --- /dev/null +++ b/benchmarks/200.multimedia/220.video-processing/pypy/init.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +DIR=$1 +VERBOSE=$2 +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +path="${SCRIPT_DIR}/../init.sh" +if [ "$VERBOSE" = true ]; then + echo "Update ${DIR} with init script ${path}" +fi +bash ${path} ${DIR} ${VERBOSE} diff --git a/benchmarks/200.multimedia/220.video-processing/pypy/requirements.txt b/benchmarks/200.multimedia/220.video-processing/pypy/requirements.txt new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/300.utilities/311.compression/config.json b/benchmarks/300.utilities/311.compression/config.json index 8edb99e52..c250fe15b 100644 --- a/benchmarks/300.utilities/311.compression/config.json +++ 
b/benchmarks/300.utilities/311.compression/config.json @@ -1,6 +1,6 @@ { "timeout": 60, "memory": 256, - "languages": ["python", "nodejs"], + "languages": ["python", "nodejs", "pypy"], "modules": ["storage"] } diff --git a/benchmarks/300.utilities/311.compression/pypy/function.py b/benchmarks/300.utilities/311.compression/pypy/function.py new file mode 100644 index 000000000..1078ca6a2 --- /dev/null +++ b/benchmarks/300.utilities/311.compression/pypy/function.py @@ -0,0 +1,58 @@ +import datetime +import io +import os +import shutil +import uuid +import zlib + +import storage +client = storage.storage.get_instance() + +def parse_directory(directory): + + size = 0 + for root, dirs, files in os.walk(directory): + for file in files: + size += os.path.getsize(os.path.join(root, file)) + return size + +def handler(event): + + bucket = event.get('bucket').get('bucket') + input_prefix = event.get('bucket').get('input') + output_prefix = event.get('bucket').get('output') + key = event.get('object').get('key') + download_path = '/tmp/{}-{}'.format(key, uuid.uuid4()) + os.makedirs(download_path) + + s3_download_begin = datetime.datetime.now() + client.download_directory(bucket, os.path.join(input_prefix, key), download_path) + s3_download_stop = datetime.datetime.now() + size = parse_directory(download_path) + + compress_begin = datetime.datetime.now() + shutil.make_archive(os.path.join(download_path, key), 'zip', root_dir=download_path) + compress_end = datetime.datetime.now() + + s3_upload_begin = datetime.datetime.now() + archive_name = '{}.zip'.format(key) + archive_size = os.path.getsize(os.path.join(download_path, archive_name)) + key_name = client.upload(bucket, os.path.join(output_prefix, archive_name), os.path.join(download_path, archive_name)) + s3_upload_stop = datetime.datetime.now() + + download_time = (s3_download_stop - s3_download_begin) / datetime.timedelta(microseconds=1) + upload_time = (s3_upload_stop - s3_upload_begin) / datetime.timedelta(microseconds=1) 
+ process_time = (compress_end - compress_begin) / datetime.timedelta(microseconds=1) + return { + 'result': { + 'bucket': bucket, + 'key': key_name + }, + 'measurement': { + 'download_time': download_time, + 'download_size': size, + 'upload_time': upload_time, + 'upload_size': archive_size, + 'compute_time': process_time + } + } diff --git a/benchmarks/300.utilities/311.compression/pypy/requirements.txt b/benchmarks/300.utilities/311.compression/pypy/requirements.txt new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/500.scientific/501.graph-pagerank/config.json b/benchmarks/500.scientific/501.graph-pagerank/config.json index e80fb4351..a97e13cf5 100644 --- a/benchmarks/500.scientific/501.graph-pagerank/config.json +++ b/benchmarks/500.scientific/501.graph-pagerank/config.json @@ -1,6 +1,6 @@ { "timeout": 120, "memory": 512, - "languages": ["python"], + "languages": ["python", "pypy"], "modules": [] } diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/function.py b/benchmarks/500.scientific/501.graph-pagerank/pypy/function.py new file mode 100644 index 000000000..0e462e9b4 --- /dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/function.py @@ -0,0 +1,29 @@ +import datetime +import igraph + +def handler(event): + + size = event.get('size') + if "seed" in event: + import random + + random.seed(event["seed"]) + + graph_generating_begin = datetime.datetime.now() + graph = igraph.Graph.Barabasi(size, 10) + graph_generating_end = datetime.datetime.now() + + process_begin = datetime.datetime.now() + result = graph.pagerank() + process_end = datetime.datetime.now() + + graph_generating_time = (graph_generating_end - graph_generating_begin) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + + return { + 'result': result[0], + 'measurement': { + 'graph_generating_time': graph_generating_time, + 'compute_time': process_time + } + } diff --git 
a/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt new file mode 100644 index 000000000..b6287cf18 --- /dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt @@ -0,0 +1 @@ +python-igraph>=0.9.0 diff --git a/benchmarks/500.scientific/502.graph-mst/config.json b/benchmarks/500.scientific/502.graph-mst/config.json index e80fb4351..a97e13cf5 100644 --- a/benchmarks/500.scientific/502.graph-mst/config.json +++ b/benchmarks/500.scientific/502.graph-mst/config.json @@ -1,6 +1,6 @@ { "timeout": 120, "memory": 512, - "languages": ["python"], + "languages": ["python", "pypy"], "modules": [] } diff --git a/benchmarks/500.scientific/502.graph-mst/pypy/function.py b/benchmarks/500.scientific/502.graph-mst/pypy/function.py new file mode 100644 index 000000000..b63fbdce2 --- /dev/null +++ b/benchmarks/500.scientific/502.graph-mst/pypy/function.py @@ -0,0 +1,29 @@ +import datetime +import igraph + +def handler(event): + + size = event.get('size') + if "seed" in event: + import random + + random.seed(event["seed"]) + + graph_generating_begin = datetime.datetime.now() + graph = igraph.Graph.Barabasi(size, 10) + graph_generating_end = datetime.datetime.now() + + process_begin = datetime.datetime.now() + result = graph.spanning_tree(None, False) + process_end = datetime.datetime.now() + + graph_generating_time = (graph_generating_end - graph_generating_begin) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + + return { + 'result': result[0], + 'measurement': { + 'graph_generating_time': graph_generating_time, + 'compute_time': process_time + } + } diff --git a/benchmarks/500.scientific/502.graph-mst/pypy/requirements.txt b/benchmarks/500.scientific/502.graph-mst/pypy/requirements.txt new file mode 100644 index 000000000..b6287cf18 --- /dev/null +++ 
b/benchmarks/500.scientific/502.graph-mst/pypy/requirements.txt @@ -0,0 +1 @@ +python-igraph>=0.9.0 diff --git a/benchmarks/500.scientific/503.graph-bfs/config.json b/benchmarks/500.scientific/503.graph-bfs/config.json index e80fb4351..a97e13cf5 100644 --- a/benchmarks/500.scientific/503.graph-bfs/config.json +++ b/benchmarks/500.scientific/503.graph-bfs/config.json @@ -1,6 +1,6 @@ { "timeout": 120, "memory": 512, - "languages": ["python"], + "languages": ["python", "pypy"], "modules": [] } diff --git a/benchmarks/500.scientific/503.graph-bfs/pypy/function.py b/benchmarks/500.scientific/503.graph-bfs/pypy/function.py new file mode 100644 index 000000000..18423ae1a --- /dev/null +++ b/benchmarks/500.scientific/503.graph-bfs/pypy/function.py @@ -0,0 +1,29 @@ +import datetime +import igraph + +def handler(event): + + size = event.get('size') + if "seed" in event: + import random + + random.seed(event["seed"]) + + graph_generating_begin = datetime.datetime.now() + graph = igraph.Graph.Barabasi(size, 10) + graph_generating_end = datetime.datetime.now() + + process_begin = datetime.datetime.now() + result = graph.bfs(0) + process_end = datetime.datetime.now() + + graph_generating_time = (graph_generating_end - graph_generating_begin) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + + return { + 'result': result, + 'measurement': { + 'graph_generating_time': graph_generating_time, + 'compute_time': process_time + } + } diff --git a/benchmarks/500.scientific/503.graph-bfs/pypy/requirements.txt b/benchmarks/500.scientific/503.graph-bfs/pypy/requirements.txt new file mode 100644 index 000000000..b6287cf18 --- /dev/null +++ b/benchmarks/500.scientific/503.graph-bfs/pypy/requirements.txt @@ -0,0 +1 @@ +python-igraph>=0.9.0 diff --git a/config/systems.json b/config/systems.json index ff6f872be..c91b2c5c3 100644 --- a/config/systems.json +++ b/config/systems.json @@ -235,7 +235,7 @@ "pypy": { 
"base_images": { "x64": { - "3.11": "ubuntu:22.04" + "3.11": "ubuntu:20.04" } }, "images": [ diff --git a/sebs/benchmark.py b/sebs/benchmark.py index 39b919ef7..f868e25fd 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -374,6 +374,8 @@ def add_deployment_package_python(self, output_dir): ) for package in packages: out.write(package) + if not package.endswith('\n'): + out.write('\n') module_packages = self._system_config.deployment_module_packages( self._deployment_name, self.language_name @@ -382,6 +384,8 @@ def add_deployment_package_python(self, output_dir): if bench_module.value in module_packages: for package in module_packages[bench_module.value]: out.write(package) + if not package.endswith('\n'): + out.write('\n') def add_deployment_package_nodejs(self, output_dir): # modify package.json diff --git a/tools/build_docker_images.py b/tools/build_docker_images.py index 365792fc3..6b194266e 100755 --- a/tools/build_docker_images.py +++ b/tools/build_docker_images.py @@ -63,6 +63,15 @@ def build(image_type, system, language=None, version=None, version_name=None): } if PLATFORM: build_kwargs["platform"] = PLATFORM + elif system in config and "architecture" in config[system]: + archs = config[system]["architecture"] + if len(archs) == 1: + if archs[0] == "x64": + build_kwargs["platform"] = "linux/amd64" + print(f"Automatically using platform linux/amd64 for {system}") + elif archs[0] == "arm64": + build_kwargs["platform"] = "linux/arm64" + print(f"Automatically using platform linux/arm64 for {system}") try: client.images.build(**build_kwargs) From 274856db6e439ad05cd56e883f549d402ede0d1b Mon Sep 17 00:00:00 2001 From: Alexander Schlieper Date: Thu, 11 Dec 2025 13:52:06 +0100 Subject: [PATCH 06/31] Add Java runtime support for AWS Lambda and Azure Functions Implements complete Java runtime infrastructure enabling serverless function benchmarking on AWS Lambda and Azure Functions platforms. 
Key components: - AWS/Azure function wrappers with cold start tracking and JSON handling - Maven-based build system with shaded JAR packaging - Docker build images for both platforms (Java 17) - Package structure: lib/ for JARs, handler/ for function metadata Benchmarks implemented: - 010.sleep: Basic microbenchmark for testing infrastructure - 110.dynamic-html: Web app with Mustache templating Critical fixes: - Removed Maven quiet mode (-q) causing Docker build failures - Proper dependency scoping (Azure Functions and Jackson must be compiled, not provided) - Platform-specific packaging logic (JAR structure differs from Python/Node.js) Tested and verified working on Azure Functions with Java 17. --- .../000.microbenchmarks/010.sleep/config.json | 2 +- .../java/src/main/java/function/Function.java | 33 ++++++ .../100.webapps/110.dynamic-html/config.json | 2 +- .../100.webapps/110.dynamic-html/java/init.sh | 6 ++ .../100.webapps/110.dynamic-html/java/pom.xml | 73 +++++++++++++ .../java/src/main/java/function/Function.java | 100 ++++++++++++++++++ .../java/templates/template.html | 26 +++++ benchmarks/wrappers/aws/java/pom.xml | 55 ++++++++++ .../org/serverlessbench/ColdStartTracker.java | 35 ++++++ .../org/serverlessbench/FunctionInvoker.java | 41 +++++++ .../java/org/serverlessbench/Handler.java | 49 +++++++++ benchmarks/wrappers/azure/java/pom.xml | 71 +++++++++++++ .../org/serverlessbench/ColdStartTracker.java | 33 ++++++ .../org/serverlessbench/FunctionInvoker.java | 41 +++++++ .../java/org/serverlessbench/Handler.java | 79 ++++++++++++++ config/systems.json | 40 +++++++ dockerfiles/aws/java/Dockerfile.build | 22 ++++ dockerfiles/aws/java/Dockerfile.function | 16 +++ dockerfiles/azure/java/Dockerfile.build | 18 ++++ dockerfiles/java_installer.sh | 19 ++++ sebs.py | 2 +- sebs/aws/aws.py | 54 ++++++---- sebs/azure/azure.py | 94 +++++++++++++--- sebs/benchmark.py | 45 ++++++-- sebs/faas/function.py | 3 +- tools/build_docker_images.py | 19 +++- 26 files changed, 927 
insertions(+), 51 deletions(-) create mode 100644 benchmarks/000.microbenchmarks/010.sleep/java/src/main/java/function/Function.java create mode 100755 benchmarks/100.webapps/110.dynamic-html/java/init.sh create mode 100644 benchmarks/100.webapps/110.dynamic-html/java/pom.xml create mode 100644 benchmarks/100.webapps/110.dynamic-html/java/src/main/java/function/Function.java create mode 100644 benchmarks/100.webapps/110.dynamic-html/java/templates/template.html create mode 100644 benchmarks/wrappers/aws/java/pom.xml create mode 100644 benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/ColdStartTracker.java create mode 100644 benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/FunctionInvoker.java create mode 100644 benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/Handler.java create mode 100644 benchmarks/wrappers/azure/java/pom.xml create mode 100644 benchmarks/wrappers/azure/java/src/main/java/org/serverlessbench/ColdStartTracker.java create mode 100644 benchmarks/wrappers/azure/java/src/main/java/org/serverlessbench/FunctionInvoker.java create mode 100644 benchmarks/wrappers/azure/java/src/main/java/org/serverlessbench/Handler.java create mode 100644 dockerfiles/aws/java/Dockerfile.build create mode 100644 dockerfiles/aws/java/Dockerfile.function create mode 100644 dockerfiles/azure/java/Dockerfile.build create mode 100644 dockerfiles/java_installer.sh diff --git a/benchmarks/000.microbenchmarks/010.sleep/config.json b/benchmarks/000.microbenchmarks/010.sleep/config.json index 93ce2f561..ce9e1e321 100644 --- a/benchmarks/000.microbenchmarks/010.sleep/config.json +++ b/benchmarks/000.microbenchmarks/010.sleep/config.json @@ -1,6 +1,6 @@ { "timeout": 120, "memory": 128, - "languages": ["python", "nodejs"], + "languages": ["python", "nodejs", "java"], "modules": [] } diff --git a/benchmarks/000.microbenchmarks/010.sleep/java/src/main/java/function/Function.java 
b/benchmarks/000.microbenchmarks/010.sleep/java/src/main/java/function/Function.java new file mode 100644 index 000000000..acd2b8f32 --- /dev/null +++ b/benchmarks/000.microbenchmarks/010.sleep/java/src/main/java/function/Function.java @@ -0,0 +1,33 @@ +package function; + +import java.util.HashMap; +import java.util.Map; + +public class Function { + + public Map handler(Map event) { + double sleepSeconds = parseSeconds(event.get("sleep")); + try { + Thread.sleep((long) (sleepSeconds * 1000)); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + Map result = new HashMap<>(); + result.put("result", sleepSeconds); + return result; + } + + private double parseSeconds(Object value) { + if (value instanceof Number) { + return ((Number) value).doubleValue(); + } + if (value instanceof String) { + try { + return Double.parseDouble((String) value); + } catch (NumberFormatException ignored) { + return 0; + } + } + return 0; + } +} diff --git a/benchmarks/100.webapps/110.dynamic-html/config.json b/benchmarks/100.webapps/110.dynamic-html/config.json index 25254c247..7e317037b 100644 --- a/benchmarks/100.webapps/110.dynamic-html/config.json +++ b/benchmarks/100.webapps/110.dynamic-html/config.json @@ -1,6 +1,6 @@ { "timeout": 10, "memory": 128, - "languages": ["python", "nodejs"], + "languages": ["python", "nodejs", "java"], "modules": [] } diff --git a/benchmarks/100.webapps/110.dynamic-html/java/init.sh b/benchmarks/100.webapps/110.dynamic-html/java/init.sh new file mode 100755 index 000000000..b26574290 --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/java/init.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +OUTPUT_DIR=$1 + +# Copy templates directory to the output directory +cp -r templates "$OUTPUT_DIR/" diff --git a/benchmarks/100.webapps/110.dynamic-html/java/pom.xml b/benchmarks/100.webapps/110.dynamic-html/java/pom.xml new file mode 100644 index 000000000..fb7e685e6 --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/java/pom.xml @@ 
-0,0 +1,73 @@ + + + 4.0.0 + function + dynamic-html + 1.0 + + 17 + 17 + UTF-8 + + + + + com.github.spullara.mustache.java + compiler + 0.9.10 + + + + com.microsoft.azure.functions + azure-functions-java-library + 3.0.0 + + + + com.fasterxml.jackson.core + jackson-databind + 2.17.1 + + + + + + ${project.basedir}/templates + templates + + **/*.html + + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.5.1 + + + package + + shade + + + false + + + *:* + + META-INF/*.SF + META-INF/*.RSA + META-INF/*.DSA + + + + + + + + + + diff --git a/benchmarks/100.webapps/110.dynamic-html/java/src/main/java/function/Function.java b/benchmarks/100.webapps/110.dynamic-html/java/src/main/java/function/Function.java new file mode 100644 index 000000000..20b38f474 --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/java/src/main/java/function/Function.java @@ -0,0 +1,100 @@ +package function; + +import com.github.mustachejava.DefaultMustacheFactory; +import com.github.mustachejava.Mustache; +import com.github.mustachejava.MustacheFactory; + +import java.io.*; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.*; + +public class Function { + + private static final DateTimeFormatter DATE_FORMATTER = + DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); + + public Map handler(Map event) { + try { + // Get input parameters + String username = (String) event.getOrDefault("username", "Guest"); + int randomLen = parseRandomLen(event.get("random_len")); + + // Generate random numbers + List randomNumbers = generateRandomNumbers(randomLen); + + // Get current time + String currentTime = LocalDateTime.now().format(DATE_FORMATTER); + + // Prepare template data + Map templateData = new HashMap<>(); + templateData.put("username", username); + templateData.put("cur_time", currentTime); + templateData.put("random_numbers", randomNumbers); + + // Render HTML + String html = renderTemplate(templateData); + + // Return result + Map result = new 
HashMap<>(); + result.put("result", html); + return result; + + } catch (Exception e) { + // Return error as result to avoid crashing + Map result = new HashMap<>(); + StringWriter sw = new StringWriter(); + PrintWriter pw = new PrintWriter(sw); + e.printStackTrace(pw); + result.put("result", "

Error

" + 
+                      sw.toString() + "
"); + return result; + } + } + + private int parseRandomLen(Object value) { + if (value instanceof Number) { + return ((Number) value).intValue(); + } + if (value instanceof String) { + try { + return Integer.parseInt((String) value); + } catch (NumberFormatException e) { + return 10; // default + } + } + return 10; // default + } + + private List generateRandomNumbers(int count) { + Random random = new Random(); + List numbers = new ArrayList<>(count); + for (int i = 0; i < count; i++) { + numbers.add(random.nextInt(1000000)); + } + return numbers; + } + + private String renderTemplate(Map data) throws Exception { + // Try to load template from classpath + InputStream templateStream = getClass().getClassLoader() + .getResourceAsStream("templates/template.html"); + + if (templateStream == null) { + throw new IOException("Template not found in classpath"); + } + + // Create Mustache factory and compile template + MustacheFactory mf = new DefaultMustacheFactory(); + Mustache mustache; + + try (InputStreamReader reader = new InputStreamReader(templateStream)) { + mustache = mf.compile(reader, "template"); + } + + // Render template + StringWriter writer = new StringWriter(); + mustache.execute(writer, data).flush(); + return writer.toString(); + } +} diff --git a/benchmarks/100.webapps/110.dynamic-html/java/templates/template.html b/benchmarks/100.webapps/110.dynamic-html/java/templates/template.html new file mode 100644 index 000000000..46199563c --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/java/templates/template.html @@ -0,0 +1,26 @@ + + + + Randomly generated data. + + + + + +
+

Welcome {{username}}!

+

Data generated at: {{cur_time}}!

+

Requested random numbers:

+
    + {{#random_numbers}} +
  • {{.}}
  • + {{/random_numbers}} +
+
+ + diff --git a/benchmarks/wrappers/aws/java/pom.xml b/benchmarks/wrappers/aws/java/pom.xml new file mode 100644 index 000000000..f4d083216 --- /dev/null +++ b/benchmarks/wrappers/aws/java/pom.xml @@ -0,0 +1,55 @@ + + 4.0.0 + org.serverlessbench + function + 1.0.0 + + 17 + 17 + + + + com.amazonaws + aws-lambda-java-core + 1.2.3 + + + com.fasterxml.jackson.core + jackson-databind + 2.17.1 + + + + function + + + org.apache.maven.plugins + maven-shade-plugin + 3.5.1 + + + package + + shade + + + false + + + *:* + + META-INF/*.SF + META-INF/*.RSA + META-INF/*.DSA + + + + + + + + + + diff --git a/benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/ColdStartTracker.java b/benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/ColdStartTracker.java new file mode 100644 index 000000000..e7cb2e011 --- /dev/null +++ b/benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/ColdStartTracker.java @@ -0,0 +1,35 @@ +package org.serverlessbench; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.UUID; +import java.util.concurrent.atomic.AtomicBoolean; + +final class ColdStartTracker { + + private static final AtomicBoolean COLD = new AtomicBoolean(true); + private static final Path MARKER = Path.of("/tmp/cold_run"); + + private ColdStartTracker() {} + + static boolean isCold() { + if (Files.exists(MARKER)) { + COLD.set(false); + return false; + } + boolean first = COLD.getAndSet(false); + if (first) { + try { + Files.writeString( + MARKER, + UUID.randomUUID().toString().substring(0, 8), + StandardCharsets.UTF_8); + } catch (IOException ignored) { + // best-effort marker write + } + } + return first; + } +} diff --git a/benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/FunctionInvoker.java b/benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/FunctionInvoker.java new file mode 100644 index 000000000..7d9c83570 --- /dev/null +++ 
b/benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/FunctionInvoker.java @@ -0,0 +1,41 @@ +package org.serverlessbench; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.HashMap; +import java.util.Map; + +final class FunctionInvoker { + + private static final String DEFAULT_CLASS = "function.Function"; + private static final String DEFAULT_METHOD = "handler"; + + private FunctionInvoker() {} + + static Map invoke(Map input) { + try { + Class fnClass = Class.forName(DEFAULT_CLASS); + Object instance = fnClass.getDeclaredConstructor().newInstance(); + Method method = fnClass.getMethod(DEFAULT_METHOD, Map.class); + Object result = method.invoke(instance, input); + if (result instanceof Map) { + @SuppressWarnings("unchecked") + Map casted = (Map) result; + return casted; + } + } catch (ClassNotFoundException e) { + return defaultResponse("Function implementation not found"); + } catch (NoSuchMethodException e) { + return defaultResponse("Function.handler(Map) missing"); + } catch (InvocationTargetException | InstantiationException | IllegalAccessException e) { + return defaultResponse("Failed to invoke function: " + e.getMessage()); + } + return defaultResponse("Function returned unsupported type"); + } + + private static Map defaultResponse(String message) { + Map out = new HashMap<>(); + out.put("output", message); + return out; + } +} diff --git a/benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/Handler.java b/benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/Handler.java new file mode 100644 index 000000000..5c1781e6b --- /dev/null +++ b/benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/Handler.java @@ -0,0 +1,49 @@ +package org.serverlessbench; + +import com.amazonaws.services.lambda.runtime.Context; +import com.amazonaws.services.lambda.runtime.RequestHandler; +import com.fasterxml.jackson.databind.ObjectMapper; + +import java.util.HashMap; +import 
java.util.Map; + +public class Handler implements RequestHandler, Map> { + + private static final ObjectMapper MAPPER = new ObjectMapper(); + + @Override + public Map handleRequest(Map event, Context context) { + long beginNs = System.nanoTime(); + Map normalized = normalize(event); + Map result = FunctionInvoker.invoke(normalized); + long endNs = System.nanoTime(); + + Map body = new HashMap<>(); + body.put("begin", beginNs / 1_000_000_000.0); + body.put("end", endNs / 1_000_000_000.0); + body.put("compute_time", (endNs - beginNs) / 1_000.0); + body.put("results_time", 0); + body.put("result", result); + body.put("is_cold", ColdStartTracker.isCold()); + body.put("request_id", context != null ? context.getAwsRequestId() : ""); + + return body; + } + + private Map normalize(Map event) { + if (event == null) { + return new HashMap<>(); + } + Object body = event.get("body"); + if (body instanceof String) { + try { + @SuppressWarnings("unchecked") + Map parsed = MAPPER.readValue((String) body, Map.class); + return parsed; + } catch (Exception ignored) { + // fall back to original event + } + } + return new HashMap<>(event); + } +} diff --git a/benchmarks/wrappers/azure/java/pom.xml b/benchmarks/wrappers/azure/java/pom.xml new file mode 100644 index 000000000..195df6160 --- /dev/null +++ b/benchmarks/wrappers/azure/java/pom.xml @@ -0,0 +1,71 @@ + + 4.0.0 + org.serverlessbench + function + 1.0.0 + + 17 + 17 + + + + com.microsoft.azure.functions + azure-functions-java-library + 3.0.0 + + + com.fasterxml.jackson.core + jackson-databind + 2.17.1 + + + + function + + + com.microsoft.azure + azure-functions-maven-plugin + 1.31.0 + + unused + unused + westeurope + + linux + 17 + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.5.1 + + + package + + shade + + + false + + + *:* + + META-INF/*.SF + META-INF/*.RSA + META-INF/*.DSA + + + + + + + + + + diff --git a/benchmarks/wrappers/azure/java/src/main/java/org/serverlessbench/ColdStartTracker.java 
b/benchmarks/wrappers/azure/java/src/main/java/org/serverlessbench/ColdStartTracker.java new file mode 100644 index 000000000..fbedaa208 --- /dev/null +++ b/benchmarks/wrappers/azure/java/src/main/java/org/serverlessbench/ColdStartTracker.java @@ -0,0 +1,33 @@ +package org.serverlessbench; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.UUID; +import java.util.concurrent.atomic.AtomicBoolean; + +final class ColdStartTracker { + + private static final AtomicBoolean WORKER_COLD = new AtomicBoolean(true); + private static final Path MARKER = Path.of("/tmp/cold_run"); + + private ColdStartTracker() {} + + static boolean isCold() { + if (Files.exists(MARKER)) { + return false; + } + try { + Files.writeString( + MARKER, UUID.randomUUID().toString().substring(0, 8), StandardCharsets.UTF_8); + } catch (IOException ignored) { + // best-effort marker write + } + return true; + } + + static boolean isWorkerCold() { + return WORKER_COLD.getAndSet(false); + } +} diff --git a/benchmarks/wrappers/azure/java/src/main/java/org/serverlessbench/FunctionInvoker.java b/benchmarks/wrappers/azure/java/src/main/java/org/serverlessbench/FunctionInvoker.java new file mode 100644 index 000000000..7d9c83570 --- /dev/null +++ b/benchmarks/wrappers/azure/java/src/main/java/org/serverlessbench/FunctionInvoker.java @@ -0,0 +1,41 @@ +package org.serverlessbench; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.HashMap; +import java.util.Map; + +final class FunctionInvoker { + + private static final String DEFAULT_CLASS = "function.Function"; + private static final String DEFAULT_METHOD = "handler"; + + private FunctionInvoker() {} + + static Map invoke(Map input) { + try { + Class fnClass = Class.forName(DEFAULT_CLASS); + Object instance = fnClass.getDeclaredConstructor().newInstance(); + Method method = fnClass.getMethod(DEFAULT_METHOD, 
Map.class); + Object result = method.invoke(instance, input); + if (result instanceof Map) { + @SuppressWarnings("unchecked") + Map casted = (Map) result; + return casted; + } + } catch (ClassNotFoundException e) { + return defaultResponse("Function implementation not found"); + } catch (NoSuchMethodException e) { + return defaultResponse("Function.handler(Map) missing"); + } catch (InvocationTargetException | InstantiationException | IllegalAccessException e) { + return defaultResponse("Failed to invoke function: " + e.getMessage()); + } + return defaultResponse("Function returned unsupported type"); + } + + private static Map defaultResponse(String message) { + Map out = new HashMap<>(); + out.put("output", message); + return out; + } +} diff --git a/benchmarks/wrappers/azure/java/src/main/java/org/serverlessbench/Handler.java b/benchmarks/wrappers/azure/java/src/main/java/org/serverlessbench/Handler.java new file mode 100644 index 000000000..8f9b3e1cf --- /dev/null +++ b/benchmarks/wrappers/azure/java/src/main/java/org/serverlessbench/Handler.java @@ -0,0 +1,79 @@ +package org.serverlessbench; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.microsoft.azure.functions.*; +import com.microsoft.azure.functions.annotation.AuthorizationLevel; +import com.microsoft.azure.functions.annotation.FunctionName; +import com.microsoft.azure.functions.annotation.HttpTrigger; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + +public class Handler { + + private static final ObjectMapper MAPPER = new ObjectMapper(); + + @FunctionName("handler") + public HttpResponseMessage handleRequest( + @HttpTrigger( + name = "req", + methods = {HttpMethod.GET, HttpMethod.POST}, + authLevel = AuthorizationLevel.ANONYMOUS) + final HttpRequestMessage> request, + final ExecutionContext context) { + + long beginNs = System.nanoTime(); + Map normalized = normalizeRequest(request); + Map result = 
FunctionInvoker.invoke(normalized); + long endNs = System.nanoTime(); + + Map body = new HashMap<>(); + body.put("begin", beginNs / 1_000_000_000.0); + body.put("end", endNs / 1_000_000_000.0); + body.put("compute_time", (endNs - beginNs) / 1_000.0); + body.put("results_time", 0); + body.put("result", result); + body.put("is_cold", ColdStartTracker.isCold()); + body.put("is_cold_worker", ColdStartTracker.isWorkerCold()); + body.put("request_id", context != null ? context.getInvocationId() : ""); + + String coldStartVar = System.getenv("cold_start"); + if (coldStartVar != null) { + body.put("cold_start_var", coldStartVar); + } + + String json = toJson(body); + return request + .createResponseBuilder(HttpStatus.OK) + .header("Content-Type", "application/json") + .body(json) + .build(); + } + + private Map normalizeRequest(HttpRequestMessage> request) { + if (request == null) { + return new HashMap<>(); + } + Optional body = request.getBody(); + if (body.isPresent()) { + try { + @SuppressWarnings("unchecked") + Map parsed = MAPPER.readValue(body.get(), Map.class); + return parsed; + } catch (IOException ignored) { + // ignore and continue + } + } + return new HashMap<>(request.getQueryParameters()); + } + + private String toJson(Map payload) { + try { + return MAPPER.writeValueAsString(payload); + } catch (IOException e) { + return "{}"; + } + } +} diff --git a/config/systems.json b/config/systems.json index 5a38b4965..5541f67cc 100644 --- a/config/systems.json +++ b/config/systems.json @@ -121,6 +121,27 @@ "uuid": "3.4.0" } } + }, + "java": { + "base_images": { + "x64": { + "17": "public.ecr.aws/lambda/java:17" + }, + "arm64": { + "17": "public.ecr.aws/lambda/java:17" + } + }, + "images": [ + "build" + ], + "deployment": { + "files": [ + "pom.xml", + "src" + ], + "packages": {}, + "module_packages": {} + } } }, "architecture": ["x64", "arm64"], @@ -181,6 +202,25 @@ "uuid": "3.4.0" } } + }, + "java": { + "base_images": { + "x64": { + "17": 
"mcr.microsoft.com/azure-functions/java:4-java17" + } + }, + "images": [ + "build" + ], + "username": "docker_user", + "deployment": { + "files": [ + "pom.xml", + "src" + ], + "packages": {}, + "module_packages": {} + } } }, "images": { diff --git a/dockerfiles/aws/java/Dockerfile.build b/dockerfiles/aws/java/Dockerfile.build new file mode 100644 index 000000000..2990a296b --- /dev/null +++ b/dockerfiles/aws/java/Dockerfile.build @@ -0,0 +1,22 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV JAVA_VERSION=${VERSION} + +# useradd, groupmod, build tooling +RUN yum install -y shadow-utils unzip tar gzip maven zip +ENV GOSU_VERSION 1.14 +# https://github.com/tianon/gosu/releases/tag/1.14 +# key https://keys.openpgp.org/search?q=tianon%40debian.org +RUN curl -o /usr/local/bin/gosu -SL "https://github.com/tianon/gosu/releases/download/${GOSU_VERSION}/gosu-amd64" \ + && chmod +x /usr/local/bin/gosu +RUN mkdir -p /sebs/ +COPY dockerfiles/java_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh /sebs/installer.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV PATH=/usr/sbin:$PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/aws/java/Dockerfile.function b/dockerfiles/aws/java/Dockerfile.function new file mode 100644 index 000000000..07ae2f1c7 --- /dev/null +++ b/dockerfiles/aws/java/Dockerfile.function @@ -0,0 +1,16 @@ +ARG BASE_IMAGE +FROM $BASE_IMAGE +ARG VERSION +ENV JAVA_VERSION=${VERSION} +ARG TARGET_ARCHITECTURE + +COPY . 
function/ +WORKDIR /function + +# Ensure packaged jar is present for the Lambda base image +RUN if [ -d "target" ] && ls target/*.jar >/dev/null 2>&1; then \ + cp target/*.jar function.jar; \ + fi \ + && test -f function.jar + +CMD ["org.serverlessbench.Handler::handleRequest"] diff --git a/dockerfiles/azure/java/Dockerfile.build b/dockerfiles/azure/java/Dockerfile.build new file mode 100644 index 000000000..6d7a36972 --- /dev/null +++ b/dockerfiles/azure/java/Dockerfile.build @@ -0,0 +1,18 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV JAVA_VERSION=${VERSION} + +RUN apt-get update && apt-get install -y gosu maven unzip zip \ + && apt-get clean + +RUN mkdir -p /sebs/ +COPY dockerfiles/java_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh /sebs/installer.sh + +# useradd and groupmod are in /usr/sbin which is not in PATH +ENV PATH=/usr/sbin:$PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/java_installer.sh b/dockerfiles/java_installer.sh new file mode 100644 index 000000000..2fd4fc1b2 --- /dev/null +++ b/dockerfiles/java_installer.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +set -euo pipefail + +cd /mnt/function + +if [[ -f "pom.xml" ]]; then + # Note: -q flag causes issues in Docker, removed for reliable builds + mvn -DskipTests package + + if ls target/*.jar >/dev/null 2>&1; then + JAR_PATH=$(ls target/*.jar | head -n1) + cp "${JAR_PATH}" function.jar + fi +fi + +if [[ -f "${SCRIPT_FILE:-}" ]]; then + /bin/bash "${SCRIPT_FILE}" . 
+fi diff --git a/sebs.py b/sebs.py index 80fb11ed3..9334c6f69 100755 --- a/sebs.py +++ b/sebs.py @@ -64,7 +64,7 @@ def simplified_common_params(func): @click.option( "--language", default=None, - type=click.Choice(["python", "nodejs"]), + type=click.Choice(["python", "nodejs", "java"]), help="Benchmark language", ) @click.option("--language-version", default=None, type=str, help="Benchmark language version") diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 243a6f0f9..8e9e10f6f 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -134,33 +134,49 @@ def package_code( directory, language_name, language_version, architecture, benchmark, is_cached ) - CONFIG_FILES = { - "python": ["handler.py", "requirements.txt", ".python_packages"], - "nodejs": ["handler.js", "package.json", "node_modules"], - } - package_config = CONFIG_FILES[language_name] - function_dir = os.path.join(directory, "function") - os.makedirs(function_dir) - # move all files to 'function' except handler.py - for file in os.listdir(directory): - if file not in package_config: - file = os.path.join(directory, file) - shutil.move(file, function_dir) - # FIXME: use zipfile - # create zip with hidden directory but without parent directory - execute("zip -qu -r9 {}.zip * .".format(benchmark), shell=True, cwd=directory) - benchmark_archive = "{}.zip".format(os.path.join(directory, benchmark)) + if language_name == "java": + jar_path = os.path.join(directory, "function.jar") + if not os.path.exists(jar_path): + raise RuntimeError("function.jar missing. 
Ensure Java build produced the jar.") + package_dir = os.path.join(directory, "package") + os.makedirs(package_dir, exist_ok=True) + shutil.copy2(jar_path, os.path.join(package_dir, "function.jar")) + execute("zip -qu -r9 {}.zip .".format(benchmark), shell=True, cwd=package_dir) + benchmark_archive = "{}.zip".format(os.path.join(package_dir, benchmark)) + else: + CONFIG_FILES = { + "python": ["handler.py", "requirements.txt", ".python_packages"], + "nodejs": ["handler.js", "package.json", "node_modules"], + } + package_config = CONFIG_FILES[language_name] + function_dir = os.path.join(directory, "function") + os.makedirs(function_dir) + # move all files to 'function' except handler.py + for file in os.listdir(directory): + if file not in package_config: + file = os.path.join(directory, file) + shutil.move(file, function_dir) + # FIXME: use zipfile + # create zip with hidden directory but without parent directory + execute("zip -qu -r9 {}.zip * .".format(benchmark), shell=True, cwd=directory) + benchmark_archive = "{}.zip".format(os.path.join(directory, benchmark)) self.logging.info("Created {} archive".format(benchmark_archive)) - bytes_size = os.path.getsize(os.path.join(directory, benchmark_archive)) + bytes_size = os.path.getsize(benchmark_archive) mbytes = bytes_size / 1024.0 / 1024.0 self.logging.info("Zip archive size {:2f} MB".format(mbytes)) return ( - os.path.join(directory, "{}.zip".format(benchmark)), + benchmark_archive, bytes_size, container_uri, ) + def _default_handler(self, language: str) -> str: + + if language == "java": + return "org.serverlessbench.Handler::handleRequest" + return "handler.handler" + def _map_architecture(self, architecture: str) -> str: @@ -254,7 +270,7 @@ def create_function( create_function_params["Runtime"] = "{}{}".format( language, self._map_language_runtime(language, language_runtime) ) - create_function_params["Handler"] = "handler.handler" + create_function_params["Handler"] = self._default_handler(language) 
create_function_params = { k: v for k, v in create_function_params.items() if v is not None diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index d848d724a..c9aa0b6d6 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -33,12 +33,23 @@ class Azure(System): _config: AzureConfig # runtime mapping - AZURE_RUNTIMES = {"python": "python", "nodejs": "node"} + AZURE_RUNTIMES = {"python": "python", "nodejs": "node", "java": "java"} @staticmethod def name(): return "azure" + @staticmethod + def _normalize_runtime_version(language: str, version: str) -> str: + """ + Azure Functions Java expects versions with a minor component + (e.g. 17.0 instead of 17). Other languages can keep the version + as-is. + """ + if language == "java" and re.match(r"^\d+$", str(version)): + return f"{version}.0" + return version + @property def config(self) -> AzureConfig: return self._config @@ -133,36 +144,81 @@ def package_code( # In previous step we ran a Docker container which installed packages # Python packages are in .python_packages because this is expected by Azure - EXEC_FILES = {"python": "handler.py", "nodejs": "handler.js"} + EXEC_FILES = {"python": "handler.py", "nodejs": "handler.js", "java": "../function.jar"} CONFIG_FILES = { "python": ["requirements.txt", ".python_packages"], "nodejs": ["package.json", "node_modules"], + "java": ["function.jar"], } package_config = CONFIG_FILES[language_name] handler_dir = os.path.join(directory, "handler") os.makedirs(handler_dir) + + # For Java, create lib directory for JARs and exclude build artifacts + if language_name == "java": + lib_dir = os.path.join(directory, "lib") + os.makedirs(lib_dir, exist_ok=True) + # Move function.jar to lib directory + if os.path.exists(os.path.join(directory, "function.jar")): + shutil.move(os.path.join(directory, "function.jar"), os.path.join(lib_dir, "function.jar")) + # For Java, we want to keep lib and exclude source files/build artifacts + package_config = ["lib", "src", "pom.xml", 
"target", ".mvn", "mvnw", "mvnw.cmd"] + # move all files to 'handler' except package config for f in os.listdir(directory): if f not in package_config: source_file = os.path.join(directory, f) shutil.move(source_file, handler_dir) + + # For Java, clean up build artifacts that we don't want to deploy + if language_name == "java": + for artifact in ["src", "pom.xml", "target", ".mvn", "mvnw", "mvnw.cmd"]: + artifact_path = os.path.join(directory, artifact) + if os.path.exists(artifact_path): + if os.path.isdir(artifact_path): + shutil.rmtree(artifact_path) + else: + os.remove(artifact_path) # generate function.json # TODO: extension to other triggers than HTTP - default_function_json = { - "scriptFile": EXEC_FILES[language_name], - "bindings": [ - { - "authLevel": "anonymous", - "type": "httpTrigger", - "direction": "in", - "name": "req", - "methods": ["get", "post"], - }, - {"type": "http", "direction": "out", "name": "$return"}, - ], - } + if language_name == "java": + # Java Azure Functions - For annotation-based functions, function.json + # should include scriptFile and entryPoint + # The @FunctionName annotation determines the function name + default_function_json = { + "scriptFile": "../lib/function.jar", + "entryPoint": "org.serverlessbench.Handler.handleRequest", + "bindings": [ + { + "type": "httpTrigger", + "direction": "in", + "name": "req", + "methods": ["get", "post"], + "authLevel": "anonymous" + }, + { + "type": "http", + "direction": "out", + "name": "$return" + } + ] + } + else: + default_function_json = { + "scriptFile": EXEC_FILES[language_name], + "bindings": [ + { + "authLevel": "anonymous", + "type": "httpTrigger", + "direction": "in", + "name": "req", + "methods": ["get", "post"], + }, + {"type": "http", "direction": "out", "name": "$return"}, + ], + } json_out = os.path.join(directory, "handler", "function.json") json.dump(default_function_json, open(json_out, "w"), indent=2) @@ -418,7 +474,13 @@ def create_function( raise 
NotImplementedError("Container deployment is not supported in Azure") language = code_package.language_name - language_runtime = code_package.language_version + language_runtime = self._normalize_runtime_version( + language, code_package.language_version + ) + # ensure string form is passed to Azure CLI + language_runtime = str(language_runtime) + if language == "java" and "." not in language_runtime: + language_runtime = f"{language_runtime}.0" resource_group = self.config.resources.resource_group(self.cli_instance) region = self.config.region function_cfg = FunctionConfig.from_benchmark(code_package) diff --git a/sebs/benchmark.py b/sebs/benchmark.py index f159e820c..1f6518cc9 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -252,8 +252,13 @@ def hash_directory(directory: str, deployment: str, language: str): FILES = { "python": ["*.py", "requirements.txt*"], "nodejs": ["*.js", "package.json"], + "java": [], + } + WRAPPERS = { + "python": ["*.py"], + "nodejs": ["*.js"], + "java": ["src", "pom.xml"], } - WRAPPERS = {"python": "*.py", "nodejs": "*.js"} NON_LANG_FILES = ["*.sh", "*.json"] selected_files = FILES[language] + NON_LANG_FILES for file_type in selected_files: @@ -262,13 +267,21 @@ def hash_directory(directory: str, deployment: str, language: str): with open(path, "rb") as opened_file: hash_sum.update(opened_file.read()) # wrappers - wrappers = project_absolute_path( - "benchmarks", "wrappers", deployment, language, WRAPPERS[language] - ) - for f in glob.glob(wrappers): - path = os.path.join(directory, f) - with open(path, "rb") as opened_file: - hash_sum.update(opened_file.read()) + wrapper_patterns = WRAPPERS[language] + for pattern in wrapper_patterns: + wrappers = project_absolute_path( + "benchmarks", "wrappers", deployment, language, pattern + ) + for f in glob.glob(wrappers): + if os.path.isdir(f): + for root, _, files in os.walk(f): + for file in files: + path = os.path.join(root, file) + with open(path, "rb") as opened_file: + 
hash_sum.update(opened_file.read()) + else: + with open(f, "rb") as opened_file: + hash_sum.update(opened_file.read()) return hash_sum.hexdigest() def serialize(self) -> dict: @@ -316,8 +329,12 @@ def copy_code(self, output_dir): FILES = { "python": ["*.py", "requirements.txt*"], "nodejs": ["*.js", "package.json"], + "java": [], } path = os.path.join(self.benchmark_path, self.language_name) + if self.language_name == "java": + shutil.copytree(path, output_dir, dirs_exist_ok=True) + return for file_type in FILES[self.language_name]: for f in glob.glob(os.path.join(path, file_type)): shutil.copy2(os.path.join(path, f), output_dir) @@ -356,7 +373,12 @@ def add_deployment_files(self, output_dir): ) ] for file in handlers: - shutil.copy2(file, os.path.join(output_dir)) + destination = os.path.join(output_dir, os.path.basename(file)) + if os.path.isdir(file): + shutil.copytree(file, destination, dirs_exist_ok=True) + else: + if not os.path.exists(destination): + shutil.copy2(file, destination) def add_deployment_package_python(self, output_dir): @@ -406,6 +428,9 @@ def add_deployment_package(self, output_dir): self.add_deployment_package_python(output_dir) elif self.language == Language.NODEJS: self.add_deployment_package_nodejs(output_dir) + elif self.language == Language.JAVA: + # Java dependencies are handled by Maven in the wrapper + return else: raise NotImplementedError @@ -483,7 +508,7 @@ def ensure_image(name: str) -> None: } # run Docker container to install packages - PACKAGE_FILES = {"python": "requirements.txt", "nodejs": "package.json"} + PACKAGE_FILES = {"python": "requirements.txt", "nodejs": "package.json", "java": "pom.xml"} file = os.path.join(output_dir, PACKAGE_FILES[self.language_name]) if os.path.exists(file): try: diff --git a/sebs/faas/function.py b/sebs/faas/function.py index 0fab7bcf4..9ddad97f4 100644 --- a/sebs/faas/function.py +++ b/sebs/faas/function.py @@ -263,6 +263,7 @@ def deserialize(cached_config: dict) -> "Trigger": class 
Language(Enum): PYTHON = "python" NODEJS = "nodejs" + JAVA = "java" # FIXME: 3.7+ python with future annotations @staticmethod @@ -299,7 +300,7 @@ def serialize(self) -> dict: @staticmethod def deserialize(config: dict) -> Runtime: - languages = {"python": Language.PYTHON, "nodejs": Language.NODEJS} + languages = {"python": Language.PYTHON, "nodejs": Language.NODEJS, "java": Language.JAVA} return Runtime(language=languages[config["language"]], version=config["version"]) diff --git a/tools/build_docker_images.py b/tools/build_docker_images.py index 5336fb485..448c78b76 100755 --- a/tools/build_docker_images.py +++ b/tools/build_docker_images.py @@ -13,7 +13,14 @@ "--deployment", default=None, choices=["local", "aws", "azure", "gcp"], action="store" ) parser.add_argument("--type", default=None, choices=["build", "run", "manage"], action="store") -parser.add_argument("--language", default=None, choices=["python", "nodejs"], action="store") +parser.add_argument( + "--language", default=None, choices=["python", "nodejs", "java"], action="store" +) +parser.add_argument( + "--platform", + default=None, + help="Optional Docker platform (e.g., linux/amd64) to override host architecture.", +) parser.add_argument("--language-version", default=None, type=str, action="store") args = parser.parse_args() config = json.load(open(os.path.join(PROJECT_DIR, "config", "systems.json"), "r")) @@ -51,8 +58,16 @@ def build(image_type, system, language=None, version=None, version_name=None): target, PROJECT_DIR, dockerfile, buildargs ) ) + platform_arg = args.platform or os.environ.get("DOCKER_DEFAULT_PLATFORM") + try: - client.images.build(path=PROJECT_DIR, dockerfile=dockerfile, buildargs=buildargs, tag=target) + client.images.build( + path=PROJECT_DIR, + dockerfile=dockerfile, + buildargs=buildargs, + tag=target, + platform=platform_arg, + ) except docker.errors.BuildError as exc: print("Error! 
Build failed!") print(exc) From dbf648d2c3fd9e06f2da2937620ec31f9acbd3fe Mon Sep 17 00:00:00 2001 From: Alexander Schlieper Date: Fri, 12 Dec 2025 11:38:58 +0100 Subject: [PATCH 07/31] rust support for aws --- .../000.microbenchmarks/010.sleep/config.json | 2 +- .../010.sleep/rust/.gitignore | 3 + .../010.sleep/rust/Cargo.toml | 14 + .../010.sleep/rust/src/main.rs | 61 +++++ config/systems.json | 31 +++ dockerfiles/aws/rust/Dockerfile.build | 32 +++ dockerfiles/aws/rust/Dockerfile.function | 24 ++ dockerfiles/rust_installer.sh | 32 +++ docs/rust-support.md | 239 ++++++++++++++++++ sebs.py | 2 +- sebs/aws/aws.py | 17 +- sebs/benchmark.py | 46 +++- sebs/faas/function.py | 3 +- tools/build_docker_images.py | 2 +- 14 files changed, 490 insertions(+), 18 deletions(-) create mode 100644 benchmarks/000.microbenchmarks/010.sleep/rust/.gitignore create mode 100644 benchmarks/000.microbenchmarks/010.sleep/rust/Cargo.toml create mode 100644 benchmarks/000.microbenchmarks/010.sleep/rust/src/main.rs create mode 100644 dockerfiles/aws/rust/Dockerfile.build create mode 100644 dockerfiles/aws/rust/Dockerfile.function create mode 100755 dockerfiles/rust_installer.sh create mode 100644 docs/rust-support.md diff --git a/benchmarks/000.microbenchmarks/010.sleep/config.json b/benchmarks/000.microbenchmarks/010.sleep/config.json index 93ce2f561..5a67c0afd 100644 --- a/benchmarks/000.microbenchmarks/010.sleep/config.json +++ b/benchmarks/000.microbenchmarks/010.sleep/config.json @@ -1,6 +1,6 @@ { "timeout": 120, "memory": 128, - "languages": ["python", "nodejs"], + "languages": ["python", "nodejs", "rust"], "modules": [] } diff --git a/benchmarks/000.microbenchmarks/010.sleep/rust/.gitignore b/benchmarks/000.microbenchmarks/010.sleep/rust/.gitignore new file mode 100644 index 000000000..a73202db5 --- /dev/null +++ b/benchmarks/000.microbenchmarks/010.sleep/rust/.gitignore @@ -0,0 +1,3 @@ +target/ +Cargo.lock +bootstrap diff --git a/benchmarks/000.microbenchmarks/010.sleep/rust/Cargo.toml 
b/benchmarks/000.microbenchmarks/010.sleep/rust/Cargo.toml new file mode 100644 index 000000000..0c55df5e9 --- /dev/null +++ b/benchmarks/000.microbenchmarks/010.sleep/rust/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "sleep-benchmark" +version = "0.1.0" +edition = "2021" + +[[bin]] +name = "bootstrap" +path = "src/main.rs" + +[dependencies] +lambda_runtime = "0.13" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +tokio = { version = "1", features = ["full"] } diff --git a/benchmarks/000.microbenchmarks/010.sleep/rust/src/main.rs b/benchmarks/000.microbenchmarks/010.sleep/rust/src/main.rs new file mode 100644 index 000000000..86308c155 --- /dev/null +++ b/benchmarks/000.microbenchmarks/010.sleep/rust/src/main.rs @@ -0,0 +1,61 @@ +use lambda_runtime::{service_fn, Error, LambdaEvent}; +use serde::{Deserialize, Serialize}; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +#[derive(Deserialize)] +struct Request { + sleep: Option, +} + +#[derive(Serialize)] +struct Response { + result: f64, + begin: f64, + end: f64, + is_cold: bool, + request_id: String, +} + +static mut IS_COLD: bool = true; + +async fn handler(event: LambdaEvent) -> Result { + let (payload, context) = event.into_parts(); + + let begin = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs_f64(); + + // Get the cold start status + let is_cold = unsafe { + let cold = IS_COLD; + IS_COLD = false; + cold + }; + + // Get sleep time from event + let sleep_time = payload.sleep.unwrap_or(0.0); + + // Sleep for the specified time + if sleep_time > 0.0 { + tokio::time::sleep(Duration::from_secs_f64(sleep_time)).await; + } + + let end = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs_f64(); + + Ok(Response { + result: sleep_time, + begin, + end, + is_cold, + request_id: context.request_id, + }) +} + +#[tokio::main] +async fn main() -> Result<(), Error> { + lambda_runtime::run(service_fn(handler)).await +} diff --git a/config/systems.json 
b/config/systems.json index 5a38b4965..b7bd23ba0 100644 --- a/config/systems.json +++ b/config/systems.json @@ -121,6 +121,37 @@ "uuid": "3.4.0" } } + }, + "rust": { + "base_images": { + "x64": { + "1.75": "amazonlinux:2023", + "1.76": "amazonlinux:2023", + "1.77": "amazonlinux:2023", + "1.78": "amazonlinux:2023", + "1.79": "amazonlinux:2023", + "1.80": "amazonlinux:2023", + "1.81": "amazonlinux:2023", + "1.82": "amazonlinux:2023" + }, + "arm64": { + "1.75": "amazonlinux:2023", + "1.76": "amazonlinux:2023", + "1.77": "amazonlinux:2023", + "1.78": "amazonlinux:2023", + "1.79": "amazonlinux:2023", + "1.80": "amazonlinux:2023", + "1.81": "amazonlinux:2023", + "1.82": "amazonlinux:2023" + } + }, + "images": [ + "build" + ], + "deployment": { + "files": [], + "packages": {} + } } }, "architecture": ["x64", "arm64"], diff --git a/dockerfiles/aws/rust/Dockerfile.build b/dockerfiles/aws/rust/Dockerfile.build new file mode 100644 index 000000000..020cc9c7c --- /dev/null +++ b/dockerfiles/aws/rust/Dockerfile.build @@ -0,0 +1,32 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV RUST_VERSION=${VERSION} + +# Install required build tools +RUN yum install -y gcc gcc-c++ make openssl-devel shadow-utils zip + +# Install Rust +ENV RUSTUP_HOME=/usr/local/rustup \ + CARGO_HOME=/usr/local/cargo \ + PATH=/usr/local/cargo/bin:$PATH + +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain ${RUST_VERSION} \ + && chmod -R a+w $RUSTUP_HOME $CARGO_HOME + +# Install gosu for user management +ENV GOSU_VERSION 1.14 +RUN curl -o /usr/local/bin/gosu -SL "https://github.com/tianon/gosu/releases/download/${GOSU_VERSION}/gosu-amd64" \ + && chmod +x /usr/local/bin/gosu + +# Setup SEBS scripts +RUN mkdir -p /sebs/ +COPY dockerfiles/rust_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh /sebs/installer.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV 
PATH=/usr/sbin:$PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/aws/rust/Dockerfile.function b/dockerfiles/aws/rust/Dockerfile.function new file mode 100644 index 000000000..2c662310a --- /dev/null +++ b/dockerfiles/aws/rust/Dockerfile.function @@ -0,0 +1,24 @@ +ARG BASE_IMAGE +FROM $BASE_IMAGE + +# Install Rust +ARG RUST_VERSION +ENV RUSTUP_HOME=/usr/local/rustup \ + CARGO_HOME=/usr/local/cargo \ + PATH=/usr/local/cargo/bin:$PATH + +RUN yum install -y gcc openssl-devel && \ + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain ${RUST_VERSION} && \ + chmod -R a+w $RUSTUP_HOME $CARGO_HOME + +# Copy function code +COPY . /var/task/ + +# Build the function +WORKDIR /var/task +RUN cargo build --release && \ + cp target/release/bootstrap /var/runtime/bootstrap || \ + cp target/release/handler /var/runtime/bootstrap + +# Set the CMD to the handler (AWS Lambda will execute this) +CMD ["/var/runtime/bootstrap"] diff --git a/dockerfiles/rust_installer.sh b/dockerfiles/rust_installer.sh new file mode 100755 index 000000000..87a95887f --- /dev/null +++ b/dockerfiles/rust_installer.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +cd /mnt/function + +# Install Rust target for AWS Lambda +if [ "${TARGET_ARCHITECTURE}" == "arm64" ]; then + RUST_TARGET="aarch64-unknown-linux-gnu" +elif [ "${TARGET_ARCHITECTURE}" == "x64" ]; then + RUST_TARGET="x86_64-unknown-linux-gnu" +else + echo "Unsupported architecture: $TARGET_ARCHITECTURE" + exit 1 +fi + +# Ensure Cargo.toml exists +if [ ! 
-f "Cargo.toml" ]; then + echo "Error: Cargo.toml not found" + exit 1 +fi + +# Add the target if not already added +rustup target add ${RUST_TARGET} + +# Build the release binary +cargo build --release --target ${RUST_TARGET} + +# Copy the binary to the root as 'bootstrap' (required by AWS Lambda custom runtime) +cp target/${RUST_TARGET}/release/bootstrap bootstrap || \ + cp target/${RUST_TARGET}/release/handler bootstrap || \ + (ls target/${RUST_TARGET}/release/ && exit 1) + +chmod +x bootstrap diff --git a/docs/rust-support.md b/docs/rust-support.md new file mode 100644 index 000000000..83267e1a5 --- /dev/null +++ b/docs/rust-support.md @@ -0,0 +1,239 @@ +# Rust Support for AWS Lambda Benchmarks + +This document describes the Rust support implementation for AWS Lambda benchmarks in the SeBS framework. + +## Overview + +As of November 2025, AWS Lambda officially supports Rust as a Generally Available runtime. This implementation adds full Rust support to the SeBS benchmarking framework for AWS Lambda. + +## Requirements + +- Rust toolchain (version specified in benchmark configuration) +- AWS Lambda Runtime for Rust (`lambda_runtime` crate) +- Cargo build system + +## Architecture + +### Runtime Configuration + +Rust functions on AWS Lambda use the `provided.al2023` custom runtime. The compiled binary must be named `bootstrap` to be recognized by the Lambda execution environment. + +### Build Process + +1. Rust code is compiled using Cargo with the appropriate target architecture: + - `x86_64-unknown-linux-gnu` for x64 architecture + - `aarch64-unknown-linux-gnu` for ARM64 architecture + +2. The compiled binary is renamed to `bootstrap` if necessary + +3. The bootstrap binary is packaged in a ZIP file for deployment + +## Creating a Rust Benchmark + +### 1. Project Structure + +Create a Rust directory in your benchmark folder: + +``` +benchmarks/ + └── YOUR_BENCHMARK/ + └── rust/ + ├── Cargo.toml + ├── src/ + │ └── main.rs + └── .gitignore +``` + +### 2. 
Cargo.toml Configuration + +Your `Cargo.toml` must specify the binary name as `bootstrap`: + +```toml +[package] +name = "your-benchmark" +version = "0.1.0" +edition = "2021" + +[[bin]] +name = "bootstrap" +path = "src/main.rs" + +[dependencies] +lambda_runtime = "0.13" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +tokio = { version = "1", features = ["full"] } +``` + +### 3. Handler Implementation + +Your Rust handler must follow the Lambda Runtime API format: + +```rust +use lambda_runtime::{service_fn, Error, LambdaEvent}; +use serde::{Deserialize, Serialize}; +use std::time::{SystemTime, UNIX_EPOCH}; + +#[derive(Deserialize)] +struct Request { + // Your input fields +} + +#[derive(Serialize)] +struct Response { + result: YourResultType, + begin: f64, + end: f64, + is_cold: bool, + request_id: String, +} + +static mut IS_COLD: bool = true; + +async fn handler(event: LambdaEvent) -> Result { + let (payload, context) = event.into_parts(); + + let begin = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs_f64(); + + let is_cold = unsafe { + let cold = IS_COLD; + IS_COLD = false; + cold + }; + + // Your benchmark logic here + + let end = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs_f64(); + + Ok(Response { + result: your_result, + begin, + end, + is_cold, + request_id: context.request_id, + }) +} + +#[tokio::main] +async fn main() -> Result<(), Error> { + lambda_runtime::run(service_fn(handler)).await +} +``` + +### 4. 
Update Benchmark Configuration + +Add `"rust"` to the languages array in your benchmark's `config.json`: + +```json +{ + "timeout": 120, + "memory": 128, + "languages": ["python", "nodejs", "rust"], + "modules": [] +} +``` + +## Running Rust Benchmarks + +Use the standard SeBS command-line interface: + +```bash +# Deploy and run a Rust benchmark +./sebs.py benchmark invoke 010.sleep --language rust --language-version 1.80 --deployment aws + +# Use container deployment (recommended for consistent builds) +./sebs.py benchmark invoke 010.sleep --language rust --language-version 1.80 --deployment aws --container-deployment +``` + +## Implementation Details + +### Dockerfiles + +Two Dockerfiles are provided for Rust: + +1. **Dockerfile.build**: Used for building the function code with dependencies + - Installs Rust toolchain + - Configures cross-compilation targets + - Runs the build process + +2. **Dockerfile.function**: Used for container-based Lambda deployment + - Contains the compiled binary + - Minimal runtime environment + +### Build Script + +The `rust_installer.sh` script handles: +- Target architecture selection +- Rust target installation +- Cargo build execution +- Binary extraction and naming + +### AWS Integration + +The AWS deployment module (`sebs/aws/aws.py`) has been updated to: +- Recognize Rust as a language option +- Map Rust to the `provided.al2023` runtime +- Use `bootstrap` as the Lambda handler +- Package Rust binaries correctly + +## Example: Sleep Benchmark + +A complete example is available at: +`benchmarks/000.microbenchmarks/010.sleep/rust/` + +This benchmark demonstrates: +- Basic Lambda Runtime usage +- Cold start detection +- Request/response handling +- Timing measurements + +## Performance Considerations + +Rust provides several advantages for Lambda functions: + +1. **Fast Execution**: Compiled, optimized native code +2. **Low Memory Usage**: No runtime overhead +3. 
**Fast Cold Starts**: Smaller binary size compared to some runtimes +4. **Predictable Performance**: No garbage collection pauses + +## Troubleshooting + +### Binary Size Issues + +If your binary is too large for direct ZIP upload (>50MB): +- The framework will automatically use S3 upload +- Consider using container deployment for large binaries + +### Architecture Mismatch + +Ensure you're building for the correct architecture: +- Use `--architecture x64` or `--architecture arm64` flag +- The build system will automatically select the correct Rust target + +### Dependencies Not Building + +For dependencies with native code: +- Ensure they support Linux targets +- Consider using container deployment for consistent builds + +## Additional Resources + +- [AWS Lambda Rust Support Announcement](https://aws.amazon.com/about-aws/whats-new/2025/11/aws-lambda-rust/) +- [AWS Lambda Rust Runtime Documentation](https://docs.aws.amazon.com/lambda/latest/dg/lambda-rust.html) +- [Rust Lambda Runtime Crate](https://github.com/awslabs/aws-lambda-rust-runtime) + +## Contributing + +When adding new Rust benchmarks: + +1. Follow the project structure outlined above +2. Include appropriate error handling +3. Document any special dependencies or requirements +4. Test on both x64 and ARM64 architectures if possible +5. 
Update this documentation if you encounter issues or have suggestions diff --git a/sebs.py b/sebs.py index 80fb11ed3..3757b247c 100755 --- a/sebs.py +++ b/sebs.py @@ -64,7 +64,7 @@ def simplified_common_params(func): @click.option( "--language", default=None, - type=click.Choice(["python", "nodejs"]), + type=click.Choice(["python", "nodejs", "rust"]), help="Benchmark language", ) @click.option("--language-version", default=None, type=str, help="Benchmark language version") diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 243a6f0f9..7393f0b22 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -137,6 +137,7 @@ def package_code( CONFIG_FILES = { "python": ["handler.py", "requirements.txt", ".python_packages"], "nodejs": ["handler.js", "package.json", "node_modules"], + "rust": ["bootstrap", "Cargo.toml", "Cargo.lock", "target"], } package_config = CONFIG_FILES[language_name] function_dir = os.path.join(directory, "function") @@ -174,6 +175,9 @@ def _map_language_runtime(self, language: str, runtime: str): # For example, it's 12.x instead of 12. 
if language == "nodejs": return f"{runtime}.x" + # Rust uses provided.al2023 runtime (custom runtime) + elif language == "rust": + return "provided.al2023" return runtime def create_function( @@ -251,10 +255,15 @@ def create_function( "S3Key": code_prefix, } - create_function_params["Runtime"] = "{}{}".format( - language, self._map_language_runtime(language, language_runtime) - ) - create_function_params["Handler"] = "handler.handler" + # Rust uses custom runtime with different handler + if language == "rust": + create_function_params["Runtime"] = self._map_language_runtime(language, language_runtime) + create_function_params["Handler"] = "bootstrap" + else: + create_function_params["Runtime"] = "{}{}".format( + language, self._map_language_runtime(language, language_runtime) + ) + create_function_params["Handler"] = "handler.handler" create_function_params = { k: v for k, v in create_function_params.items() if v is not None diff --git a/sebs/benchmark.py b/sebs/benchmark.py index f159e820c..8e040bd2b 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -252,8 +252,9 @@ def hash_directory(directory: str, deployment: str, language: str): FILES = { "python": ["*.py", "requirements.txt*"], "nodejs": ["*.js", "package.json"], + "rust": ["*.rs", "Cargo.toml", "Cargo.lock"], } - WRAPPERS = {"python": "*.py", "nodejs": "*.js"} + WRAPPERS = {"python": "*.py", "nodejs": "*.js", "rust": None} NON_LANG_FILES = ["*.sh", "*.json"] selected_files = FILES[language] + NON_LANG_FILES for file_type in selected_files: @@ -261,14 +262,25 @@ def hash_directory(directory: str, deployment: str, language: str): path = os.path.join(directory, f) with open(path, "rb") as opened_file: hash_sum.update(opened_file.read()) - # wrappers - wrappers = project_absolute_path( - "benchmarks", "wrappers", deployment, language, WRAPPERS[language] - ) - for f in glob.glob(wrappers): - path = os.path.join(directory, f) - with open(path, "rb") as opened_file: - hash_sum.update(opened_file.read()) + # 
For rust, also hash the src directory recursively + if language == "rust": + src_dir = os.path.join(directory, "src") + if os.path.exists(src_dir): + for root, dirs, files in os.walk(src_dir): + for file in sorted(files): + if file.endswith('.rs'): + path = os.path.join(root, file) + with open(path, "rb") as opened_file: + hash_sum.update(opened_file.read()) + # wrappers (Rust doesn't use wrapper files) + if WRAPPERS[language] is not None: + wrappers = project_absolute_path( + "benchmarks", "wrappers", deployment, language, WRAPPERS[language] + ) + for f in glob.glob(wrappers): + path = os.path.join(directory, f) + with open(path, "rb") as opened_file: + hash_sum.update(opened_file.read()) return hash_sum.hexdigest() def serialize(self) -> dict: @@ -316,11 +328,22 @@ def copy_code(self, output_dir): FILES = { "python": ["*.py", "requirements.txt*"], "nodejs": ["*.js", "package.json"], + "rust": ["Cargo.toml", "Cargo.lock"], } path = os.path.join(self.benchmark_path, self.language_name) for file_type in FILES[self.language_name]: for f in glob.glob(os.path.join(path, file_type)): shutil.copy2(os.path.join(path, f), output_dir) + + # For Rust, copy the entire src directory + if self.language_name == "rust": + src_path = os.path.join(path, "src") + if os.path.exists(src_path): + dest_src = os.path.join(output_dir, "src") + if os.path.exists(dest_src): + shutil.rmtree(dest_src) + shutil.copytree(src_path, dest_src) + # support node.js benchmarks with language specific packages nodejs_package_json = os.path.join(path, f"package.json.{self.language_version}") if os.path.exists(nodejs_package_json): @@ -406,6 +429,9 @@ def add_deployment_package(self, output_dir): self.add_deployment_package_python(output_dir) elif self.language == Language.NODEJS: self.add_deployment_package_nodejs(output_dir) + elif self.language == Language.RUST: + # Rust dependencies are managed by Cargo, no additional packages needed + pass else: raise NotImplementedError @@ -483,7 +509,7 @@ def 
ensure_image(name: str) -> None: } # run Docker container to install packages - PACKAGE_FILES = {"python": "requirements.txt", "nodejs": "package.json"} + PACKAGE_FILES = {"python": "requirements.txt", "nodejs": "package.json", "rust": "Cargo.toml"} file = os.path.join(output_dir, PACKAGE_FILES[self.language_name]) if os.path.exists(file): try: diff --git a/sebs/faas/function.py b/sebs/faas/function.py index 0fab7bcf4..5fb5bde81 100644 --- a/sebs/faas/function.py +++ b/sebs/faas/function.py @@ -263,6 +263,7 @@ def deserialize(cached_config: dict) -> "Trigger": class Language(Enum): PYTHON = "python" NODEJS = "nodejs" + RUST = "rust" # FIXME: 3.7+ python with future annotations @staticmethod @@ -299,7 +300,7 @@ def serialize(self) -> dict: @staticmethod def deserialize(config: dict) -> Runtime: - languages = {"python": Language.PYTHON, "nodejs": Language.NODEJS} + languages = {"python": Language.PYTHON, "nodejs": Language.NODEJS, "rust": Language.RUST} return Runtime(language=languages[config["language"]], version=config["version"]) diff --git a/tools/build_docker_images.py b/tools/build_docker_images.py index 5336fb485..e0fc3ec6e 100755 --- a/tools/build_docker_images.py +++ b/tools/build_docker_images.py @@ -13,7 +13,7 @@ "--deployment", default=None, choices=["local", "aws", "azure", "gcp"], action="store" ) parser.add_argument("--type", default=None, choices=["build", "run", "manage"], action="store") -parser.add_argument("--language", default=None, choices=["python", "nodejs"], action="store") +parser.add_argument("--language", default=None, choices=["python", "nodejs", "rust"], action="store") parser.add_argument("--language-version", default=None, type=str, action="store") args = parser.parse_args() config = json.load(open(os.path.join(PROJECT_DIR, "config", "systems.json"), "r")) From 892cd38bde3d2075e43c3046b028e888905b1fa4 Mon Sep 17 00:00:00 2001 From: Alexander Schlieper Date: Fri, 12 Dec 2025 12:15:39 +0100 Subject: [PATCH 08/31] gitignore fix --- 
benchmarks/000.microbenchmarks/010.sleep/rust/.gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/benchmarks/000.microbenchmarks/010.sleep/rust/.gitignore b/benchmarks/000.microbenchmarks/010.sleep/rust/.gitignore index a73202db5..34b463310 100644 --- a/benchmarks/000.microbenchmarks/010.sleep/rust/.gitignore +++ b/benchmarks/000.microbenchmarks/010.sleep/rust/.gitignore @@ -1,3 +1,2 @@ target/ -Cargo.lock bootstrap From 47f6a8f363c313196fe97c9d95515b9ad4e3be39 Mon Sep 17 00:00:00 2001 From: toooadi Date: Fri, 12 Dec 2025 12:34:59 +0100 Subject: [PATCH 09/31] Adapt pypy installer --- dockerfiles/pypy_installer.sh | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/dockerfiles/pypy_installer.sh b/dockerfiles/pypy_installer.sh index 303087d6a..3d749ab4e 100644 --- a/dockerfiles/pypy_installer.sh +++ b/dockerfiles/pypy_installer.sh @@ -2,19 +2,6 @@ cd /mnt/function -#TODO: If the base image OS is not centOS based, change to apt -yum install -y tar bzip2 gzip - -#TODO: make version configurable -curl -L -o pypy.tar.bz2 https://downloads.python.org/pypy/pypy3.11-v7.3.20-linux64.tar.bz2 -tar -xjf pypy.tar.bz2 -mv pypy3.11-v7.3.20-linux64 /opt/pypy -rm pypy.tar.bz2 -chmod -R +x /opt/pypy/bin -export PATH=/opt/pypy/bin:$PATH -python -m ensurepip -python -mpip install -U pip wheel - #Probably remove this conditional, might break pypy builds, might lead to installation of CPython libraries PLATFORM_ARG="" if [[ "${TARGET_ARCHITECTURE}" == "arm64" ]]; then From ea2dfb9025f1b039eed34e6adc13e24811998d0c Mon Sep 17 00:00:00 2001 From: Alexander Schlieper Date: Fri, 12 Dec 2025 12:48:43 +0100 Subject: [PATCH 10/31] benchmark merge --- sebs/benchmark.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/sebs/benchmark.py b/sebs/benchmark.py index 1184a520b..59887d6cd 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -252,21 +252,17 @@ def hash_directory(directory: str, deployment: str, language: str): FILES = { 
"python": ["*.py", "requirements.txt*"], "nodejs": ["*.js", "package.json"], -<<<<<<< HEAD "rust": ["*.rs", "Cargo.toml", "Cargo.lock"], "java": [], + "pypy": ["*.py", "requirements.txt*"], } WRAPPERS = { "python": ["*.py"], "nodejs": ["*.js"], "rust": None, "java": ["src", "pom.xml"], + "pypy": ["*.py"], } -======= - "pypy": ["*.py", "requirements.txt*"], - } - WRAPPERS = {"python": "*.py", "nodejs": "*.js", "pypy": "*.py"} ->>>>>>> features/pypy-runtime-azure NON_LANG_FILES = ["*.sh", "*.json"] selected_files = FILES[language] + NON_LANG_FILES for file_type in selected_files: @@ -348,12 +344,9 @@ def copy_code(self, output_dir): FILES = { "python": ["*.py", "requirements.txt*"], "nodejs": ["*.js", "package.json"], -<<<<<<< HEAD "rust": ["Cargo.toml", "Cargo.lock"], "java": [], -======= "pypy": ["*.py", "requirements.txt*"], ->>>>>>> features/pypy-runtime-azure } path = os.path.join(self.benchmark_path, self.language_name) if self.language_name == "java": @@ -549,11 +542,7 @@ def ensure_image(name: str) -> None: } # run Docker container to install packages -<<<<<<< HEAD - PACKAGE_FILES = {"python": "requirements.txt", "nodejs": "package.json", "rust": "Cargo.toml", "java": "pom.xml"} -======= - PACKAGE_FILES = {"python": "requirements.txt", "nodejs": "package.json", "pypy": "requirements.txt"} ->>>>>>> features/pypy-runtime-azure + PACKAGE_FILES = {"python": "requirements.txt", "nodejs": "package.json", "rust": "Cargo.toml", "java": "pom.xml", "pypy": "requirements.txt"} file = os.path.join(output_dir, PACKAGE_FILES[self.language_name]) if os.path.exists(file): try: From 0788ab3eaf14871d510fec6c832838791ba0b951 Mon Sep 17 00:00:00 2001 From: Alexander Schlieper Date: Fri, 12 Dec 2025 13:59:14 +0100 Subject: [PATCH 11/31] more merge changes --- config/systems.json | 48 +++++++++++++++++++++--------------- sebs/azure/azure.py | 36 ++++----------------------- tools/build_docker_images.py | 28 +++++++-------------- 3 files changed, 42 insertions(+), 70 deletions(-) 
diff --git a/config/systems.json b/config/systems.json index d1ee6eeae..8e7879c38 100644 --- a/config/systems.json +++ b/config/systems.json @@ -149,7 +149,6 @@ } } }, -<<<<<<< HEAD "rust": { "base_images": { "x64": { @@ -188,7 +187,20 @@ }, "arm64": { "17": "public.ecr.aws/lambda/java:17" -======= + } + }, + "images": [ + "build" + ], + "deployment": { + "files": [ + "pom.xml", + "src" + ], + "packages": {}, + "module_packages": {} + } + }, "pypy": { "base_images": { "x64": { @@ -196,7 +208,6 @@ }, "arm64": { "3.11": "amazon/aws-lambda-provided:al2-arm64" ->>>>>>> features/pypy-runtime-azure } }, "images": [ @@ -204,19 +215,12 @@ ], "deployment": { "files": [ -<<<<<<< HEAD - "pom.xml", - "src" - ], - "packages": {}, -======= "handler.py", "storage.py", "nosql.py", "setup.py" ], "packages": [], ->>>>>>> features/pypy-runtime-azure "module_packages": {} } } @@ -280,17 +284,10 @@ } } }, -<<<<<<< HEAD "java": { "base_images": { "x64": { "17": "mcr.microsoft.com/azure-functions/java:4-java17" -======= - "pypy": { - "base_images": { - "x64": { - "3.11": "ubuntu:20.04" ->>>>>>> features/pypy-runtime-azure } }, "images": [ @@ -299,13 +296,25 @@ "username": "docker_user", "deployment": { "files": [ -<<<<<<< HEAD "pom.xml", "src" ], "packages": {}, "module_packages": {} -======= + } + }, + "pypy": { + "base_images": { + "x64": { + "3.11": "ubuntu:20.04" + } + }, + "images": [ + "build" + ], + "username": "docker_user", + "deployment": { + "files": [ "handler.py", "storage.py", "nosql.py" @@ -319,7 +328,6 @@ "azure-cosmos" ] } ->>>>>>> features/pypy-runtime-azure } } }, diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index db5f74c77..859227179 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -33,11 +33,7 @@ class Azure(System): _config: AzureConfig # runtime mapping -<<<<<<< HEAD - AZURE_RUNTIMES = {"python": "python", "nodejs": "node", "java": "java"} -======= - AZURE_RUNTIMES = {"python": "python", "nodejs": "node", "pypy": "custom"} ->>>>>>> 
features/pypy-runtime-azure + AZURE_RUNTIMES = {"python": "python", "nodejs": "node", "java": "java", "pypy": "custom"} @staticmethod def name(): @@ -148,20 +144,13 @@ def package_code( # In previous step we ran a Docker container which installed packages # Python packages are in .python_packages because this is expected by Azure -<<<<<<< HEAD - EXEC_FILES = {"python": "handler.py", "nodejs": "handler.js", "java": "../function.jar"} + EXEC_FILES = {"python": "handler.py", "nodejs": "handler.js", "java": "../function.jar", "pypy": "handler.py"} CONFIG_FILES = { "python": ["requirements.txt", ".python_packages"], "nodejs": ["package.json", "node_modules"], "java": ["function.jar"], -======= - EXEC_FILES = {"python": "handler.py", "nodejs": "handler.js", "pypy": "handler.py"} - CONFIG_FILES = { - "python": ["requirements.txt", ".python_packages"], - "nodejs": ["package.json", "node_modules"], # Keep .python_packages at the root so custom handler can import deps. "pypy": ["requirements.txt", ".python_packages", "pypy"], ->>>>>>> features/pypy-runtime-azure } package_config = CONFIG_FILES[language_name] @@ -200,7 +189,6 @@ def package_code( # generate function.json # TODO: extension to other triggers than HTTP -<<<<<<< HEAD if language_name == "java": # Java Azure Functions - For annotation-based functions, function.json # should include scriptFile and entryPoint @@ -225,7 +213,6 @@ def package_code( } else: default_function_json = { - "scriptFile": EXEC_FILES[language_name], "bindings": [ { "authLevel": "anonymous", @@ -237,23 +224,10 @@ def package_code( {"type": "http", "direction": "out", "name": "$return"}, ], } -======= - default_function_json = { - "bindings": [ - { - "authLevel": "anonymous", - "type": "httpTrigger", - "direction": "in", - "name": "req", - "methods": ["get", "post"], - }, - {"type": "http", "direction": "out", "name": "$return"}, - ], - } - if language_name != "pypy": - default_function_json["scriptFile"] = EXEC_FILES[language_name] + # PyPy 
uses custom handler, no scriptFile needed + if language_name != "pypy": + default_function_json["scriptFile"] = EXEC_FILES[language_name] ->>>>>>> features/pypy-runtime-azure json_out = os.path.join(directory, "handler", "function.json") json.dump(default_function_json, open(json_out, "w"), indent=2) diff --git a/tools/build_docker_images.py b/tools/build_docker_images.py index bbd0a9a7c..8428a0f59 100755 --- a/tools/build_docker_images.py +++ b/tools/build_docker_images.py @@ -13,18 +13,14 @@ "--deployment", default=None, choices=["local", "aws", "azure", "gcp"], action="store" ) parser.add_argument("--type", default=None, choices=["build", "run", "manage"], action="store") -<<<<<<< HEAD parser.add_argument( - "--language", default=None, choices=["python", "nodejs", "java", "rust"], action="store" + "--language", default=None, choices=["python", "nodejs", "java", "rust", "pypy"], action="store" ) parser.add_argument( "--platform", default=None, help="Optional Docker platform (e.g., linux/amd64) to override host architecture.", ) -======= -parser.add_argument("--language", default=None, choices=["python", "nodejs", "pypy"], action="store") ->>>>>>> features/pypy-runtime-azure parser.add_argument("--language-version", default=None, type=str, action="store") # Optional: force build platform (e.g., linux/amd64 on Apple Silicon) parser.add_argument("--platform", default=None, type=str, action="store") @@ -66,25 +62,20 @@ def build(image_type, system, language=None, version=None, version_name=None): target, PROJECT_DIR, dockerfile, buildargs ) ) -<<<<<<< HEAD - platform_arg = args.platform or os.environ.get("DOCKER_DEFAULT_PLATFORM") - - try: - client.images.build( - path=PROJECT_DIR, - dockerfile=dockerfile, - buildargs=buildargs, - tag=target, - platform=platform_arg, - ) -======= + + # Build kwargs with platform support build_kwargs = { "path": PROJECT_DIR, "dockerfile": dockerfile, "buildargs": buildargs, "tag": target, } - if PLATFORM: + + # Platform selection 
priority: CLI arg > env var > system config + platform_arg = args.platform or os.environ.get("DOCKER_DEFAULT_PLATFORM") + if platform_arg: + build_kwargs["platform"] = platform_arg + elif PLATFORM: build_kwargs["platform"] = PLATFORM elif system in config and "architecture" in config[system]: archs = config[system]["architecture"] @@ -98,7 +89,6 @@ def build(image_type, system, language=None, version=None, version_name=None): try: client.images.build(**build_kwargs) ->>>>>>> features/pypy-runtime-azure except docker.errors.BuildError as exc: print("Error! Build failed!") print(exc) From f4470c21e0d100ba5ece22c88c9800664641676c Mon Sep 17 00:00:00 2001 From: toooadi Date: Fri, 12 Dec 2025 18:38:06 +0100 Subject: [PATCH 12/31] Adapt system.json --- config/systems.json | 78 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 66 insertions(+), 12 deletions(-) diff --git a/config/systems.json b/config/systems.json index 6a48081b9..d6ec62dc2 100644 --- a/config/systems.json +++ b/config/systems.json @@ -93,8 +93,12 @@ } } }, - "architecture": ["x64"], - "deployments": ["package"] + "architecture": [ + "x64" + ], + "deployments": [ + "package" + ] }, "aws": { "languages": { @@ -173,8 +177,14 @@ } } }, - "architecture": ["x64", "arm64"], - "deployments": ["package", "container"] + "architecture": [ + "x64", + "arm64" + ], + "deployments": [ + "package", + "container" + ] }, "azure": { "languages": { @@ -238,8 +248,12 @@ "username": "docker_user" } }, - "architecture": ["x64"], - "deployments": ["package"] + "architecture": [ + "x64" + ], + "deployments": [ + "package" + ] }, "gcp": { "languages": { @@ -261,7 +275,38 @@ "files": [ "handler.py", "storage.py", - "nosql.py" + "nosql.py", + "setup.py" + ], + "packages": [], + "module_packages": { + "storage": [ + "google-cloud-storage" + ], + "nosql": [ + "google-cloud-datastore" + ] + } + } + }, + "pypy": { + "base_images": { + "x64": { + "3.10": "pypy:3.10-slim", + "3.11": "pypy:3.11-slim", + "3.12": 
"pypy:3.12-slim" + } + }, + "images": [ + "build" + ], + "username": "docker_user", + "deployment": { + "files": [ + "handler.py", + "storage.py", + "nosql.py", + "setup.py" ], "packages": [], "module_packages": { @@ -302,8 +347,13 @@ "username": "docker_user" } }, - "architecture": ["x64"], - "deployments": ["package"] + "architecture": [ + "x64" + ], + "deployments": [ + "package", + "container" + ] }, "openwhisk": { "languages": { @@ -363,7 +413,11 @@ } } }, - "architecture": ["x64"], - "deployments": ["container"] + "architecture": [ + "x64" + ], + "deployments": [ + "container" + ] } -} +} \ No newline at end of file From c98b7b7767b31161cd6919520d5991ad5f4df577 Mon Sep 17 00:00:00 2001 From: toooadi Date: Fri, 12 Dec 2025 18:44:57 +0100 Subject: [PATCH 13/31] Add GCP container support --- sebs/faas/container.py | 5 + sebs/gcp/config.py | 62 +++++- sebs/gcp/container.py | 88 ++++++++ sebs/gcp/gcp.py | 486 +++++++++++++++++++++++++++++------------ 4 files changed, 506 insertions(+), 135 deletions(-) create mode 100644 sebs/gcp/container.py diff --git a/sebs/faas/container.py b/sebs/faas/container.py index b17525f7b..073218ed3 100644 --- a/sebs/faas/container.py +++ b/sebs/faas/container.py @@ -126,6 +126,9 @@ def registry_name( ) -> Tuple[str, str, str, str]: pass + def get_adapted_image_name(self, image_name: str, language_name: str,language_version: str, architecture: str): + return image_name + def build_base_image( self, directory: str, @@ -196,6 +199,8 @@ def build_base_image( "our documentation. We recommend QEMU as it can be configured to run automatically." 
) + builder_image = self.get_adapted_image_name(builder_image, language_name, language_version, architecture) + buildargs = { "VERSION": language_version, "BASE_IMAGE": builder_image, diff --git a/sebs/gcp/config.py b/sebs/gcp/config.py index 56d3b5c41..0ae550be3 100644 --- a/sebs/gcp/config.py +++ b/sebs/gcp/config.py @@ -1,6 +1,8 @@ import json import os from typing import cast, List, Optional, Tuple +import time +from googleapiclient.errors import HttpError from sebs.cache import Cache from sebs.faas.config import Config, Credentials, Resources @@ -108,6 +110,7 @@ def update_cache(self, cache: Cache): class GCPResources(Resources): def __init__(self): super().__init__(name="gcp") + self._container_repository = None @staticmethod def initialize(res: Resources, dct: dict): @@ -120,7 +123,9 @@ def initialize(res: Resources, dct: dict): """ def serialize(self) -> dict: - return super().serialize() + out = super().serialize() + out["container_repository"] = self._container_repository + return out @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Resources": @@ -147,6 +152,61 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Resou def update_cache(self, cache: Cache): super().update_cache(cache) + @property + def container_repository(self) -> str: + return self._container_repository + + def check_container_repository_exists(self, config: Config, ar_client): + try: + parent = f"projects/{config.credentials.project_name}/locations/{config.region}" + repo_full_name = f"{parent}/repositories/{self._container_repository}" + self.logging.info("Checking if container repository exists...") + ar_client.projects().locations().repositories().get(name=repo_full_name).execute() + return True + except HttpError as e: + if e.resp.status == 404: + self.logging.error("Container repository does not exist.") + return False + else: + raise e + + def create_container_repository(self, ar_client, parent): + request_body = { 
+ "format": "DOCKER", + "description": "Container repository for SEBS" + } + self._container_repository = f"sebs-benchmarks-{self._resources_id}" + operation = ar_client.projects().locations().repositories().create( + parent=parent, + body=request_body, + repositoryId=self._container_repository + ).execute() + + while True: + # Operations for AR are global or location specific + op_name = operation['name'] + op = ar_client.projects().locations().operations().get(name=op_name).execute() + + if op.get('done'): + if 'error' in op: + raise Exception(f"Failed to create repo: {op['error']}") + self.logging.info("Repository created successfully.") + break + time.sleep(2) + + def get_container_repository(self, config: Config, ar_client): + if self._container_repository is not None: + return self._container_repository + + self._container_repository = f"sebs-benchmarks-{self._resources_id}" + if self.check_container_repository_exists(config, ar_client): + return self._container_repository + + parent = f"projects/{config.credentials.project_name}/locations/{config.region}" + self.create_container_repository(ar_client, parent) + return self._container_repository + + """ FaaS system config defining cloud region (if necessary), credentials and diff --git a/sebs/gcp/container.py b/sebs/gcp/container.py new file mode 100644 index 000000000..2159ac7bf --- /dev/null +++ b/sebs/gcp/container.py @@ -0,0 +1,88 @@ +import docker +from typing import Tuple + +from sebs.gcp.config import GCPConfig +from sebs.config import SeBSConfig +from sebs.faas.container import DockerContainer +from googleapiclient.discovery import build +from google.oauth2 import service_account +from googleapiclient.errors import HttpError +from google.auth.transport.requests import Request + + +class GCRContainer(DockerContainer): + @staticmethod + def name(): + return "gcp" + + @staticmethod + def typename() -> str: + return "GCP.GCRContainer" + + def __init__( + self, + system_config: SeBSConfig, + config: 
GCPConfig, + docker_client: docker.client.DockerClient, + ): + super().__init__(system_config, docker_client) + self.config = config + self.creds = service_account.Credentials.from_service_account_file(self.config.credentials.gcp_credentials, scopes=["https://www.googleapis.com/auth/cloud-platform"]) + self.ar_client = build("artifactregistry", "v1", credentials=self.creds) + + def registry_name( + self, benchmark: str, language_name: str, language_version: str, architecture: str + ) -> Tuple[str, str, str, str]: + + project_id = self.config.credentials.project_name + region = self.config.region + registry_name = f"{region}-docker.pkg.dev/{project_id}" + repository_name = self.config.resources.get_container_repository(self.config, self.ar_client) + + image_tag = self.system_config.benchmark_image_tag( + self.name(), benchmark, language_name, language_version, architecture + ) + image_uri = f"{registry_name}/{repository_name}/{benchmark}:{image_tag}" + + return registry_name, repository_name, image_tag, image_uri + + def find_image(self, repository_name, image_tag) -> bool: + try: + response = self.ar_client.projects().locations().repositories().dockerImages().list( + parent=f"projects/{self.config.credentials.project_id}/locations/{self.config.region}/repositories/{repository_name}" + ) + if "dockerImages" in response: + for image in response["dockerImages"]: + if "latest" in image["tags"] and image_tag in image["tags"]: + return True + except HttpError as e: + if (e.content.code == 404): + return False + raise e + return False + + def get_adapted_image_name(self, image_name: str, language_name: str,language_version: str, architecture: str): + if language_name == "python": + return f"python:{language_version}-slim" + elif language_name == "nodejs": + return f"node:{language_version}-slim" + + return image_name + + def push_image(self, repository_uri, image_tag): + self.logging.info("Authenticating Docker against Artifact Registry...") + 
self.creds.refresh(Request()) + auth_token = self.creds.token + + try: + self.docker_client.login( + username="oauth2accesstoken", + password=auth_token, + registry=repository_uri + ) + super().push_image(repository_uri, image_tag) + self.logging.info(f"Successfully pushed the image to registry {repository_uri}.") + except docker.errors.DockerException as e: + self.logging.error(f"Failed to push the image to registry {repository_uri}.") + self.logging.error(f"Error: {str(e)}") + raise RuntimeError("Couldn't push to registry.") diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 6525034c2..0c709194f 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -23,6 +23,7 @@ from sebs.gcp.resources import GCPSystemResources from sebs.gcp.storage import GCPStorage from sebs.gcp.function import GCPFunction +from sebs.gcp.container import GCRContainer from sebs.utils import LoggingHandlers """ @@ -77,10 +78,14 @@ def function_type() -> "Type[Function]": :param config: systems-specific parameters """ - def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None): self.function_client = build("cloudfunctions", "v1", cache_discovery=False) + # Container-based functions are created via run-client + self.run_client = build("run", "v2", cache_discovery=False) self.initialize_resources(select_prefix=resource_prefix) + self.gcr_client = GCRContainer( + self.system_config, self.config, self.docker_client + ) def get_function_client(self): return self.function_client @@ -96,7 +101,7 @@ def default_function_name( code_package.language_name, code_package.language_version, ) - return GCP.format_function_name(func_name) + return GCP.format_function_name(func_name) if not code_package.container_deployment else func_name.replace(".", "-") @staticmethod def format_function_name(func_name: str) -> str: @@ -133,17 +138,22 @@ def package_code( ) -> Tuple[str, int, str]: container_uri = "" - + if container_deployment: - raise NotImplementedError("Container Deployment 
is not supported in GCP") + # build base image and upload to GCR + _, container_uri = self.gcr_client.build_base_image( + directory, language_name, language_version, architecture, benchmark, is_cached + ) CONFIG_FILES = { "python": ["handler.py", ".python_packages"], "nodejs": ["handler.js", "node_modules"], + "pypy" : ["handler.py", ".python_packages"] } HANDLER = { "python": ("handler.py", "main.py"), "nodejs": ("handler.js", "index.js"), + "pypy": ("handler.py", "main.py"), } package_config = CONFIG_FILES[language_name] function_dir = os.path.join(directory, "function") @@ -154,10 +164,11 @@ def package_code( shutil.move(file, function_dir) # rename handler function.py since in gcp it has to be caled main.py - old_name, new_name = HANDLER[language_name] - old_path = os.path.join(directory, old_name) - new_path = os.path.join(directory, new_name) - shutil.move(old_path, new_path) + if not container_deployment: + old_name, new_name = HANDLER[language_name] + old_path = os.path.join(directory, old_name) + new_path = os.path.join(directory, new_name) + shutil.move(old_path, new_path) """ zip the whole directory (the zip-file gets uploaded to gcp later) @@ -179,7 +190,8 @@ def package_code( logging.info("Zip archive size {:2f} MB".format(mbytes)) # rename the main.py back to handler.py - shutil.move(new_path, old_path) + if not container_deployment: + shutil.move(new_path, old_path) return os.path.join(directory, "{}.zip".format(benchmark)), bytes_size, container_uri @@ -191,8 +203,6 @@ def create_function( container_uri: str, ) -> "GCPFunction": - if container_deployment: - raise NotImplementedError("Container deployment is not supported in GCP") package = code_package.code_location benchmark = code_package.benchmark @@ -206,16 +216,19 @@ def create_function( function_cfg = FunctionConfig.from_benchmark(code_package) architecture = function_cfg.architecture.value - code_package_name = cast(str, os.path.basename(package)) - code_package_name = 
f"{architecture}-{code_package_name}" - code_bucket = storage_client.get_bucket(Resources.StorageBucketType.DEPLOYMENT) - code_prefix = os.path.join(benchmark, code_package_name) - storage_client.upload(code_bucket, package, code_prefix) - - self.logging.info("Uploading function {} code to {}".format(func_name, code_bucket)) + if container_deployment: + full_service_name = GCP.get_full_service_name(project_name, location, func_name) + get_req = self.run_client.projects().locations().services().get(name=full_service_name) + else: + full_func_name = GCP.get_full_function_name(project_name, location, func_name) + code_package_name = cast(str, os.path.basename(package)) + code_package_name = f"{architecture}-{code_package_name}" + code_bucket = storage_client.get_bucket(Resources.StorageBucketType.DEPLOYMENT) + code_prefix = os.path.join(benchmark, code_package_name) + storage_client.upload(code_bucket, package, code_prefix) - full_func_name = GCP.get_full_function_name(project_name, location, func_name) - get_req = self.function_client.projects().locations().functions().get(name=full_func_name) + self.logging.info("Uploading function {} code to {}".format(func_name, code_bucket)) + get_req = self.function_client.projects().locations().functions().get(name=full_func_name) try: get_req.execute() @@ -223,45 +236,95 @@ def create_function( envs = self._generate_function_envs(code_package) - create_req = ( - self.function_client.projects() - .locations() - .functions() - .create( - location="projects/{project_name}/locations/{location}".format( - project_name=project_name, location=location - ), - body={ - "name": full_func_name, - "entryPoint": "handler", - "runtime": code_package.language_name + language_runtime.replace(".", ""), - "availableMemoryMb": memory, - "timeout": str(timeout) + "s", - "httpsTrigger": {}, - "ingressSettings": "ALLOW_ALL", - "sourceArchiveUrl": "gs://" + code_bucket + "/" + code_prefix, - "environmentVariables": envs, - }, + if 
container_deployment: + # In the service model, envs is a list of objects with attributes name and value + envs = self._transform_service_envs(envs) + self.logging.info("Deploying run container service") + parent = f"projects/{project_name}/locations/{location}" + create_req = ( + self.run_client.projects() + .locations() + .services() + .create( + parent=parent, + serviceId=func_name, + body={ + "template": { + "containers": [ + { + "image": container_uri, + "ports": [{"containerPort": 8080}], + "env": envs, + "resources": { + "limits": { + "memory": f"{memory if memory >= 512 else 512}Mi", + } + } + } + ], + "timeout": f"{timeout}s", + }, + "ingress": "INGRESS_TRAFFIC_ALL" + }, + ) + ) + else: + create_req = ( + self.function_client.projects() + .locations() + .functions() + .create( + location="projects/{project_name}/locations/{location}".format( + project_name=project_name, location=location + ), + body={ + "name": full_func_name, + "entryPoint": "handler", + "runtime": code_package.language_name + language_runtime.replace(".", ""), + "availableMemoryMb": memory, + "timeout": str(timeout) + "s", + "httpsTrigger": {}, + "ingressSettings": "ALLOW_ALL", + "sourceArchiveUrl": "gs://" + code_bucket + "/" + code_prefix, + "environmentVariables": envs, + }, + ) ) - ) create_req.execute() self.logging.info(f"Function {func_name} has been created!") - allow_unauthenticated_req = ( - self.function_client.projects() - .locations() - .functions() - .setIamPolicy( - resource=full_func_name, - body={ - "policy": { - "bindings": [ - {"role": "roles/cloudfunctions.invoker", "members": ["allUsers"]} - ] - } - }, + if container_deployment: + allow_unauthenticated_req = ( + self.run_client.projects() + .locations() + .services() + .setIamPolicy( + resource=full_service_name, + body={ + "policy": { + "bindings": [ + {"role": "roles/run.invoker", "members": ["allUsers"]} + ] + } + }, + ) + ) + else: + allow_unauthenticated_req = ( + self.function_client.projects() + .locations() + 
.functions() + .setIamPolicy( + resource=full_func_name, + body={ + "policy": { + "bindings": [ + {"role": "roles/cloudfunctions.invoker", "members": ["allUsers"]} + ] + } + }, + ) ) - ) # Avoid infinite loop MAX_RETRIES = 5 @@ -317,25 +380,50 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) location = self.config.region project_name = self.config.project_name - full_func_name = GCP.get_full_function_name(project_name, location, function.name) self.logging.info(f"Function {function.name} - waiting for deployment...") - our_function_req = ( - self.function_client.projects().locations().functions().get(name=full_func_name) - ) - deployed = False - begin = time.time() - while not deployed: - status_res = our_function_req.execute() - if status_res["status"] == "ACTIVE": - deployed = True - else: - time.sleep(3) - if time.time() - begin > 300: # wait 5 minutes; TODO: make it configurable - self.logging.error(f"Failed to deploy function: {function.name}") - raise RuntimeError("Deployment timeout!") - self.logging.info(f"Function {function.name} - deployed!") - invoke_url = status_res["httpsTrigger"]["url"] - + + # Cloud Functions v1 do not have "-" in their name, Cloud Run Services do + if "-" in function.name: + # Cloud Run Service + service_id = function.name.lower() + full_service_name = GCP.get_full_service_name(project_name, self.config.region, service_id) + self.logging.info(f"Waiting for service {full_service_name} to be ready...") + deployed = False + begin = time.time() + while not deployed: + svc = self.run_client.projects().locations().services().get(name=full_service_name).execute() + condition = svc.get("terminalCondition", {}) + if condition.get("type") == "Ready" and condition.get("state") == "CONDITION_SUCCEEDED": + deployed = True + else: + time.sleep(3) + + if time.time() - begin > 300: + self.logging.error(f"Failed to deploy service: {function.name}") + raise RuntimeError("Deployment timeout!") + + 
self.logging.info(f"Service {function.name} - deployed!") + invoke_url = svc["uri"] + + else: + full_func_name = GCP.get_full_function_name(project_name, location, function.name) + our_function_req = ( + self.function_client.projects().locations().functions().get(name=full_func_name) + ) + deployed = False + begin = time.time() + while not deployed: + status_res = our_function_req.execute() + if status_res["status"] == "ACTIVE": + deployed = True + else: + time.sleep(3) + if time.time() - begin > 300: # wait 5 minutes; TODO: make it configurable + self.logging.error(f"Failed to deploy function: {function.name}") + raise RuntimeError("Deployment timeout!") + self.logging.info(f"Function {function.name} - deployed!") + invoke_url = status_res["httpsTrigger"]["url"] + trigger = HTTPTrigger(invoke_url) else: raise RuntimeError("Not supported!") @@ -363,9 +451,6 @@ def update_function( container_uri: str, ): - if container_deployment: - raise NotImplementedError("Container deployment is not supported in GCP") - function = cast(GCPFunction, function) language_runtime = code_package.language_version @@ -379,60 +464,123 @@ def update_function( storage.upload(bucket, code_package.code_location, code_package_name) envs = self._generate_function_envs(code_package) - - self.logging.info(f"Uploaded new code package to {bucket}/{code_package_name}") - full_func_name = GCP.get_full_function_name( - self.config.project_name, self.config.region, function.name - ) - req = ( - self.function_client.projects() - .locations() - .functions() - .patch( - name=full_func_name, - body={ - "name": full_func_name, - "entryPoint": "handler", - "runtime": code_package.language_name + language_runtime.replace(".", ""), - "availableMemoryMb": function.config.memory, - "timeout": str(function.config.timeout) + "s", - "httpsTrigger": {}, - "sourceArchiveUrl": "gs://" + bucket + "/" + code_package_name, - "environmentVariables": envs, - }, + + if container_deployment: + full_service_name = 
GCP.get_full_service_name(self.config.project_name, self.config.region, function.name) + + memory = function.config.memory + timeout = function.config.timeout + + # Cloud Run v2 Service Update + service_body = { + "template": { + "containers": [ + { + "image": container_uri, + "resources": { + "limits": { + "memory": f"{memory if memory >= 512 else 512}Mi", + } + }, + "env": [{"name": k, "value": v} for k, v in envs.items()] + } + ], + "timeout": f"{timeout}s" + } + } + + req = self.run_client.projects().locations().services().patch( + name=full_service_name, + body=service_body ) - ) - res = req.execute() - versionId = res["metadata"]["versionId"] - retries = 0 - last_version = -1 - while retries < 100: - is_deployed, last_version = self.is_deployed(function.name, versionId) - if not is_deployed: - time.sleep(5) - retries += 1 - else: - break - if retries > 0 and retries % 10 == 0: - self.logging.info(f"Waiting for function deployment, {retries} retries.") - if retries == 100: - raise RuntimeError( - "Failed to publish new function code after 10 attempts. " - f"Version {versionId} has not been published, last version {last_version}." 
+ + else: + + self.logging.info(f"Uploaded new code package to {bucket}/{code_package_name}") + full_func_name = GCP.get_full_function_name( + self.config.project_name, self.config.region, function.name ) - self.logging.info("Published new function code and configuration.") + req = ( + self.function_client.projects() + .locations() + .functions() + .patch( + name=full_func_name, + body={ + "name": full_func_name, + "entryPoint": "handler", + "runtime": code_package.language_name + language_runtime.replace(".", ""), + "availableMemoryMb": function.config.memory, + "timeout": str(function.config.timeout) + "s", + "httpsTrigger": {}, + "sourceArchiveUrl": "gs://" + bucket + "/" + code_package_name, + "environmentVariables": envs, + }, + ) + ) + + res = req.execute() + + if container_deployment: + self.logging.info(f"Updated Cloud Run service {function.name}, waiting for operation completion...") + + op_name = res["name"] + op_res = self.run_client.projects().locations().operations().wait(name=op_name).execute() + + if "error" in op_res: + raise RuntimeError(f"Cloud Run update failed: {op_res['error']}") + + self.logging.info(f"Cloud Run service {function.name} updated and ready.") + + else: + versionId = res["metadata"]["versionId"] + retries = 0 + last_version = -1 + while retries < 100: + is_deployed, last_version = self.is_deployed(function.name, versionId) + if not is_deployed: + time.sleep(5) + retries += 1 + else: + break + if retries > 0 and retries % 10 == 0: + self.logging.info(f"Waiting for function deployment, {retries} retries.") + if retries == 100: + raise RuntimeError( + "Failed to publish new function code after 10 attempts. " + f"Version {versionId} has not been published, last version {last_version}." 
+ ) + self.logging.info("Published new function code and configuration.") def _update_envs(self, full_function_name: str, envs: dict) -> dict: - get_req = ( - self.function_client.projects().locations().functions().get(name=full_function_name) - ) - response = get_req.execute() + if "/services/" in full_function_name: + # Envs are in template.containers[0].env (list of {name, value}) + get_req = self.run_client.projects().locations().services().get(name=full_function_name) + response = get_req.execute() + + # Extract existing envs + existing_envs = {} + if "template" in response and "containers" in response["template"]: + # Assume single container + container = response["template"]["containers"][0] + if "env" in container: + for e in container["env"]: + existing_envs[e["name"]] = e["value"] + + # Merge: new overrides old + envs = {**existing_envs, **envs} + + else: + get_req = ( + self.function_client.projects().locations().functions().get(name=full_function_name) + ) + response = get_req.execute() - # preserve old variables while adding new ones. - # but for conflict, we select the new one - if "environmentVariables" in response: - envs = {**response["environmentVariables"], **envs} + # preserve old variables while adding new ones. 
+ # but for conflict, we select the new one + if "environmentVariables" in response: + envs = {**response["environmentVariables"], **envs} return envs @@ -450,6 +598,10 @@ def _generate_function_envs(self, code_package: Benchmark) -> dict: return envs + + def _transform_service_envs(self, envs: dict) -> list: + return [{"name": k, "value": v} for k, v in envs.items()] + def update_function_configuration( self, function: Function, code_package: Benchmark, env_variables: dict = {} ): @@ -457,9 +609,16 @@ def update_function_configuration( assert code_package.has_input_processed function = cast(GCPFunction, function) - full_func_name = GCP.get_full_function_name( - self.config.project_name, self.config.region, function.name - ) + if code_package.language_name == "pypy": + full_func_name = GCP.get_full_service_name( + self.config.project_name, + self.config.region, + function.name.replace("_", "-").lower() + ) + else: + full_func_name = GCP.get_full_function_name( + self.config.project_name, self.config.region, function.name + ) envs = self._generate_function_envs(code_package) envs = {**envs, **env_variables} @@ -468,7 +627,45 @@ def update_function_configuration( if len(envs) > 0: envs = self._update_envs(full_func_name, envs) - if len(envs) > 0: + if "/services/" in full_func_name: + # Cloud Run Configuration Update + + # Prepare envs list + env_vars = [{"name": k, "value": v} for k, v in envs.items()] + memory = function.config.memory + timeout = function.config.timeout + + service_body = { + "template": { + "containers": [ + { + "resources": { + "limits": { + "memory": f"{memory}Mi", + } + }, + "env": env_vars + } + ], + "timeout": f"{timeout}s" + } + } + + req = self.run_client.projects().locations().services().patch( + name=full_func_name, + body=service_body + ) + res = req.execute() + + self.logging.info(f"Updated Cloud Run configuration {function.name}, waiting for operation...") + op_name = res["name"] + op_res = 
self.run_client.projects().locations().operations().wait(name=op_name).execute() + if "error" in op_res: + raise RuntimeError(f"Cloud Run config update failed: {op_res['error']}") + + return 0 + + elif len(envs) > 0: req = ( self.function_client.projects() @@ -527,6 +724,10 @@ def update_function_configuration( def get_full_function_name(project_name: str, location: str, func_name: str): return f"projects/{project_name}/locations/{location}/functions/{func_name}" + @staticmethod + def get_full_service_name(project_name: str, location: str, service_name: str): + return f"projects/{project_name}/locations/{location}/services/{service_name}" + def prepare_experiment(self, benchmark): logs_bucket = self._system_resources.get_storage().add_output_bucket( benchmark, suffix="logs" @@ -720,14 +921,31 @@ def get_functions(self, code_package: Benchmark, function_names: List[str]) -> L return functions def is_deployed(self, func_name: str, versionId: int = -1) -> Tuple[bool, int]: - name = GCP.get_full_function_name(self.config.project_name, self.config.region, func_name) - function_client = self.get_function_client() - status_req = function_client.projects().locations().functions().get(name=name) - status_res = status_req.execute() - if versionId == -1: - return (status_res["status"] == "ACTIVE", status_res["versionId"]) + + if "pypy" in func_name: + # Cloud Run Service + service_name = func_name.replace("_", "-").lower() + name = GCP.get_full_service_name(self.config.project_name, self.config.region, service_name) + try: + svc = self.run_client.projects().locations().services().get(name=name).execute() + conditions = svc.get("status", {}).get("conditions", []) + ready = next((c for c in conditions if c["type"] == "Ready"), None) + is_ready = ready and ready["status"] == "True" + return (is_ready, 0) + except HttpError: + return (False, -1) else: - return (status_res["versionId"] == versionId, status_res["versionId"]) + name = 
GCP.get_full_function_name(self.config.project_name, self.config.region, func_name) + try: + function_client = self.get_function_client() + status_req = function_client.projects().locations().functions().get(name=name) + status_res = status_req.execute() + if versionId == -1: + return (status_res["status"] == "ACTIVE", status_res["versionId"]) + else: + return (status_res["versionId"] == versionId, status_res["versionId"]) + except HttpError: + return (False, -1) def deployment_version(self, func: Function) -> int: name = GCP.get_full_function_name(self.config.project_name, self.config.region, func.name) From 421d8ff2a6a4ff9d345bd492816d1041d01f841b Mon Sep 17 00:00:00 2001 From: toooadi Date: Fri, 12 Dec 2025 18:46:25 +0100 Subject: [PATCH 14/31] Add GCP Pypy container benchmark files --- .../000.microbenchmarks/010.sleep/config.json | 2 +- .../010.sleep/pypy/function.py | 9 ++ .../100.webapps/120.uploader/config.json | 12 +- .../100.webapps/120.uploader/pypy/function.py | 48 +++++++ .../120.uploader/pypy/requirements.txt | 0 benchmarks/wrappers/gcp/pypy/handler.py | 72 ++++++++++ benchmarks/wrappers/gcp/pypy/nosql.py | 131 ++++++++++++++++++ benchmarks/wrappers/gcp/pypy/setup.py | 15 ++ benchmarks/wrappers/gcp/pypy/storage.py | 61 ++++++++ benchmarks/wrappers/gcp/python/setup.py | 15 ++ dockerfiles/gcp/pypy/Dockerfile.build | 26 ++++ dockerfiles/gcp/pypy/Dockerfile.function | 39 ++++++ dockerfiles/gcp/python/Dockerfile.function | 39 ++++++ 13 files changed, 465 insertions(+), 4 deletions(-) create mode 100644 benchmarks/000.microbenchmarks/010.sleep/pypy/function.py create mode 100755 benchmarks/100.webapps/120.uploader/pypy/function.py create mode 100644 benchmarks/100.webapps/120.uploader/pypy/requirements.txt create mode 100644 benchmarks/wrappers/gcp/pypy/handler.py create mode 100644 benchmarks/wrappers/gcp/pypy/nosql.py create mode 100644 benchmarks/wrappers/gcp/pypy/setup.py create mode 100644 benchmarks/wrappers/gcp/pypy/storage.py create mode 100644 
benchmarks/wrappers/gcp/python/setup.py create mode 100755 dockerfiles/gcp/pypy/Dockerfile.build create mode 100644 dockerfiles/gcp/pypy/Dockerfile.function create mode 100644 dockerfiles/gcp/python/Dockerfile.function diff --git a/benchmarks/000.microbenchmarks/010.sleep/config.json b/benchmarks/000.microbenchmarks/010.sleep/config.json index 93ce2f561..53f6349d6 100644 --- a/benchmarks/000.microbenchmarks/010.sleep/config.json +++ b/benchmarks/000.microbenchmarks/010.sleep/config.json @@ -1,6 +1,6 @@ { "timeout": 120, "memory": 128, - "languages": ["python", "nodejs"], + "languages": ["python", "nodejs", "pypy"], "modules": [] } diff --git a/benchmarks/000.microbenchmarks/010.sleep/pypy/function.py b/benchmarks/000.microbenchmarks/010.sleep/pypy/function.py new file mode 100644 index 000000000..7dda59a57 --- /dev/null +++ b/benchmarks/000.microbenchmarks/010.sleep/pypy/function.py @@ -0,0 +1,9 @@ + +from time import sleep + +def handler(event): + + # start timing + sleep_time = event.get('sleep') + sleep(sleep_time) + return { 'result': sleep_time } diff --git a/benchmarks/100.webapps/120.uploader/config.json b/benchmarks/100.webapps/120.uploader/config.json index cbc635670..6ab2c4e94 100644 --- a/benchmarks/100.webapps/120.uploader/config.json +++ b/benchmarks/100.webapps/120.uploader/config.json @@ -1,6 +1,12 @@ { "timeout": 30, "memory": 128, - "languages": ["python", "nodejs"], - "modules": ["storage"] -} + "languages": [ + "python", + "nodejs", + "pypy" + ], + "modules": [ + "storage" + ] +} \ No newline at end of file diff --git a/benchmarks/100.webapps/120.uploader/pypy/function.py b/benchmarks/100.webapps/120.uploader/pypy/function.py new file mode 100755 index 000000000..d032bbdb6 --- /dev/null +++ b/benchmarks/100.webapps/120.uploader/pypy/function.py @@ -0,0 +1,48 @@ + +import datetime +import os + +import urllib.request + +from . 
import storage +client = storage.storage.get_instance() + +SEBS_USER_AGENT = "SeBS/1.2 (https://github.com/spcl/serverless-benchmarks) SeBS Benchmark Suite/1.2" + +def handler(event): + + bucket = event.get('bucket').get('bucket') + output_prefix = event.get('bucket').get('output') + url = event.get('object').get('url') + name = os.path.basename(url) + download_path = '/tmp/{}'.format(name) + + process_begin = datetime.datetime.now() + req = urllib.request.Request(url) + req.add_header('User-Agent', SEBS_USER_AGENT) + with open(download_path, 'wb') as f: + with urllib.request.urlopen(req) as response: + f.write(response.read()) + size = os.path.getsize(download_path) + process_end = datetime.datetime.now() + + upload_begin = datetime.datetime.now() + key_name = client.upload(bucket, os.path.join(output_prefix, name), download_path) + upload_end = datetime.datetime.now() + + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1) + return { + 'result': { + 'bucket': bucket, + 'url': url, + 'key': key_name + }, + 'measurement': { + 'download_time': 0, + 'download_size': 0, + 'upload_time': upload_time, + 'upload_size': size, + 'compute_time': process_time + } + } diff --git a/benchmarks/100.webapps/120.uploader/pypy/requirements.txt b/benchmarks/100.webapps/120.uploader/pypy/requirements.txt new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/wrappers/gcp/pypy/handler.py b/benchmarks/wrappers/gcp/pypy/handler.py new file mode 100644 index 000000000..9b6989611 --- /dev/null +++ b/benchmarks/wrappers/gcp/pypy/handler.py @@ -0,0 +1,72 @@ +import datetime, io, json, os, uuid, sys + +sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) + +# This variable is defined by SeBS during function creation. 
+if 'NOSQL_STORAGE_DATABASE' in os.environ: + from function import nosql + + nosql.nosql.get_instance( + os.environ['NOSQL_STORAGE_DATABASE'] + ) + + +def handler(req): + income_timestamp = datetime.datetime.now().timestamp() + req_id = req.headers.get('Function-Execution-Id') + + + req_json = req.get_json() + req_json['request-id'] = req_id + req_json['income-timestamp'] = income_timestamp + begin = datetime.datetime.now() + # We are deployed in the same directorygit status + from function import function + ret = function.handler(req_json) + end = datetime.datetime.now() + + + log_data = { + 'output': ret['result'] + } + if 'measurement' in ret: + log_data['measurement'] = ret['measurement'] + if 'logs' in req_json: + log_data['time'] = (end - begin) / datetime.timedelta(microseconds=1) + results_begin = datetime.datetime.now() + from function import storage + storage_inst = storage.storage.get_instance() + b = req_json.get('logs').get('bucket') + storage_inst.upload_stream(b, '{}.json'.format(req_id), + io.BytesIO(json.dumps(log_data).encode('utf-8'))) + results_end = datetime.datetime.now() + results_time = (results_end - results_begin) / datetime.timedelta(microseconds=1) + else: + results_time = 0 + + # cold test + is_cold = False + fname = os.path.join('/tmp', 'cold_run') + if not os.path.exists(fname): + is_cold = True + container_id = str(uuid.uuid4())[0:8] + with open(fname, 'a') as f: + f.write(container_id) + else: + with open(fname, 'r') as f: + container_id = f.read() + + cold_start_var = "" + if "cold_start" in os.environ: + cold_start_var = os.environ["cold_start"] + + return json.dumps({ + 'begin': begin.strftime('%s.%f'), + 'end': end.strftime('%s.%f'), + 'results_time': results_time, + 'is_cold': is_cold, + 'result': log_data, + 'request_id': req_id, + 'cold_start_var': cold_start_var, + 'container_id': container_id, + }), 200, {'ContentType': 'application/json'} diff --git a/benchmarks/wrappers/gcp/pypy/nosql.py 
b/benchmarks/wrappers/gcp/pypy/nosql.py new file mode 100644 index 000000000..408712857 --- /dev/null +++ b/benchmarks/wrappers/gcp/pypy/nosql.py @@ -0,0 +1,131 @@ +from typing import List, Optional, Tuple + +from google.cloud import datastore + + +class nosql: + + instance: Optional["nosql"] = None + + """ + Each benchmark supports up to two keys - one for grouping items, + and for unique identification of each item. + + In Google Cloud Datastore, we determine different tables by using + its value for `kind` name. + + The primary key is assigned to the `kind` value. + + To implement sorting semantics, we use the ancestor relation: + the sorting key is used as the parent. + It is the assumption that all related items will have the same parent. + """ + + def __init__(self, database: str): + self._client = datastore.Client(database=database) + + def insert( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + + parent_key = self._client.key(primary_key[0], primary_key[1]) + key = self._client.key( + # kind determines the table + table_name, + # main ID key + secondary_key[1], + # organization key + parent=parent_key, + ) + + val = datastore.Entity(key=key) + val.update(data) + self._client.put(val) + + def update( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + # There is no direct update - we have to fetch the entire entity and manually change fields. 
+ parent_key = self._client.key(primary_key[0], primary_key[1]) + key = self._client.key( + # kind determines the table + table_name, + # main ID key + secondary_key[1], + # organization key + parent=parent_key, + ) + res = self._client.get(key) + if res is None: + res = datastore.Entity(key=key) + res.update(data) + self._client.put(res) + + def get( + self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str] + ) -> Optional[dict]: + + parent_key = self._client.key(primary_key[0], primary_key[1]) + key = self._client.key( + # kind determines the table + table_name, + # main ID key + secondary_key[1], + # organization key + parent=parent_key, + ) + + res = self._client.get(key) + if res is None: + return None + + # Emulate the kind key + res[secondary_key[0]] = secondary_key[1] + + return res + + """ + This query must involve partition key - it does not scan across partitions. + """ + + def query( + self, table_name: str, primary_key: Tuple[str, str], secondary_key_name: str + ) -> List[dict]: + + ancestor = self._client.key(primary_key[0], primary_key[1]) + query = self._client.query(kind=table_name, ancestor=ancestor) + res = list(query.fetch()) + + # Emulate the kind key + for item in res: + item[secondary_key_name] = item.key.name + + return res + + def delete(self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str]): + parent_key = self._client.key(primary_key[0], primary_key[1]) + key = self._client.key( + # kind determines the table + table_name, + # main ID key + secondary_key[1], + # organization key + parent=parent_key, + ) + + return self._client.delete(key) + + @staticmethod + def get_instance(database: Optional[str] = None): + if nosql.instance is None: + assert database is not None + nosql.instance = nosql(database) + return nosql.instance diff --git a/benchmarks/wrappers/gcp/pypy/setup.py b/benchmarks/wrappers/gcp/pypy/setup.py new file mode 100644 index 000000000..b3d878351 --- /dev/null +++ 
b/benchmarks/wrappers/gcp/pypy/setup.py @@ -0,0 +1,15 @@ +from distutils.core import setup +from glob import glob +from pkg_resources import parse_requirements + +with open('requirements.txt') as f: + requirements = [str(r) for r in parse_requirements(f)] + +setup( + name='function', + install_requires=requirements, + packages=['function'], + package_dir={'function': '.'}, + package_data={'function': glob('**', recursive=True)}, +) + diff --git a/benchmarks/wrappers/gcp/pypy/storage.py b/benchmarks/wrappers/gcp/pypy/storage.py new file mode 100644 index 000000000..81163cb34 --- /dev/null +++ b/benchmarks/wrappers/gcp/pypy/storage.py @@ -0,0 +1,61 @@ +import io +import os +import uuid + +from google.cloud import storage as gcp_storage + + +class storage: + instance = None + client = None + + def __init__(self): + self.client = gcp_storage.Client() + + @staticmethod + def unique_name(name): + name, extension = os.path.splitext(name) + return '{name}.{random}{extension}'.format( + name=name, + extension=extension, + random=str(uuid.uuid4()).split('-')[0] + ) + + def upload(self, bucket, file, filepath): + key_name = storage.unique_name(file) + bucket_instance = self.client.bucket(bucket) + blob = bucket_instance.blob(key_name) + blob.upload_from_filename(filepath) + return key_name + + def download(self, bucket, file, filepath): + bucket_instance = self.client.bucket(bucket) + blob = bucket_instance.blob(file) + blob.download_to_filename(filepath) + + def download_directory(self, bucket, prefix, path): + objects = self.client.bucket(bucket).list_blobs(prefix=prefix) + for obj in objects: + file_name = obj.name + path_to_file = os.path.dirname(file_name) + os.makedirs(os.path.join(path, path_to_file), exist_ok=True) + self.download(bucket, file_name, os.path.join(path, file_name)) + + def upload_stream(self, bucket, file, data): + key_name = storage.unique_name(file) + bucket_instance = self.client.bucket(bucket) + blob = bucket_instance.blob(key_name) + 
blob.upload_from_file(data) + return key_name + + def download_stream(self, bucket, file): + data = io.BytesIO() + bucket_instance = self.client.bucket(bucket) + blob = bucket_instance.blob(file) + blob.download_to_file(data) + return data.getbuffer() + + def get_instance(): + if storage.instance is None: + storage.instance = storage() + return storage.instance diff --git a/benchmarks/wrappers/gcp/python/setup.py b/benchmarks/wrappers/gcp/python/setup.py new file mode 100644 index 000000000..b3d878351 --- /dev/null +++ b/benchmarks/wrappers/gcp/python/setup.py @@ -0,0 +1,15 @@ +from distutils.core import setup +from glob import glob +from pkg_resources import parse_requirements + +with open('requirements.txt') as f: + requirements = [str(r) for r in parse_requirements(f)] + +setup( + name='function', + install_requires=requirements, + packages=['function'], + package_dir={'function': '.'}, + package_data={'function': glob('**', recursive=True)}, +) + diff --git a/dockerfiles/gcp/pypy/Dockerfile.build b/dockerfiles/gcp/pypy/Dockerfile.build new file mode 100755 index 000000000..c2c918dcf --- /dev/null +++ b/dockerfiles/gcp/pypy/Dockerfile.build @@ -0,0 +1,26 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV PYTHON_VERSION=${VERSION} +ENV DEBIAN_FRONTEND="noninteractive" +ENV TZ="Europe/Zurich" + +RUN apt-get update\ + && apt-get install -y --no-install-recommends gosu gcc build-essential libxml2 libxml2-dev zlib1g-dev gpg-agent zip\ + && apt-get update\ + && apt-get purge -y --auto-remove + +#RUN export PATH=/opt/python3.7/bin:/opt/python3.6/bin:/opt/python3.5/bin:/opt/python3.4/bin:$PATH +RUN python -mvenv /sebs/env +ENV PATH /sebs/env/bin:${PATH} +ENV VIRTUAL_ENV /sebs/env + +RUN mkdir -p /sebs/ +COPY dockerfiles/pypy_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD 
/bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/gcp/pypy/Dockerfile.function b/dockerfiles/gcp/pypy/Dockerfile.function new file mode 100644 index 000000000..90038269e --- /dev/null +++ b/dockerfiles/gcp/pypy/Dockerfile.function @@ -0,0 +1,39 @@ +ARG BASE_IMAGE +FROM $BASE_IMAGE +ARG VERSION +ENV PYTHON_VERSION=${VERSION} +ARG TARGET_ARCHITECTURE + +COPY . function/ + +ENV PLATFORM_ARG="" + +RUN pip install functions-framework + +RUN touch function/__init__.py \ + && if [[ "${TARGET_ARCHITECTURE}" == "arm64" ]]; then \ + export PLATFORM_ARG="--platform manylinux_2_17_aarch64 --only-binary=:all:"; \ + fi \ + && if [[ "${TARGET_ARCHITECTURE}" == "arm64" ]] && test -f "function/requirements.txt.arm.${PYTHON_VERSION}"; then \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . \ + -r function/requirements.txt \ + -r function/requirements.txt.arm.${PYTHON_VERSION} \ + function/ && \ + pip cache purge; \ + elif test -f "function/requirements.txt.${PYTHON_VERSION}"; then \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . \ + -r function/requirements.txt \ + -r function/requirements.txt.${PYTHON_VERSION} \ + function/ && \ + pip cache purge; \ + else \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . \ + -r function/requirements.txt \ + function/ && \ + pip cache purge; \ + fi + + +ENV PYTHONPATH="/" + +CMD ["functions-framework", "--target=handler", "--source=function/handler.py"] \ No newline at end of file diff --git a/dockerfiles/gcp/python/Dockerfile.function b/dockerfiles/gcp/python/Dockerfile.function new file mode 100644 index 000000000..90038269e --- /dev/null +++ b/dockerfiles/gcp/python/Dockerfile.function @@ -0,0 +1,39 @@ +ARG BASE_IMAGE +FROM $BASE_IMAGE +ARG VERSION +ENV PYTHON_VERSION=${VERSION} +ARG TARGET_ARCHITECTURE + +COPY . 
function/ + +ENV PLATFORM_ARG="" + +RUN pip install functions-framework + +RUN touch function/__init__.py \ + && if [[ "${TARGET_ARCHITECTURE}" == "arm64" ]]; then \ + export PLATFORM_ARG="--platform manylinux_2_17_aarch64 --only-binary=:all:"; \ + fi \ + && if [[ "${TARGET_ARCHITECTURE}" == "arm64" ]] && test -f "function/requirements.txt.arm.${PYTHON_VERSION}"; then \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . \ + -r function/requirements.txt \ + -r function/requirements.txt.arm.${PYTHON_VERSION} \ + function/ && \ + pip cache purge; \ + elif test -f "function/requirements.txt.${PYTHON_VERSION}"; then \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . \ + -r function/requirements.txt \ + -r function/requirements.txt.${PYTHON_VERSION} \ + function/ && \ + pip cache purge; \ + else \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . \ + -r function/requirements.txt \ + function/ && \ + pip cache purge; \ + fi + + +ENV PYTHONPATH="/" + +CMD ["functions-framework", "--target=handler", "--source=function/handler.py"] \ No newline at end of file From 686719ba5919c7339c584986ad04b4aa42c697c7 Mon Sep 17 00:00:00 2001 From: Markbeep Date: Sat, 13 Dec 2025 11:24:13 +0100 Subject: [PATCH 15/31] fix gcp images to specific runtime versions instead of ubuntu --- config/systems.json | 14 ++++++-------- dockerfiles/gcp/nodejs/Dockerfile.build | 6 +----- dockerfiles/gcp/python/Dockerfile.build | 7 +++---- sebs/faas/container.py | 5 ----- sebs/gcp/container.py | 8 -------- 5 files changed, 10 insertions(+), 30 deletions(-) diff --git a/config/systems.json b/config/systems.json index 8387db1cc..f51800a2d 100644 --- a/config/systems.json +++ b/config/systems.json @@ -260,11 +260,9 @@ "python": { "base_images": { "x64": { - "3.8": "ubuntu:22.04", - "3.9": "ubuntu:22.04", - "3.10": "ubuntu:22.04", - "3.11": "ubuntu:22.04", - "3.12": "ubuntu:22.04" + "3.10": "python:3.10-slim", + "3.11": "python:3.11-slim", + "3.12": "python:3.12-slim" } }, "images": [ @@ 
-322,8 +320,8 @@ "nodejs": { "base_images": { "x64": { - "18": "ubuntu:22.04", - "20": "ubuntu:22.04" + "20": "node:20-slim", + "22": "node:22-slim" } }, "images": [ @@ -420,4 +418,4 @@ "container" ] } -} \ No newline at end of file +} diff --git a/dockerfiles/gcp/nodejs/Dockerfile.build b/dockerfiles/gcp/nodejs/Dockerfile.build index 477f236bc..d98780c50 100755 --- a/dockerfiles/gcp/nodejs/Dockerfile.build +++ b/dockerfiles/gcp/nodejs/Dockerfile.build @@ -1,12 +1,8 @@ ARG BASE_IMAGE FROM ${BASE_IMAGE} ARG VERSION -ENV NVM_DIR=/nvm -#RUN install_node --ignore-verification-failure v${VERSION} -RUN apt-get update && apt-get install -y gosu wget -RUN mkdir -p ${NVM_DIR} && wget -qO- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash -RUN . ${NVM_DIR}/nvm.sh && nvm install ${VERSION} && nvm alias default ${VERSION} && nvm use default +COPY --from=tianon/gosu:1.19-debian /usr/local/bin/gosu /usr/local/bin/gosu RUN mkdir -p /sebs/ COPY dockerfiles/nodejs_installer.sh /sebs/installer.sh diff --git a/dockerfiles/gcp/python/Dockerfile.build b/dockerfiles/gcp/python/Dockerfile.build index 88554d230..ae52345b1 100755 --- a/dockerfiles/gcp/python/Dockerfile.build +++ b/dockerfiles/gcp/python/Dockerfile.build @@ -5,11 +5,10 @@ ENV PYTHON_VERSION=${VERSION} ENV DEBIAN_FRONTEND="noninteractive" ENV TZ="Europe/Zurich" +COPY --from=tianon/gosu:1.19-debian /usr/local/bin/gosu /usr/local/bin/gosu + RUN apt-get update\ - && apt-get install -y --no-install-recommends gosu gcc build-essential libxml2 libxml2-dev zlib1g-dev software-properties-common gpg-agent zip\ - && add-apt-repository -y ppa:deadsnakes/ppa\ - && apt-get update\ - && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-venv python${PYTHON_VERSION}-dev\ + && apt-get install -y --no-install-recommends gcc build-essential libxml2 libxml2-dev zlib1g-dev gpg-agent zip\ && apt-get purge -y --auto-remove #RUN export 
PATH=/opt/python3.7/bin:/opt/python3.6/bin:/opt/python3.5/bin:/opt/python3.4/bin:$PATH diff --git a/sebs/faas/container.py b/sebs/faas/container.py index 073218ed3..b17525f7b 100644 --- a/sebs/faas/container.py +++ b/sebs/faas/container.py @@ -126,9 +126,6 @@ def registry_name( ) -> Tuple[str, str, str, str]: pass - def get_adapted_image_name(self, image_name: str, language_name: str,language_version: str, architecture: str): - return image_name - def build_base_image( self, directory: str, @@ -199,8 +196,6 @@ def build_base_image( "our documentation. We recommend QEMU as it can be configured to run automatically." ) - builder_image = self.get_adapted_image_name(builder_image, language_name, language_version, architecture) - buildargs = { "VERSION": language_version, "BASE_IMAGE": builder_image, diff --git a/sebs/gcp/container.py b/sebs/gcp/container.py index 2159ac7bf..9afcc7664 100644 --- a/sebs/gcp/container.py +++ b/sebs/gcp/container.py @@ -61,14 +61,6 @@ def find_image(self, repository_name, image_tag) -> bool: raise e return False - def get_adapted_image_name(self, image_name: str, language_name: str,language_version: str, architecture: str): - if language_name == "python": - return f"python:{language_version}-slim" - elif language_name == "nodejs": - return f"node:{language_version}-slim" - - return image_name - def push_image(self, repository_uri, image_tag): self.logging.info("Authenticating Docker against Artifact Registry...") self.creds.refresh(Request()) From 8e4a8be57b00bbf95e67fef45051a09bc58d84cc Mon Sep 17 00:00:00 2001 From: toooadi Date: Sat, 13 Dec 2025 22:11:00 +0100 Subject: [PATCH 16/31] Adapt AWS deployments --- dockerfiles/aws/pypy/Dockerfile.build | 17 ++++++++++++++ dockerfiles/aws/pypy/Dockerfile.function | 28 ++++++------------------ 2 files changed, 24 insertions(+), 21 deletions(-) create mode 100644 dockerfiles/aws/pypy/Dockerfile.build diff --git a/dockerfiles/aws/pypy/Dockerfile.build b/dockerfiles/aws/pypy/Dockerfile.build new 
file mode 100644 index 000000000..20d50e6ba --- /dev/null +++ b/dockerfiles/aws/pypy/Dockerfile.build @@ -0,0 +1,17 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV PYTHON_VERSION=${VERSION} + +COPY --from=tianon/gosu:1.19-debian /usr/local/bin/gosu /usr/local/bin/gosu + +RUN mkdir -p /sebs/ +COPY dockerfiles/pypy_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV PATH=/usr/sbin:$PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/aws/pypy/Dockerfile.function b/dockerfiles/aws/pypy/Dockerfile.function index d42190a97..4a570aab4 100644 --- a/dockerfiles/aws/pypy/Dockerfile.function +++ b/dockerfiles/aws/pypy/Dockerfile.function @@ -1,29 +1,15 @@ ARG BASE_IMAGE -FROM $BASE_IMAGE -ARG VERSION -ENV PYTHON_VERSION=${VERSION} ARG TARGET_ARCHITECTURE -COPY . function/ +FROM --platform=linux/amd64 ${BASE_IMAGE} AS base-x64 +FROM --platform=linux/arm64/v8 ${BASE_IMAGE} AS base-arm64 -RUN dnf install -y tar bzip2 gzip glibc-langpack-en -ENV LANG=en_US.UTF-8 -ENV LC_ALL=en_US.UTF-8 - -RUN if [ "${TARGET_ARCHITECTURE}" = "arm64" ]; then \ - export PYPY_ARCH="aarch64"; \ - else \ - export PYPY_ARCH="linux64"; \ - fi \ - && curl -L -o pypy.tar.bz2 https://downloads.python.org/pypy/pypy3.11-v7.3.20-${PYPY_ARCH}.tar.bz2 \ - && tar -xjf pypy.tar.bz2 \ - && mv pypy3.11-v7.3.20-${PYPY_ARCH} /opt/pypy \ - && rm pypy.tar.bz2 -RUN chmod -R +x /opt/pypy/bin -ENV PATH=/opt/pypy/bin:$PATH -RUN python -m ensurepip \ - && python -mpip install -U pip wheel +FROM base-${TARGET_ARCHITECTURE} +ARG TARGET_ARCHITECTURE +ARG VERSION +ENV PYTHON_VERSION=${VERSION} +COPY . 
function/ ENV PLATFORM_ARG="" RUN touch function/__init__.py From e783b8fa8a3b90b8cb1b98bcbf5538337f41283c Mon Sep 17 00:00:00 2001 From: toooadi Date: Sat, 13 Dec 2025 22:16:05 +0100 Subject: [PATCH 17/31] Add systems.json for aws --- config/systems.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/config/systems.json b/config/systems.json index f51800a2d..87e5621f4 100644 --- a/config/systems.json +++ b/config/systems.json @@ -156,10 +156,10 @@ "pypy": { "base_images": { "x64": { - "3.11": "amazon/aws-lambda-provided:al2023-x86_64" + "3.11": "pypy:3.11-slim" }, "arm64": { - "3.11": "amazon/aws-lambda-provided:al2023-arm64" + "3.11": "pypy:3.11-slim" } }, "images": [ @@ -418,4 +418,4 @@ "container" ] } -} +} \ No newline at end of file From d43af67eaea1fe128521b710ffd5866bb5036d9b Mon Sep 17 00:00:00 2001 From: toooadi Date: Sun, 14 Dec 2025 10:26:37 +0100 Subject: [PATCH 18/31] GCP add arm64 support (containers) --- config/systems.json | 15 +++++++++++---- dockerfiles/gcp/pypy/Dockerfile.function | 9 +++++++-- dockerfiles/gcp/python/Dockerfile.function | 9 +++++++-- sebs/gcp/gcp.py | 7 ++++++- 4 files changed, 31 insertions(+), 9 deletions(-) diff --git a/config/systems.json b/config/systems.json index 87e5621f4..645ee925c 100644 --- a/config/systems.json +++ b/config/systems.json @@ -263,6 +263,11 @@ "3.10": "python:3.10-slim", "3.11": "python:3.11-slim", "3.12": "python:3.12-slim" + }, + "arm64": { + "3.10": "python:3.10-slim", + "3.11": "python:3.11-slim", + "3.12": "python:3.12-slim" } }, "images": [ @@ -290,9 +295,10 @@ "pypy": { "base_images": { "x64": { - "3.10": "pypy:3.10-slim", - "3.11": "pypy:3.11-slim", - "3.12": "pypy:3.12-slim" + "3.11": "pypy:3.11-slim" + }, + "arm64": { + "3.11": "pypy:3.11-slim" } }, "images": [ @@ -346,7 +352,8 @@ } }, "architecture": [ - "x64" + "x64", + "arm64" ], "deployments": [ "package", diff --git a/dockerfiles/gcp/pypy/Dockerfile.function b/dockerfiles/gcp/pypy/Dockerfile.function index 
90038269e..f165437e6 100644 --- a/dockerfiles/gcp/pypy/Dockerfile.function +++ b/dockerfiles/gcp/pypy/Dockerfile.function @@ -1,8 +1,13 @@ ARG BASE_IMAGE -FROM $BASE_IMAGE +ARG TARGET_ARCHITECTURE + +FROM --platform=linux/amd64 ${BASE_IMAGE} AS base-x64 +FROM --platform=linux/arm64/v8 ${BASE_IMAGE} AS base-arm64 + +FROM base-${TARGET_ARCHITECTURE} +ARG TARGET_ARCHITECTURE ARG VERSION ENV PYTHON_VERSION=${VERSION} -ARG TARGET_ARCHITECTURE COPY . function/ diff --git a/dockerfiles/gcp/python/Dockerfile.function b/dockerfiles/gcp/python/Dockerfile.function index 90038269e..f165437e6 100644 --- a/dockerfiles/gcp/python/Dockerfile.function +++ b/dockerfiles/gcp/python/Dockerfile.function @@ -1,8 +1,13 @@ ARG BASE_IMAGE -FROM $BASE_IMAGE +ARG TARGET_ARCHITECTURE + +FROM --platform=linux/amd64 ${BASE_IMAGE} AS base-x64 +FROM --platform=linux/arm64/v8 ${BASE_IMAGE} AS base-arm64 + +FROM base-${TARGET_ARCHITECTURE} +ARG TARGET_ARCHITECTURE ARG VERSION ENV PYTHON_VERSION=${VERSION} -ARG TARGET_ARCHITECTURE COPY . 
function/ diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 0c709194f..8e722a9c3 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -95,11 +95,12 @@ def default_function_name( ) -> str: # Create function name resource_id = resources.resources_id if resources else self.config.resources.resources_id - func_name = "sebs-{}-{}-{}-{}".format( + func_name = "sebs-{}-{}-{}-{}-{}".format( resource_id, code_package.benchmark, code_package.language_name, code_package.language_version, + code_package.architecture ) return GCP.format_function_name(func_name) if not code_package.container_deployment else func_name.replace(".", "-") @@ -216,6 +217,10 @@ def create_function( function_cfg = FunctionConfig.from_benchmark(code_package) architecture = function_cfg.architecture.value + if architecture == "arm64" and not container_deployment: + raise RuntimeError("GCP does not support arm64 for non-container deployments") + + if container_deployment: full_service_name = GCP.get_full_service_name(project_name, location, func_name) get_req = self.run_client.projects().locations().services().get(name=full_service_name) From 9402163a03d2060c1fc80af7a7beb5ad71322b79 Mon Sep 17 00:00:00 2001 From: toooadi Date: Sun, 14 Dec 2025 22:27:59 +0100 Subject: [PATCH 19/31] Add support for non-wheel images --- dockerfiles/aws/pypy/Dockerfile.function | 12 ++++++++++++ dockerfiles/gcp/pypy/Dockerfile.function | 12 ++++++++++++ 2 files changed, 24 insertions(+) diff --git a/dockerfiles/aws/pypy/Dockerfile.function b/dockerfiles/aws/pypy/Dockerfile.function index 4a570aab4..187d4ab39 100644 --- a/dockerfiles/aws/pypy/Dockerfile.function +++ b/dockerfiles/aws/pypy/Dockerfile.function @@ -12,6 +12,18 @@ ENV PYTHON_VERSION=${VERSION} COPY . 
function/ ENV PLATFORM_ARG="" +# Non-wheel images +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + libc-dev \ + libjpeg-dev \ + zlib1g-dev \ + libtiff-dev \ + libfreetype6-dev \ + liblcms2-dev \ + libwebp-dev \ + && rm -rf /var/lib/apt/lists/* + RUN touch function/__init__.py # Probably remove this conditional, might break pypy builds, might lead to installation of CPython libraries RUN if [ "${TARGET_ARCHITECTURE}" = "arm64" ]; then \ diff --git a/dockerfiles/gcp/pypy/Dockerfile.function b/dockerfiles/gcp/pypy/Dockerfile.function index f165437e6..088bbccae 100644 --- a/dockerfiles/gcp/pypy/Dockerfile.function +++ b/dockerfiles/gcp/pypy/Dockerfile.function @@ -15,6 +15,18 @@ ENV PLATFORM_ARG="" RUN pip install functions-framework +# Non-wheel images +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + libc-dev \ + libjpeg-dev \ + zlib1g-dev \ + libtiff-dev \ + libfreetype6-dev \ + liblcms2-dev \ + libwebp-dev \ + && rm -rf /var/lib/apt/lists/* + RUN touch function/__init__.py \ && if [[ "${TARGET_ARCHITECTURE}" == "arm64" ]]; then \ export PLATFORM_ARG="--platform manylinux_2_17_aarch64 --only-binary=:all:"; \ From 596d44043e3557b56a7b2a1448f78b086c7ff5cd Mon Sep 17 00:00:00 2001 From: toooadi Date: Sun, 14 Dec 2025 23:29:05 +0100 Subject: [PATCH 20/31] Add further non-wheel dependencies --- dockerfiles/aws/pypy/Dockerfile.function | 3 +++ dockerfiles/gcp/pypy/Dockerfile.function | 3 +++ 2 files changed, 6 insertions(+) diff --git a/dockerfiles/aws/pypy/Dockerfile.function b/dockerfiles/aws/pypy/Dockerfile.function index 187d4ab39..a130164b0 100644 --- a/dockerfiles/aws/pypy/Dockerfile.function +++ b/dockerfiles/aws/pypy/Dockerfile.function @@ -22,6 +22,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libfreetype6-dev \ liblcms2-dev \ libwebp-dev \ + g++ \ + cmake \ + make \ && rm -rf /var/lib/apt/lists/* RUN touch function/__init__.py diff --git a/dockerfiles/gcp/pypy/Dockerfile.function 
b/dockerfiles/gcp/pypy/Dockerfile.function index 088bbccae..18b53467c 100644 --- a/dockerfiles/gcp/pypy/Dockerfile.function +++ b/dockerfiles/gcp/pypy/Dockerfile.function @@ -25,6 +25,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libfreetype6-dev \ liblcms2-dev \ libwebp-dev \ + g++ \ + cmake \ + make \ && rm -rf /var/lib/apt/lists/* RUN touch function/__init__.py \ From e7095ff55188a3d7361882b23fd76d11f8885c93 Mon Sep 17 00:00:00 2001 From: toooadi Date: Sun, 14 Dec 2025 23:30:25 +0100 Subject: [PATCH 21/31] Add more benchmarks --- .../210.thumbnailer/config.json | 12 +++- .../210.thumbnailer/pypy/README.md | 12 ++++ .../210.thumbnailer/pypy/function.py | 70 +++++++++++++++++++ .../210.thumbnailer/pypy/requirements.txt | 0 .../pypy/requirements.txt.3.10 | 1 + .../pypy/requirements.txt.3.11 | 1 + .../pypy/requirements.txt.3.12 | 1 + .../210.thumbnailer/pypy/requirements.txt.3.6 | 1 + .../210.thumbnailer/pypy/requirements.txt.3.7 | 1 + .../210.thumbnailer/pypy/requirements.txt.3.8 | 1 + .../210.thumbnailer/pypy/requirements.txt.3.9 | 1 + .../pypy/requirements.txt.arm.3.8 | 1 + .../pypy/requirements.txt.arm.3.9 | 1 + .../300.utilities/311.compression/config.json | 12 +++- .../311.compression/pypy/README.md | 12 ++++ .../311.compression/pypy/function.py | 59 ++++++++++++++++ .../311.compression/pypy/requirements.txt | 0 .../501.graph-pagerank/config.json | 7 +- .../501.graph-pagerank/pypy/function.py | 29 ++++++++ .../501.graph-pagerank/pypy/requirements.txt | 0 .../pypy/requirements.txt.3.10 | 1 + .../pypy/requirements.txt.3.11 | 1 + .../pypy/requirements.txt.3.12 | 1 + .../pypy/requirements.txt.3.7 | 1 + .../pypy/requirements.txt.3.8 | 1 + .../pypy/requirements.txt.3.9 | 1 + .../pypy/requirements.txt.arm.3.8 | 1 + .../pypy/requirements.txt.arm.3.9 | 1 + 28 files changed, 222 insertions(+), 8 deletions(-) create mode 100755 benchmarks/200.multimedia/210.thumbnailer/pypy/README.md create mode 100755 
benchmarks/200.multimedia/210.thumbnailer/pypy/function.py create mode 100644 benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt create mode 100644 benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.10 create mode 100644 benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.11 create mode 100644 benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.12 create mode 100755 benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.6 create mode 100755 benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.7 create mode 100755 benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.8 create mode 100755 benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.9 create mode 100644 benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.arm.3.8 create mode 100644 benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.arm.3.9 create mode 100755 benchmarks/300.utilities/311.compression/pypy/README.md create mode 100755 benchmarks/300.utilities/311.compression/pypy/function.py create mode 100644 benchmarks/300.utilities/311.compression/pypy/requirements.txt create mode 100755 benchmarks/500.scientific/501.graph-pagerank/pypy/function.py create mode 100755 benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt create mode 100644 benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.10 create mode 100644 benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.11 create mode 100644 benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.12 create mode 100755 benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.7 create mode 100755 benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.8 create mode 100755 benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.9 create mode 100644 benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.arm.3.8 create mode 100644 
benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.arm.3.9 diff --git a/benchmarks/200.multimedia/210.thumbnailer/config.json b/benchmarks/200.multimedia/210.thumbnailer/config.json index 8edb99e52..676180d49 100644 --- a/benchmarks/200.multimedia/210.thumbnailer/config.json +++ b/benchmarks/200.multimedia/210.thumbnailer/config.json @@ -1,6 +1,12 @@ { "timeout": 60, "memory": 256, - "languages": ["python", "nodejs"], - "modules": ["storage"] -} + "languages": [ + "python", + "nodejs", + "pypy" + ], + "modules": [ + "storage" + ] +} \ No newline at end of file diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/README.md b/benchmarks/200.multimedia/210.thumbnailer/pypy/README.md new file mode 100755 index 000000000..fc6a75265 --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/README.md @@ -0,0 +1,12 @@ +# Image Processing + +A simple pipeline performing basic image operations with Pillow. + +[Inspired by AWS Lambda tutorial code.](https://docs.aws.amazon.com/lambda/latest/dg/with-s3-example-deployment-pkg.htm) + +### Instructions + +1. Deploy Docker container with function code and input data. + +2. Example of JSON payload: `{ "dir": "input_data", "id": "1" }'`. + diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/function.py b/benchmarks/200.multimedia/210.thumbnailer/pypy/function.py new file mode 100755 index 000000000..20527067b --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/function.py @@ -0,0 +1,70 @@ +import datetime +import io +import os +import sys +import uuid +from urllib.parse import unquote_plus +from PIL import Image + +from . 
# Disk-based variant kept for reference:
# def resize_image(image_path, resized_path, w, h):
#     with Image.open(image_path) as image:
#         image.thumbnail((w,h))
#         image.save(resized_path)

def resize_image(image_bytes, w, h):
    """Thumbnail the image held in *image_bytes* to fit within (w, h).

    Returns an in-memory JPEG as a BytesIO rewound to position 0 so it can
    be streamed straight to storage.
    """
    with Image.open(io.BytesIO(image_bytes)) as picture:
        picture.thumbnail((w, h))
        encoded = io.BytesIO()
        picture.save(encoded, format='jpeg')
    # Rewind so the subsequent upload reads from the first byte.
    encoded.seek(0)
    return encoded


def handler(event):
    """Download an image, thumbnail it in memory, and upload the result.

    Expects ``event['bucket']`` with ``bucket``/``input``/``output`` entries
    and ``event['object']`` with ``key``/``width``/``height``.
    Reports per-phase timings in microseconds.
    """
    bucket_cfg = event.get('bucket')
    object_cfg = event.get('object')
    bucket = bucket_cfg.get('bucket')
    input_prefix = bucket_cfg.get('input')
    output_prefix = bucket_cfg.get('output')
    # Keys arrive URL-encoded (S3-event style), hence the unquote.
    key = unquote_plus(object_cfg.get('key'))
    width = object_cfg.get('width')
    height = object_cfg.get('height')

    one_usec = datetime.timedelta(microseconds=1)

    download_begin = datetime.datetime.now()
    img = client.download_stream(bucket, os.path.join(input_prefix, key))
    download_end = datetime.datetime.now()

    process_begin = datetime.datetime.now()
    resized = resize_image(img, width, height)
    resized_size = resized.getbuffer().nbytes
    process_end = datetime.datetime.now()

    upload_begin = datetime.datetime.now()
    key_name = client.upload_stream(bucket, os.path.join(output_prefix, key), resized)
    upload_end = datetime.datetime.now()

    return {
        'result': {
            'bucket': bucket,
            'key': key_name
        },
        'measurement': {
            'download_time': (download_end - download_begin) / one_usec,
            'download_size': len(img),
            'upload_time': (upload_end - upload_begin) / one_usec,
            'upload_size': resized_size,
            'compute_time': (process_end - process_begin) / one_usec
        }
    }
def parse_directory(directory):
    """Return the total size in bytes of every regular file under *directory*."""
    total = 0
    for root, _dirs, files in os.walk(directory):
        total += sum(os.path.getsize(os.path.join(root, name)) for name in files)
    return total


def handler(event):
    """Download a directory from storage, zip it, and upload the archive.

    Expects ``event['bucket']`` (``bucket``/``input``/``output``) and
    ``event['object']['key']`` naming the input directory.
    Reports per-phase timings in microseconds.
    """
    bucket_cfg = event.get('bucket')
    bucket = bucket_cfg.get('bucket')
    input_prefix = bucket_cfg.get('input')
    output_prefix = bucket_cfg.get('output')
    key = event.get('object').get('key')

    one_usec = datetime.timedelta(microseconds=1)

    # Unique scratch directory so concurrent invocations do not collide.
    download_path = '/tmp/{}-{}'.format(key, uuid.uuid4())
    os.makedirs(download_path)

    s3_download_begin = datetime.datetime.now()
    client.download_directory(bucket, os.path.join(input_prefix, key), download_path)
    s3_download_stop = datetime.datetime.now()
    size = parse_directory(download_path)

    compress_begin = datetime.datetime.now()
    shutil.make_archive(os.path.join(download_path, key), 'zip', root_dir=download_path)
    compress_end = datetime.datetime.now()

    archive_name = '{}.zip'.format(key)
    archive_path = os.path.join(download_path, archive_name)
    archive_size = os.path.getsize(archive_path)

    s3_upload_begin = datetime.datetime.now()
    key_name = client.upload(bucket, os.path.join(output_prefix, archive_name), archive_path)
    s3_upload_stop = datetime.datetime.now()

    return {
        'result': {
            'bucket': bucket,
            'key': key_name
        },
        'measurement': {
            'download_time': (s3_download_stop - s3_download_begin) / one_usec,
            'download_size': size,
            'upload_time': (s3_upload_stop - s3_upload_begin) / one_usec,
            'upload_size': archive_size,
            'compute_time': (compress_end - compress_begin) / one_usec
        }
    }
def handler(event):
    """Build a Barabasi graph of ``event['size']`` vertices and run PageRank.

    Optional ``event['seed']`` makes graph generation reproducible.
    Returns the PageRank score of vertex 0 plus generation/compute timings
    in microseconds.
    """
    vertex_count = event.get('size')
    if "seed" in event:
        import random

        random.seed(event["seed"])

    one_usec = datetime.timedelta(microseconds=1)

    build_begin = datetime.datetime.now()
    # 10 = number of edges attached from each new vertex (preferential attachment).
    network = igraph.Graph.Barabasi(vertex_count, 10)
    build_end = datetime.datetime.now()

    rank_begin = datetime.datetime.now()
    scores = network.pagerank()
    rank_end = datetime.datetime.now()

    return {
        'result': scores[0],
        'measurement': {
            'graph_generating_time': (build_end - build_begin) / one_usec,
            'compute_time': (rank_end - rank_begin) / one_usec
        }
    }
+1 @@ +python-igraph==0.8.0 diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.9 b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.9 new file mode 100755 index 000000000..0918761fe --- /dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.9 @@ -0,0 +1 @@ +python-igraph==0.9.0 diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.arm.3.8 b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.arm.3.8 new file mode 100644 index 000000000..398b70edc --- /dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.arm.3.8 @@ -0,0 +1 @@ +python-igraph==0.11.4 diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.arm.3.9 b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.arm.3.9 new file mode 100644 index 000000000..398b70edc --- /dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.arm.3.9 @@ -0,0 +1 @@ +python-igraph==0.11.4 From 504747947b8a7b3732fe8776fb7ffe66683dc7ee Mon Sep 17 00:00:00 2001 From: Alexander Schlieper Date: Mon, 15 Dec 2025 11:44:51 +0100 Subject: [PATCH 22/31] added script for running benchmarks on multiple languages --- .gitignore | 3 +- dockerfiles/aws/pypy/Dockerfile.build | 22 + scripts/cross_platform_benchmark.py | 495 +++++++++++++++++++++ scripts/plot_comparison.py | 611 ++++++++++++++++++++++++++ scripts/run_comparison.sh | 218 +++++++++ scripts/run_experiments.py | 2 +- sebs/aws/aws.py | 14 +- tools/build_docker_images.py | 2 - 8 files changed, 1359 insertions(+), 8 deletions(-) create mode 100644 dockerfiles/aws/pypy/Dockerfile.build create mode 100644 scripts/cross_platform_benchmark.py create mode 100644 scripts/plot_comparison.py create mode 100755 scripts/run_comparison.sh diff --git a/.gitignore b/.gitignore index 274165ed8..ae5108f68 100644 --- a/.gitignore +++ b/.gitignore @@ -189,4 +189,5 @@ cache *.iml # MacOS Finder 
-**/.DS_Store \ No newline at end of file +**/.DS_Store +results/* \ No newline at end of file diff --git a/dockerfiles/aws/pypy/Dockerfile.build b/dockerfiles/aws/pypy/Dockerfile.build new file mode 100644 index 000000000..28718cd4b --- /dev/null +++ b/dockerfiles/aws/pypy/Dockerfile.build @@ -0,0 +1,22 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV PYTHON_VERSION=${VERSION} + +# useradd, groupmod +RUN yum install -y shadow-utils zip +ENV GOSU_VERSION 1.14 +# https://github.com/tianon/gosu/releases/tag/1.14 +# key https://keys.openpgp.org/search?q=tianon%40debian.org +RUN curl -o /usr/local/bin/gosu -SL "https://github.com/tianon/gosu/releases/download/${GOSU_VERSION}/gosu-amd64" \ + && chmod +x /usr/local/bin/gosu +RUN mkdir -p /sebs/ +COPY dockerfiles/python_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV PATH=/usr/sbin:$PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] \ No newline at end of file diff --git a/scripts/cross_platform_benchmark.py b/scripts/cross_platform_benchmark.py new file mode 100644 index 000000000..0d0a5909b --- /dev/null +++ b/scripts/cross_platform_benchmark.py @@ -0,0 +1,495 @@ +#!/usr/bin/env python3 + +""" +Cross-platform benchmark comparison tool for SeBS. +Runs benchmarks across multiple languages and cloud platforms, +aggregates results, and provides comparison analysis. 
+""" + +import argparse +import json +import logging +import os +import sys +import time +from datetime import datetime +from pathlib import Path +from typing import Dict, List, Optional, Tuple +import subprocess +import traceback + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +PROJECT_ROOT = os.path.abspath(os.path.join(SCRIPT_DIR, os.pardir)) +sys.path.insert(0, PROJECT_ROOT) + +# Language-version mappings for different platforms +LANGUAGE_CONFIGS = { + 'aws': { + 'python': ['3.11', '3.10', '3.9', '3.8'], + 'nodejs': ['16'], + 'rust': ['1.80', '1.81', '1.82'], + 'java': ['17'], + 'pypy': ['3.11'] + }, + 'azure': { + 'python': ['3.11', '3.10', '3.9', '3.8'], + 'nodejs': ['20', '18', '16'], + 'java': ['17'], + 'pypy': ['3.11'] + }, + 'gcp': { + 'python': ['3.12', '3.11', '3.10', '3.9', '3.8'], + 'nodejs': ['20', '18'] + }, + 'local': { + 'python': ['3.11', '3.10', '3.9'], + 'nodejs': ['20', '18', '16'], + 'pypy': ['3.11'] + } +} + +class BenchmarkRunner: + """Orchestrates benchmark execution across platforms and languages.""" + + def __init__(self, output_dir: str, cache_dir: str = 'cache', verbose: bool = False): + self.output_dir = Path(output_dir) + self.cache_dir = cache_dir + self.verbose = verbose + self.results = { + 'metadata': { + 'start_time': datetime.now().isoformat(), + 'end_time': None, + 'version': '1.0.0' + }, + 'benchmarks': {} + } + + # Create output directory + self.output_dir.mkdir(parents=True, exist_ok=True) + + # Setup logging + log_file = self.output_dir / 'benchmark_run.log' + logging.basicConfig( + level=logging.DEBUG if verbose else logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s', + handlers=[ + logging.FileHandler(log_file), + logging.StreamHandler() + ] + ) + self.logger = logging.getLogger(__name__) + + def run_single_benchmark( + self, + benchmark: str, + platform: str, + language: str, + version: str, + config_file: str, + input_size: str = 'test', + repetitions: int = 5, + memory: int = 256, + 
    def run_single_benchmark(
        self,
        benchmark: str,
        platform: str,
        language: str,
        version: str,
        config_file: str,
        input_size: str = 'test',
        repetitions: int = 5,
        memory: int = 256,
        architecture: str = 'x64'
    ) -> Tuple[bool, Optional[str], Optional[Dict]]:
        """
        Run a single benchmark configuration by shelling out to sebs.py.

        Args:
            benchmark: Benchmark name, e.g. '110.dynamic-html'.
            platform: Deployment target ('aws', 'azure', 'gcp', 'local').
            language: Runtime language name.
            version: Runtime language version.
            config_file: Path to the base SeBS JSON config (read, then
                specialized per run and written next to the results).
            input_size: Benchmark input size passed to `benchmark invoke`.
            repetitions: Number of invocations per run.
            memory: Function memory in MB.
            architecture: Target CPU architecture ('x64' or 'arm64').

        Returns:
            (success, experiment directory path, error message or None).
            Note: despite the third slot's Dict annotation, this method
            always returns a string error message or None there.
        """
        run_id = f"{benchmark}_{platform}_{language}_{version}_{memory}MB"
        self.logger.info(f"Starting: {run_id}")

        # Create experiment output directory
        experiment_dir = self.output_dir / run_id
        experiment_dir.mkdir(parents=True, exist_ok=True)

        # Update config for this run
        try:
            with open(config_file, 'r') as f:
                config = json.load(f)

            # Specialize the base config for this language/version/memory run.
            config['experiments']['runtime'] = {
                'language': language,
                'version': version
            }
            config['experiments']['repetitions'] = repetitions
            config['experiments']['memory'] = memory
            config['experiments']['architecture'] = architecture
            config['deployment']['name'] = platform

            # Write updated config
            run_config_file = experiment_dir / 'config.json'
            with open(run_config_file, 'w') as f:
                json.dump(config, f, indent=2)

            # Construct sebs.py command
            cmd = [
                sys.executable,
                os.path.join(PROJECT_ROOT, 'sebs.py'),
                'benchmark',
                'invoke',
                benchmark,
                input_size,
                '--config', str(run_config_file),
                '--deployment', platform,
                '--language', language,
                '--language-version', version,
                '--memory', str(memory),
                '--architecture', architecture,
                '--output-dir', str(experiment_dir),
                '--cache', self.cache_dir
            ]

            if self.verbose:
                cmd.append('--verbose')

            self.logger.debug(f"Command: {' '.join(cmd)}")

            # Execute benchmark (run from experiment directory so experiments.json is saved there)
            start_time = time.time()
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=600,  # 10 minute timeout
                cwd=str(experiment_dir)  # Run from experiment directory
            )
            execution_time = time.time() - start_time

            # Save stdout/stderr
            with open(experiment_dir / 'stdout.log', 'w') as f:
                f.write(result.stdout)
            with open(experiment_dir / 'stderr.log', 'w') as f:
                f.write(result.stderr)

            if result.returncode == 0:
                self.logger.info(f"✓ Completed: {run_id} ({execution_time:.2f}s)")

                # Look for experiments.json in the output
                exp_json = experiment_dir / 'experiments.json'
                if not exp_json.exists():
                    # Try to find it in subdirectories
                    exp_files = list(experiment_dir.glob('**/experiments.json'))
                    if exp_files:
                        exp_json = exp_files[0]
                # NOTE(review): exp_json is computed but never used or returned;
                # callers re-discover experiments.json via _extract_metrics.

                return True, str(experiment_dir), None
            else:
                error_msg = f"Failed with return code {result.returncode}"
                self.logger.error(f"✗ Failed: {run_id} - {error_msg}")
                # Only the first 500 chars of stderr go to the debug log;
                # the full text is in stderr.log above.
                self.logger.debug(f"Stderr: {result.stderr[:500]}")
                return False, str(experiment_dir), error_msg

        except subprocess.TimeoutExpired:
            error_msg = "Benchmark execution timed out"
            self.logger.error(f"✗ Timeout: {run_id}")
            return False, str(experiment_dir), error_msg
        except Exception as e:
            error_msg = f"Exception: {str(e)}"
            self.logger.error(f"✗ Error: {run_id} - {error_msg}")
            self.logger.debug(traceback.format_exc())
            return False, str(experiment_dir), error_msg
f.write(result.stderr) + + if result.returncode == 0: + self.logger.info(f"✓ Completed: {run_id} ({execution_time:.2f}s)") + + # Look for experiments.json in the output + exp_json = experiment_dir / 'experiments.json' + if not exp_json.exists(): + # Try to find it in subdirectories + exp_files = list(experiment_dir.glob('**/experiments.json')) + if exp_files: + exp_json = exp_files[0] + + return True, str(experiment_dir), None + else: + error_msg = f"Failed with return code {result.returncode}" + self.logger.error(f"✗ Failed: {run_id} - {error_msg}") + self.logger.debug(f"Stderr: {result.stderr[:500]}") + return False, str(experiment_dir), error_msg + + except subprocess.TimeoutExpired: + error_msg = "Benchmark execution timed out" + self.logger.error(f"✗ Timeout: {run_id}") + return False, str(experiment_dir), error_msg + except Exception as e: + error_msg = f"Exception: {str(e)}" + self.logger.error(f"✗ Error: {run_id} - {error_msg}") + self.logger.debug(traceback.format_exc()) + return False, str(experiment_dir), error_msg + + def run_comparison( + self, + benchmarks: List[str], + platforms: List[str], + languages: List[str], + config_file: str, + input_size: str = 'test', + repetitions: int = 5, + memory_sizes: List[int] = [256], + architecture: str = 'x64', + versions: Optional[Dict[str, List[str]]] = None + ): + """ + Run benchmarks across multiple configurations. 
+ + Args: + benchmarks: List of benchmark names (e.g., ['010.sleep', '110.dynamic-html']) + platforms: List of platforms (e.g., ['aws', 'azure']) + languages: List of languages (e.g., ['python', 'nodejs']) + config_file: Path to base configuration file + input_size: Benchmark input size + repetitions: Number of repetitions per benchmark + memory_sizes: List of memory configurations to test + architecture: Target architecture (x64 or arm64) + versions: Optional dict mapping language to specific versions + """ + total_runs = 0 + successful_runs = 0 + failed_runs = 0 + + for benchmark in benchmarks: + self.results['benchmarks'][benchmark] = {} + + for platform in platforms: + self.results['benchmarks'][benchmark][platform] = {} + + for language in languages: + # Check if language is supported on this platform + if language not in LANGUAGE_CONFIGS.get(platform, {}): + self.logger.warning(f"Skipping {language} on {platform} (not supported)") + continue + + # Get versions to test + if versions and language in versions: + lang_versions = versions[language] + else: + # Use first available version by default + lang_versions = [LANGUAGE_CONFIGS[platform][language][0]] + + self.results['benchmarks'][benchmark][platform][language] = {} + + for version in lang_versions: + # Verify version is supported + if version not in LANGUAGE_CONFIGS[platform][language]: + self.logger.warning( + f"Skipping {language} {version} on {platform} (version not supported)" + ) + continue + + self.results['benchmarks'][benchmark][platform][language][version] = {} + + for memory in memory_sizes: + total_runs += 1 + + success, output_dir, error = self.run_single_benchmark( + benchmark=benchmark, + platform=platform, + language=language, + version=version, + config_file=config_file, + input_size=input_size, + repetitions=repetitions, + memory=memory, + architecture=architecture + ) + + result_entry = { + 'success': success, + 'output_directory': output_dir, + 'memory_mb': memory, + 'architecture': 
architecture, + 'repetitions': repetitions, + 'input_size': input_size + } + + if success: + successful_runs += 1 + # Try to extract metrics and full experiment data + try: + extracted = self._extract_metrics(output_dir) + + # Store full experiments.json data if available + if 'full_experiment_data' in extracted: + result_entry['experiment_data'] = extracted['full_experiment_data'] + # Also store summary metrics + result_entry['metrics'] = { + k: v for k, v in extracted.items() + if k != 'full_experiment_data' + } + else: + result_entry['metrics'] = extracted + except Exception as e: + self.logger.warning(f"Could not extract metrics: {e}") + else: + failed_runs += 1 + result_entry['error'] = error + + self.results['benchmarks'][benchmark][platform][language][version][f'{memory}MB'] = result_entry + + # Update end time and summary + self.results['metadata']['end_time'] = datetime.now().isoformat() + self.results['metadata']['summary'] = { + 'total_runs': total_runs, + 'successful': successful_runs, + 'failed': failed_runs, + 'success_rate': f"{(successful_runs/total_runs*100):.1f}%" if total_runs > 0 else "N/A" + } + + # Save results + output_file = self.output_dir / 'comparison_results.json' + with open(output_file, 'w') as f: + json.dump(self.results, f, indent=2) + + self.logger.info(f"\n{'='*60}") + self.logger.info(f"Benchmark Comparison Complete!") + self.logger.info(f"{'='*60}") + self.logger.info(f"Total runs: {total_runs}") + self.logger.info(f"Successful: {successful_runs}") + self.logger.info(f"Failed: {failed_runs}") + self.logger.info(f"Results saved to: {output_file}") + + return self.results + + def _extract_metrics(self, output_dir: str) -> Dict: + """Extract key metrics from experiment output and preserve full experiments.json data.""" + metrics = {} + + # Look for experiments.json + exp_json_paths = [ + Path(output_dir) / 'experiments.json', + *Path(output_dir).glob('**/experiments.json') + ] + + for exp_json in exp_json_paths: + if 
exp_json.exists(): + with open(exp_json, 'r') as f: + data = json.load(f) + + # Store the full experiments.json data + metrics['full_experiment_data'] = data + + # Extract timing information from invocations for summary + if '_invocations' in data: + invocations = data['_invocations'] + + for func_name, func_data in invocations.items(): + execution_times = [] + cold_starts = 0 + warm_starts = 0 + + for inv_id, inv_data in func_data.items(): + if 'times' in inv_data: + if 'client' in inv_data['times']: + # Client time is in microseconds, convert to ms + execution_times.append(inv_data['times']['client'] / 1000) + + if 'stats' in inv_data: + if inv_data['stats'].get('cold_start'): + cold_starts += 1 + else: + warm_starts += 1 + + if execution_times: + metrics['execution_times_ms'] = execution_times + metrics['avg_execution_time_ms'] = sum(execution_times) / len(execution_times) + metrics['min_execution_time_ms'] = min(execution_times) + metrics['max_execution_time_ms'] = max(execution_times) + metrics['cold_starts'] = cold_starts + metrics['warm_starts'] = warm_starts + + break + + return metrics + + +def main(): + parser = argparse.ArgumentParser( + description='Run cross-platform benchmark comparisons', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Compare Python and Node.js on AWS and Azure + %(prog)s --benchmarks 010.sleep 110.dynamic-html \\ + --platforms aws azure \\ + --languages python nodejs \\ + --config config/example.json \\ + --output results/comparison_$(date +%%Y%%m%%d) + + # Compare specific Python versions on AWS + %(prog)s --benchmarks 501.graph-pagerank \\ + --platforms aws \\ + --languages python \\ + --python-versions 3.11 3.10 3.9 \\ + --memory 512 1024 \\ + --config config/example.json + """ + ) + + parser.add_argument('--benchmarks', nargs='+', required=True, + help='Benchmark names to run (e.g., 010.sleep 110.dynamic-html)') + parser.add_argument('--platforms', nargs='+', required=True, + choices=['aws', 
'azure', 'gcp', 'local'], + help='Platforms to test on') + parser.add_argument('--languages', nargs='+', required=True, + help='Languages to test (e.g., python nodejs rust java)') + parser.add_argument('--config', required=True, + help='Base configuration file') + parser.add_argument('--output', required=True, + help='Output directory for results') + + # Optional parameters + parser.add_argument('--input-size', default='test', + choices=['test', 'small', 'large'], + help='Benchmark input size (default: test)') + parser.add_argument('--repetitions', type=int, default=5, + help='Number of repetitions per benchmark (default: 5)') + parser.add_argument('--memory', nargs='+', type=int, default=[256], + help='Memory sizes in MB to test (default: 256)') + parser.add_argument('--architecture', default='x64', + choices=['x64', 'arm64'], + help='Target architecture (default: x64)') + parser.add_argument('--cache', default='cache', + help='Cache directory (default: cache)') + + # Language-specific version overrides + parser.add_argument('--python-versions', nargs='+', + help='Specific Python versions to test') + parser.add_argument('--nodejs-versions', nargs='+', + help='Specific Node.js versions to test') + parser.add_argument('--rust-versions', nargs='+', + help='Specific Rust versions to test') + parser.add_argument('--java-versions', nargs='+', + help='Specific Java versions to test') + + parser.add_argument('--verbose', action='store_true', + help='Enable verbose output') + + args = parser.parse_args() + + # Build version overrides + versions = {} + if args.python_versions: + versions['python'] = args.python_versions + if args.nodejs_versions: + versions['nodejs'] = args.nodejs_versions + if args.rust_versions: + versions['rust'] = args.rust_versions + if args.java_versions: + versions['java'] = args.java_versions + + # Create runner + runner = BenchmarkRunner( + output_dir=args.output, + cache_dir=args.cache, + verbose=args.verbose + ) + + # Run comparison + try: + 
results = runner.run_comparison( + benchmarks=args.benchmarks, + platforms=args.platforms, + languages=args.languages, + config_file=args.config, + input_size=args.input_size, + repetitions=args.repetitions, + memory_sizes=args.memory, + architecture=args.architecture, + versions=versions if versions else None + ) + + print("\n" + "="*60) + print("✓ Benchmark comparison completed successfully!") + print("="*60) + print(f"Results: {args.output}/comparison_results.json") + print(f"Logs: {args.output}/benchmark_run.log") + + return 0 + + except KeyboardInterrupt: + print("\n\nBenchmark interrupted by user") + return 130 + except Exception as e: + print(f"\n\nError during benchmark execution: {e}") + traceback.print_exc() + return 1 + + +if __name__ == '__main__': + sys.exit(main()) + diff --git a/scripts/plot_comparison.py b/scripts/plot_comparison.py new file mode 100644 index 000000000..738164804 --- /dev/null +++ b/scripts/plot_comparison.py @@ -0,0 +1,611 @@ +#!/usr/bin/env python3 + +""" +Visualization tool for cross-platform benchmark comparisons. +Creates publication-quality plots comparing performance across +languages, platforms, and configurations. 
+""" + +import argparse +import json +import logging +import sys +from pathlib import Path +from typing import Dict, List, Optional + +import matplotlib.pyplot as plt +import matplotlib.patches as mpatches +import numpy as np +import pandas as pd +import seaborn as sns + +# Set style +sns.set_style("whitegrid") +sns.set_context("paper", font_scale=1.2) + +# Color palettes for different entities +PLATFORM_COLORS = { + 'aws': '#FF9900', # AWS Orange + 'azure': '#0089D6', # Azure Blue + 'gcp': '#4285F4', # Google Blue + 'local': '#808080' # Gray +} + +LANGUAGE_COLORS = { + 'python': '#3776AB', # Python Blue + 'nodejs': '#339933', # Node.js Green + 'rust': '#000000', # Rust Black + 'java': '#007396', # Java Blue + 'pypy': '#193440', # PyPy Dark + 'cpp': '#00599C' # C++ Blue +} + + +class BenchmarkVisualizer: + """Creates visualizations from benchmark comparison results.""" + + def __init__(self, results_file: str, output_dir: Optional[str] = None): + self.results_file = Path(results_file) + self.output_dir = Path(output_dir) if output_dir else self.results_file.parent / 'plots' + self.output_dir.mkdir(parents=True, exist_ok=True) + + # Load results + with open(self.results_file, 'r') as f: + self.results = json.load(f) + + # Setup logging + logging.basicConfig( + level=logging.INFO, + format='%(levelname)s: %(message)s' + ) + self.logger = logging.getLogger(__name__) + + self.logger.info(f"Loaded results from {self.results_file}") + self.logger.info(f"Plots will be saved to {self.output_dir}") + + def extract_dataframe(self) -> pd.DataFrame: + """ + Extract benchmark results into a pandas DataFrame. 
+ + Returns a DataFrame with columns: + - benchmark: benchmark name + - platform: cloud platform + - language: programming language + - version: language version + - memory_mb: memory configuration + - avg_time_ms: average execution time + - min_time_ms: minimum execution time + - max_time_ms: maximum execution time + - cold_starts: number of cold starts + - warm_starts: number of warm starts + - success: whether the run succeeded + """ + rows = [] + + for benchmark, bench_data in self.results['benchmarks'].items(): + for platform, platform_data in bench_data.items(): + for language, lang_data in platform_data.items(): + for version, version_data in lang_data.items(): + for memory_config, result in version_data.items(): + row = { + 'benchmark': benchmark, + 'platform': platform, + 'language': language, + 'version': version, + 'memory_mb': result.get('memory_mb', 0), + 'success': result.get('success', False) + } + + # Extract metrics if available + if 'metrics' in result: + metrics = result['metrics'] + row['avg_time_ms'] = metrics.get('avg_execution_time_ms') + row['min_time_ms'] = metrics.get('min_execution_time_ms') + row['max_time_ms'] = metrics.get('max_execution_time_ms') + row['cold_starts'] = metrics.get('cold_starts', 0) + row['warm_starts'] = metrics.get('warm_starts', 0) + + # Store all execution times for detailed analysis + if 'execution_times_ms' in metrics: + row['execution_times'] = metrics['execution_times_ms'] + + rows.append(row) + + df = pd.DataFrame(rows) + self.logger.info(f"Extracted {len(df)} benchmark results") + return df + + def plot_language_comparison(self, df: pd.DataFrame, benchmark: Optional[str] = None): + """ + Create bar chart comparing languages across platforms. 
+ + Args: + df: DataFrame with benchmark results + benchmark: Optional benchmark name to filter by + """ + if benchmark: + df = df[df['benchmark'] == benchmark] + title_suffix = f" - {benchmark}" + else: + title_suffix = " - All Benchmarks" + + # Filter successful runs only + df = df[df['success'] == True].copy() + + if df.empty: + self.logger.warning(f"No successful runs for language comparison{title_suffix}") + return + + # Create grouped bar chart + fig, ax = plt.subplots(figsize=(12, 6)) + + # Group by platform and language + grouped = df.groupby(['platform', 'language'])['avg_time_ms'].mean().reset_index() + + # Pivot for plotting + pivot = grouped.pivot(index='language', columns='platform', values='avg_time_ms') + + # Create bar chart + pivot.plot(kind='bar', ax=ax, color=[PLATFORM_COLORS.get(p, '#888888') for p in pivot.columns]) + + ax.set_xlabel('Language') + ax.set_ylabel('Average Execution Time (ms)') + ax.set_title(f'Language Performance Comparison{title_suffix}') + ax.legend(title='Platform') + ax.grid(axis='y', alpha=0.3) + + plt.xticks(rotation=45) + plt.tight_layout() + + filename = f"language_comparison{'_' + benchmark if benchmark else ''}.png" + filepath = self.output_dir / filename + plt.savefig(filepath, dpi=300, bbox_inches='tight') + self.logger.info(f"Saved: {filepath}") + plt.close() + + def plot_platform_comparison(self, df: pd.DataFrame, language: Optional[str] = None): + """ + Create bar chart comparing platforms for a specific language. 
+ + Args: + df: DataFrame with benchmark results + language: Optional language to filter by + """ + if language: + df = df[df['language'] == language] + title_suffix = f" - {language.title()}" + else: + title_suffix = "" + + # Filter successful runs only + df = df[df['success'] == True].copy() + + if df.empty: + self.logger.warning(f"No successful runs for platform comparison{title_suffix}") + return + + # Create grouped bar chart + fig, ax = plt.subplots(figsize=(12, 6)) + + # Group by platform and benchmark + grouped = df.groupby(['benchmark', 'platform'])['avg_time_ms'].mean().reset_index() + + # Pivot for plotting + pivot = grouped.pivot(index='benchmark', columns='platform', values='avg_time_ms') + + # Create bar chart + pivot.plot(kind='bar', ax=ax, color=[PLATFORM_COLORS.get(p, '#888888') for p in pivot.columns]) + + ax.set_xlabel('Benchmark') + ax.set_ylabel('Average Execution Time (ms)') + ax.set_title(f'Platform Performance Comparison{title_suffix}') + ax.legend(title='Platform') + ax.grid(axis='y', alpha=0.3) + + plt.xticks(rotation=45, ha='right') + plt.tight_layout() + + filename = f"platform_comparison{'_' + language if language else ''}.png" + filepath = self.output_dir / filename + plt.savefig(filepath, dpi=300, bbox_inches='tight') + self.logger.info(f"Saved: {filepath}") + plt.close() + + def plot_memory_scaling(self, df: pd.DataFrame, benchmark: Optional[str] = None): + """ + Create line plot showing how performance scales with memory. 
+ + Args: + df: DataFrame with benchmark results + benchmark: Optional benchmark to filter by + """ + if benchmark: + df = df[df['benchmark'] == benchmark] + title_suffix = f" - {benchmark}" + else: + title_suffix = "" + + # Filter successful runs only + df = df[df['success'] == True].copy() + + if df.empty or df['memory_mb'].nunique() < 2: + self.logger.warning(f"Insufficient data for memory scaling plot{title_suffix}") + return + + fig, ax = plt.subplots(figsize=(12, 6)) + + # Plot for each language-platform combination + for (language, platform), group in df.groupby(['language', 'platform']): + group_sorted = group.sort_values('memory_mb') + label = f"{language} ({platform})" + color = LANGUAGE_COLORS.get(language, '#888888') + linestyle = '-' if platform == 'aws' else '--' if platform == 'azure' else '-.' + + ax.plot( + group_sorted['memory_mb'], + group_sorted['avg_time_ms'], + marker='o', + label=label, + color=color, + linestyle=linestyle, + linewidth=2 + ) + + ax.set_xlabel('Memory (MB)') + ax.set_ylabel('Average Execution Time (ms)') + ax.set_title(f'Performance vs Memory{title_suffix}') + ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left') + ax.grid(alpha=0.3) + + plt.tight_layout() + + filename = f"memory_scaling{'_' + benchmark if benchmark else ''}.png" + filepath = self.output_dir / filename + plt.savefig(filepath, dpi=300, bbox_inches='tight') + self.logger.info(f"Saved: {filepath}") + plt.close() + + def plot_cold_vs_warm(self, df: pd.DataFrame): + """ + Create stacked bar chart showing cold vs warm start distribution. 
+ + Args: + df: DataFrame with benchmark results + """ + # Filter successful runs only + df = df[df['success'] == True].copy() + + if df.empty or 'cold_starts' not in df.columns: + self.logger.warning("No cold start data available") + return + + # Calculate totals + df['total_invocations'] = df['cold_starts'] + df['warm_starts'] + + # Filter out rows with no invocations + df = df[df['total_invocations'] > 0] + + if df.empty: + self.logger.warning("No invocation data for cold vs warm plot") + return + + fig, ax = plt.subplots(figsize=(14, 6)) + + # Group by language and platform + grouped = df.groupby(['language', 'platform']).agg({ + 'cold_starts': 'sum', + 'warm_starts': 'sum' + }).reset_index() + + # Create labels + grouped['label'] = grouped['language'] + '\n(' + grouped['platform'] + ')' + + # Create stacked bar chart + x = np.arange(len(grouped)) + width = 0.6 + + p1 = ax.bar(x, grouped['cold_starts'], width, label='Cold Starts', color='#d62728') + p2 = ax.bar(x, grouped['warm_starts'], width, bottom=grouped['cold_starts'], + label='Warm Starts', color='#2ca02c') + + ax.set_xlabel('Language (Platform)') + ax.set_ylabel('Number of Invocations') + ax.set_title('Cold vs Warm Start Distribution') + ax.set_xticks(x) + ax.set_xticklabels(grouped['label'], rotation=45, ha='right') + ax.legend() + ax.grid(axis='y', alpha=0.3) + + plt.tight_layout() + + filepath = self.output_dir / "cold_vs_warm_starts.png" + plt.savefig(filepath, dpi=300, bbox_inches='tight') + self.logger.info(f"Saved: {filepath}") + plt.close() + + def plot_heatmap(self, df: pd.DataFrame, metric: str = 'avg_time_ms'): + """ + Create heatmap showing performance across platforms and languages. 
+ + Args: + df: DataFrame with benchmark results + metric: Metric to visualize + """ + # Filter successful runs only + df = df[df['success'] == True].copy() + + if df.empty or metric not in df.columns: + self.logger.warning(f"No data available for heatmap with metric: {metric}") + return + + # Aggregate by platform and language + pivot = df.groupby(['platform', 'language'])[metric].mean().reset_index() + pivot_table = pivot.pivot(index='platform', columns='language', values=metric) + + if pivot_table.empty: + self.logger.warning("No data for heatmap") + return + + fig, ax = plt.subplots(figsize=(10, 6)) + + sns.heatmap( + pivot_table, + annot=True, + fmt='.2f', + cmap='YlOrRd', + ax=ax, + cbar_kws={'label': 'Avg Execution Time (ms)'} + ) + + ax.set_title(f'Performance Heatmap - {metric.replace("_", " ").title()}') + ax.set_xlabel('Language') + ax.set_ylabel('Platform') + + plt.tight_layout() + + filepath = self.output_dir / f"heatmap_{metric}.png" + plt.savefig(filepath, dpi=300, bbox_inches='tight') + self.logger.info(f"Saved: {filepath}") + plt.close() + + def plot_version_comparison(self, df: pd.DataFrame, language: str): + """ + Compare different versions of the same language. 
+ + Args: + df: DataFrame with benchmark results + language: Language to compare versions for + """ + df = df[df['language'] == language] + df = df[df['success'] == True].copy() + + if df.empty or df['version'].nunique() < 2: + self.logger.warning(f"Insufficient version data for {language}") + return + + fig, ax = plt.subplots(figsize=(12, 6)) + + # Group by version and platform + grouped = df.groupby(['version', 'platform'])['avg_time_ms'].mean().reset_index() + pivot = grouped.pivot(index='version', columns='platform', values='avg_time_ms') + + pivot.plot(kind='bar', ax=ax, color=[PLATFORM_COLORS.get(p, '#888888') for p in pivot.columns]) + + ax.set_xlabel(f'{language.title()} Version') + ax.set_ylabel('Average Execution Time (ms)') + ax.set_title(f'{language.title()} Version Performance Comparison') + ax.legend(title='Platform') + ax.grid(axis='y', alpha=0.3) + + plt.xticks(rotation=0) + plt.tight_layout() + + filepath = self.output_dir / f"version_comparison_{language}.png" + plt.savefig(filepath, dpi=300, bbox_inches='tight') + self.logger.info(f"Saved: {filepath}") + plt.close() + + def create_summary_report(self, df: pd.DataFrame): + """ + Create a text summary report of the benchmark results. 
+ + Args: + df: DataFrame with benchmark results + """ + report_lines = [] + report_lines.append("="*80) + report_lines.append("BENCHMARK COMPARISON SUMMARY REPORT") + report_lines.append("="*80) + report_lines.append("") + + # Metadata + metadata = self.results.get('metadata', {}) + report_lines.append(f"Start Time: {metadata.get('start_time', 'N/A')}") + report_lines.append(f"End Time: {metadata.get('end_time', 'N/A')}") + report_lines.append("") + + # Summary statistics + if 'summary' in metadata: + summary = metadata['summary'] + report_lines.append("Overall Statistics:") + report_lines.append(f" Total Runs: {summary.get('total_runs', 0)}") + report_lines.append(f" Successful: {summary.get('successful', 0)}") + report_lines.append(f" Failed: {summary.get('failed', 0)}") + report_lines.append(f" Success Rate: {summary.get('success_rate', 'N/A')}") + report_lines.append("") + + # Successful runs only + df_success = df[df['success'] == True].copy() + + if not df_success.empty and 'avg_time_ms' in df_success.columns: + report_lines.append("Performance by Platform:") + for platform in sorted(df_success['platform'].unique()): + platform_df = df_success[df_success['platform'] == platform] + avg_time = platform_df['avg_time_ms'].mean() + report_lines.append(f" {platform.upper()}: {avg_time:.2f} ms (avg)") + report_lines.append("") + + report_lines.append("Performance by Language:") + for language in sorted(df_success['language'].unique()): + lang_df = df_success[df_success['language'] == language] + avg_time = lang_df['avg_time_ms'].mean() + report_lines.append(f" {language}: {avg_time:.2f} ms (avg)") + report_lines.append("") + + # Best performers + report_lines.append("Best Performers:") + # Check if we have valid data + if not df_success['avg_time_ms'].isna().all(): + best_overall = df_success.loc[df_success['avg_time_ms'].idxmin()] + report_lines.append( + f" Fastest Overall: {best_overall['language']} on {best_overall['platform']} " + 
f"({best_overall['avg_time_ms']:.2f} ms)" + ) + else: + report_lines.append(" No valid performance data available") + + for platform in df_success['platform'].unique(): + platform_df = df_success[df_success['platform'] == platform] + if not platform_df.empty and not platform_df['avg_time_ms'].isna().all(): + best = platform_df.loc[platform_df['avg_time_ms'].idxmin()] + report_lines.append( + f" Fastest on {platform}: {best['language']} v{best['version']} " + f"({best['avg_time_ms']:.2f} ms)" + ) + report_lines.append("") + + report_lines.append("="*80) + + # Write report + report_text = "\n".join(report_lines) + filepath = self.output_dir / "summary_report.txt" + with open(filepath, 'w') as f: + f.write(report_text) + + self.logger.info(f"Saved: {filepath}") + print("\n" + report_text) + + def create_all_plots(self): + """Generate all available plots from the benchmark results.""" + self.logger.info("Generating all plots...") + + df = self.extract_dataframe() + + if df.empty: + self.logger.error("No data to plot!") + return + + # Create summary report + self.create_summary_report(df) + + # Language comparison + self.plot_language_comparison(df) + + # Platform comparison + self.plot_platform_comparison(df) + + # Memory scaling + if df['memory_mb'].nunique() > 1: + self.plot_memory_scaling(df) + + # Cold vs warm starts + if 'cold_starts' in df.columns: + self.plot_cold_vs_warm(df) + + # Heatmap + self.plot_heatmap(df) + + # Version comparisons for each language + for language in df['language'].unique(): + if df[df['language'] == language]['version'].nunique() > 1: + self.plot_version_comparison(df, language) + + # Per-benchmark plots + for benchmark in df['benchmark'].unique(): + self.plot_language_comparison(df, benchmark=benchmark) + + self.logger.info(f"\n✓ All plots generated in: {self.output_dir}") + + +def main(): + parser = argparse.ArgumentParser( + description='Visualize cross-platform benchmark comparison results', + 
formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Generate all plots + %(prog)s results/comparison_20241212/comparison_results.json + + # Specify output directory + %(prog)s results/comparison_20241212/comparison_results.json --output plots/ + + # Generate specific plot types + %(prog)s results.json --plot-type language_comparison platform_comparison + """ + ) + + parser.add_argument('results_file', + help='Path to comparison_results.json file') + parser.add_argument('--output', '-o', + help='Output directory for plots (default: results_dir/plots)') + parser.add_argument('--plot-type', nargs='+', + choices=['language_comparison', 'platform_comparison', + 'memory_scaling', 'cold_warm', 'heatmap', 'versions', 'all'], + default=['all'], + help='Types of plots to generate (default: all)') + parser.add_argument('--language', '-l', + help='Filter by specific language') + parser.add_argument('--benchmark', '-b', + help='Filter by specific benchmark') + parser.add_argument('--format', choices=['png', 'pdf', 'svg'], default='png', + help='Output format for plots (default: png)') + + args = parser.parse_args() + + try: + visualizer = BenchmarkVisualizer(args.results_file, args.output) + + if 'all' in args.plot_type: + visualizer.create_all_plots() + else: + df = visualizer.extract_dataframe() + + if 'language_comparison' in args.plot_type: + visualizer.plot_language_comparison(df, benchmark=args.benchmark) + + if 'platform_comparison' in args.plot_type: + visualizer.plot_platform_comparison(df, language=args.language) + + if 'memory_scaling' in args.plot_type: + visualizer.plot_memory_scaling(df, benchmark=args.benchmark) + + if 'cold_warm' in args.plot_type: + visualizer.plot_cold_vs_warm(df) + + if 'heatmap' in args.plot_type: + visualizer.plot_heatmap(df) + + if 'versions' in args.plot_type and args.language: + visualizer.plot_version_comparison(df, args.language) + + visualizer.create_summary_report(df) + + print(f"\n✓ Visualization 
complete! Plots saved to: {visualizer.output_dir}") + return 0 + + except FileNotFoundError: + print(f"Error: Results file not found: {args.results_file}") + return 1 + except json.JSONDecodeError: + print(f"Error: Invalid JSON in results file: {args.results_file}") + return 1 + except Exception as e: + print(f"Error: {e}") + import traceback + traceback.print_exc() + return 1 + + +if __name__ == '__main__': + sys.exit(main()) + diff --git a/scripts/run_comparison.sh b/scripts/run_comparison.sh new file mode 100755 index 000000000..b6206e519 --- /dev/null +++ b/scripts/run_comparison.sh @@ -0,0 +1,218 @@ +#!/bin/bash + +# Convenience wrapper for running benchmark comparisons and generating plots + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Default values +BENCHMARKS="010.sleep" +PLATFORMS="aws" +LANGUAGES="python" +CONFIG="${PROJECT_ROOT}/config/example.json" +OUTPUT_DIR="${PROJECT_ROOT}/results/comparison_$(date +%Y%m%d_%H%M%S)" +REPETITIONS=5 +MEMORY="256" +INPUT_SIZE="test" +GENERATE_PLOTS=true + +# Print usage +usage() { + cat << EOF +Usage: $(basename "$0") [OPTIONS] + +Run cross-platform benchmark comparisons and generate plots. 
+ +Options: + -b, --benchmarks NAMES Benchmark names (space-separated, default: 010.sleep) + -p, --platforms PLATFORMS Platforms to test (space-separated, default: aws) + Available: aws azure gcp local + -l, --languages LANGUAGES Languages to test (space-separated, default: python) + Available: python nodejs rust java pypy + -c, --config FILE Configuration file (default: config/example.json) + -o, --output DIR Output directory (default: results/comparison_TIMESTAMP) + -r, --repetitions NUM Number of repetitions (default: 5) + -m, --memory SIZES Memory sizes in MB (space-separated, default: 256) + -i, --input-size SIZE Input size: test, small, large (default: test) + --no-plots Skip plot generation + --skip-benchmark Skip benchmark run, only generate plots + -h, --help Show this help message + +Examples: + # Compare Python and Node.js on AWS and Azure + $(basename "$0") -b "010.sleep 110.dynamic-html" -p "aws azure" -l "python nodejs" + + # Test different memory configurations + $(basename "$0") -b "501.graph-pagerank" -m "512 1024 2048" -r 10 + + # Just generate plots from existing results + $(basename "$0") --skip-benchmark -o results/comparison_20241212_120000 + +EOF +} + +# Parse arguments +SKIP_BENCHMARK=false +while [[ $# -gt 0 ]]; do + case $1 in + -b|--benchmarks) + BENCHMARKS="$2" + shift 2 + ;; + -p|--platforms) + PLATFORMS="$2" + shift 2 + ;; + -l|--languages) + LANGUAGES="$2" + shift 2 + ;; + -c|--config) + CONFIG="$2" + shift 2 + ;; + -o|--output) + OUTPUT_DIR="$2" + shift 2 + ;; + -r|--repetitions) + REPETITIONS="$2" + shift 2 + ;; + -m|--memory) + MEMORY="$2" + shift 2 + ;; + -i|--input-size) + INPUT_SIZE="$2" + shift 2 + ;; + --no-plots) + GENERATE_PLOTS=false + shift + ;; + --skip-benchmark) + SKIP_BENCHMARK=true + shift + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo -e "${RED}Error: Unknown option $1${NC}" + usage + exit 1 + ;; + esac +done + +echo "==================================" +echo "Benchmark Comparison Tool" +echo 
"==================================" +echo "" + +# Check if config exists +if [ ! -f "$CONFIG" ]; then + echo -e "${RED}Error: Configuration file not found: $CONFIG${NC}" + exit 1 +fi + +# Run benchmarks unless skipped +if [ "$SKIP_BENCHMARK" = false ]; then + echo -e "${GREEN}Step 1: Running Benchmarks${NC}" + echo " Benchmarks: $BENCHMARKS" + echo " Platforms: $PLATFORMS" + echo " Languages: $LANGUAGES" + echo " Repetitions: $REPETITIONS" + echo " Memory: $MEMORY MB" + echo " Input Size: $INPUT_SIZE" + echo " Output: $OUTPUT_DIR" + echo "" + + # Build command + CMD=( + python3 "${SCRIPT_DIR}/cross_platform_benchmark.py" + --benchmarks $BENCHMARKS + --platforms $PLATFORMS + --languages $LANGUAGES + --config "$CONFIG" + --output "$OUTPUT_DIR" + --repetitions "$REPETITIONS" + --memory $MEMORY + --input-size "$INPUT_SIZE" + --verbose + ) + + echo "Running: ${CMD[@]}" + echo "" + + if "${CMD[@]}"; then + echo -e "${GREEN}✓ Benchmarks completed successfully!${NC}" + else + echo -e "${RED}✗ Benchmark execution failed!${NC}" + exit 1 + fi +else + echo -e "${YELLOW}Skipping benchmark execution${NC}" + + # Check if results file exists + if [ ! 
-f "$OUTPUT_DIR/comparison_results.json" ]; then + echo -e "${RED}Error: Results file not found: $OUTPUT_DIR/comparison_results.json${NC}" + exit 1 + fi +fi + +# Generate plots +if [ "$GENERATE_PLOTS" = true ]; then + echo "" + echo -e "${GREEN}Step 2: Generating Plots${NC}" + echo "" + + PLOT_CMD=( + python3 "${SCRIPT_DIR}/plot_comparison.py" + "$OUTPUT_DIR/comparison_results.json" + --output "$OUTPUT_DIR/plots" + ) + + if "${PLOT_CMD[@]}"; then + echo -e "${GREEN}✓ Plots generated successfully!${NC}" + else + echo -e "${YELLOW}⚠ Plot generation failed (may need matplotlib/seaborn)${NC}" + fi +fi + +echo "" +echo "==================================" +echo -e "${GREEN}Comparison Complete!${NC}" +echo "==================================" +echo "" +echo "Results Location: $OUTPUT_DIR" +echo " - comparison_results.json (raw results with full experiments.json data)" +echo " - benchmark_run.log (execution log)" +if [ "$GENERATE_PLOTS" = true ]; then + echo " - plots/ (visualizations)" +fi +echo "" +echo "Useful commands:" +echo " # Regenerate plots" +echo " python3 ${SCRIPT_DIR}/plot_comparison.py $OUTPUT_DIR/comparison_results.json" +echo "" +echo " # Extract individual experiments.json files" +echo " python3 ${SCRIPT_DIR}/export_comparison_data.py $OUTPUT_DIR/comparison_results.json -e $OUTPUT_DIR/experiments/" +echo "" +echo " # Create aggregated experiments.json" +echo " python3 ${SCRIPT_DIR}/export_comparison_data.py $OUTPUT_DIR/comparison_results.json -a $OUTPUT_DIR/aggregated.json" +echo "" +echo " # Export to CSV" +echo " python3 ${SCRIPT_DIR}/export_comparison_data.py $OUTPUT_DIR/comparison_results.json -c $OUTPUT_DIR/summary.csv" +echo "" + diff --git a/scripts/run_experiments.py b/scripts/run_experiments.py index c18b96c0a..011d2f3ee 100755 --- a/scripts/run_experiments.py +++ b/scripts/run_experiments.py @@ -410,7 +410,7 @@ def clean(self): objects = self.connection.list_objects_v2(bucket) objects = [obj.object_name for obj in objects] for err in 
self.connection.remove_objects(bucket, objects): - logging.error("Deletion Error: {}".format(del_err)) + logging.error("Deletion Error: {}".format(err)) def download_results(self, result_dir): result_dir = os.path.join(result_dir, 'storage_output') diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 9fac5e8d6..6314a5eac 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -137,6 +137,7 @@ def package_code( CONFIG_FILES = { "python": ["handler.py", "requirements.txt", ".python_packages"], "nodejs": ["handler.js", "package.json", "node_modules"], + "pypy": ["handler.py", "requirements.txt", ".python_packages"], } package_config = CONFIG_FILES[language_name] function_dir = os.path.join(directory, "function") @@ -258,10 +259,15 @@ def create_function( "S3Key": code_prefix, } - create_function_params["Runtime"] = "{}{}".format( - language, self._map_language_runtime(language, language_runtime) - ) - create_function_params["Handler"] = "handler.handler" + # PyPy uses custom runtime (provided.al2023) since there's no native PyPy runtime + if language == "pypy": + create_function_params["Runtime"] = "provided.al2023" + create_function_params["Handler"] = "handler.handler" + else: + create_function_params["Runtime"] = "{}{}".format( + language, self._map_language_runtime(language, language_runtime) + ) + create_function_params["Handler"] = "handler.handler" create_function_params = { k: v for k, v in create_function_params.items() if v is not None diff --git a/tools/build_docker_images.py b/tools/build_docker_images.py index 8428a0f59..c80ecfae4 100755 --- a/tools/build_docker_images.py +++ b/tools/build_docker_images.py @@ -22,8 +22,6 @@ help="Optional Docker platform (e.g., linux/amd64) to override host architecture.", ) parser.add_argument("--language-version", default=None, type=str, action="store") -# Optional: force build platform (e.g., linux/amd64 on Apple Silicon) -parser.add_argument("--platform", default=None, type=str, action="store") args = 
parser.parse_args() config = json.load(open(os.path.join(PROJECT_DIR, "config", "systems.json"), "r")) client = docker.from_env() From e2657d3dfe1b4e151852826b571463987d22e8a4 Mon Sep 17 00:00:00 2001 From: Alexander Schlieper Date: Mon, 15 Dec 2025 18:13:43 +0100 Subject: [PATCH 23/31] force aws pypy to --container-deployment --- scripts/cross_platform_benchmark.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/cross_platform_benchmark.py b/scripts/cross_platform_benchmark.py index 0d0a5909b..5a4d32412 100644 --- a/scripts/cross_platform_benchmark.py +++ b/scripts/cross_platform_benchmark.py @@ -142,6 +142,10 @@ def run_single_benchmark( '--cache', self.cache_dir ] + # Add --container-deployment for AWS PyPy as it is required + if platform == 'aws' and language == 'pypy': + cmd.append('--container-deployment') + if self.verbose: cmd.append('--verbose') From acfc7c95e36846bc0eb7ad63141c23a804ab206f Mon Sep 17 00:00:00 2001 From: Alexander Schlieper Date: Tue, 16 Dec 2025 19:49:44 +0100 Subject: [PATCH 24/31] Add benchmarking scripts and configuration --- config/comparison_example.json | 41 +++ requirements.visualization.txt | 7 + scripts/cross_platform_benchmark.py | 499 ++++++++++++++++++++++++++++ scripts/run_comparison.sh | 218 ++++++++++++ 4 files changed, 765 insertions(+) create mode 100644 config/comparison_example.json create mode 100644 requirements.visualization.txt create mode 100644 scripts/cross_platform_benchmark.py create mode 100755 scripts/run_comparison.sh diff --git a/config/comparison_example.json b/config/comparison_example.json new file mode 100644 index 000000000..f9daf292a --- /dev/null +++ b/config/comparison_example.json @@ -0,0 +1,41 @@ +{ + "deployment": { + "name": "aws", + "aws": { + "region": "us-east-1", + "access_key_id": "", + "secret_access_key": "" + }, + "azure": { + "subscription_id": "", + "tenant_id": "", + "client_id": "", + "client_secret": "" + }, + "gcp": { + "project_id": "", + "credentials": "" + }, + 
"local": { + "runtime": { + "python": "3.11", + "nodejs": "20", + "pypy": "3.11" + } + } + }, + "experiments": { + "update_code": false, + "update_storage": false, + "download_results": true, + "architecture": "x64", + "container_deployment": false, + "runtime": { + "language": "python", + "version": "3.11" + }, + "repetitions": 5, + "memory": 256 + } +} + diff --git a/requirements.visualization.txt b/requirements.visualization.txt new file mode 100644 index 000000000..37389e6d3 --- /dev/null +++ b/requirements.visualization.txt @@ -0,0 +1,7 @@ +# Additional requirements for benchmark visualization tools + +matplotlib>=3.5.0 +seaborn>=0.12.0 +pandas>=1.5.0 +numpy>=1.23.0 + diff --git a/scripts/cross_platform_benchmark.py b/scripts/cross_platform_benchmark.py new file mode 100644 index 000000000..5a4d32412 --- /dev/null +++ b/scripts/cross_platform_benchmark.py @@ -0,0 +1,499 @@ +#!/usr/bin/env python3 + +""" +Cross-platform benchmark comparison tool for SeBS. +Runs benchmarks across multiple languages and cloud platforms, +aggregates results, and provides comparison analysis. 
def run_single_benchmark(
    self,
    benchmark: str,
    platform: str,
    language: str,
    version: str,
    config_file: str,
    input_size: str = 'test',
    repetitions: int = 5,
    memory: int = 256,
    architecture: str = 'x64'
) -> Tuple[bool, str, Optional[str]]:
    """
    Run a single benchmark configuration by invoking sebs.py in a subprocess.

    Args:
        benchmark: Benchmark name (e.g. '010.sleep').
        platform: Deployment platform ('aws', 'azure', 'gcp', 'local').
        language: Language to benchmark (e.g. 'python', 'pypy').
        version: Language version string.
        config_file: Path to the base SeBS configuration file.
        input_size: Benchmark input size ('test', 'small', 'large').
        repetitions: Number of repetitions per benchmark.
        memory: Function memory in MB.
        architecture: Target architecture ('x64' or 'arm64').

    Returns:
        (success, output_directory, error_message) -- output_directory is the
        per-run experiment directory (the original docstring mislabelled it
        as "output_file"); error_message is None on success.
    """
    run_id = f"{benchmark}_{platform}_{language}_{version}_{memory}MB"
    self.logger.info(f"Starting: {run_id}")

    # Each run gets its own directory for config, logs and results.
    experiment_dir = self.output_dir / run_id
    experiment_dir.mkdir(parents=True, exist_ok=True)

    # Update config for this run
    try:
        with open(config_file, 'r') as f:
            config = json.load(f)

        # Override the base configuration with this run's parameters.
        config['experiments']['runtime'] = {
            'language': language,
            'version': version
        }
        config['experiments']['repetitions'] = repetitions
        config['experiments']['memory'] = memory
        config['experiments']['architecture'] = architecture
        config['deployment']['name'] = platform

        # Write updated config
        run_config_file = experiment_dir / 'config.json'
        with open(run_config_file, 'w') as f:
            json.dump(config, f, indent=2)

        # Construct sebs.py command
        cmd = [
            sys.executable,
            os.path.join(PROJECT_ROOT, 'sebs.py'),
            'benchmark',
            'invoke',
            benchmark,
            input_size,
            '--config', str(run_config_file),
            '--deployment', platform,
            '--language', language,
            '--language-version', version,
            '--memory', str(memory),
            '--architecture', architecture,
            '--output-dir', str(experiment_dir),
            '--cache', self.cache_dir
        ]

        # AWS has no native PyPy runtime, so PyPy must ship as a container image.
        if platform == 'aws' and language == 'pypy':
            cmd.append('--container-deployment')

        if self.verbose:
            cmd.append('--verbose')

        self.logger.debug(f"Command: {' '.join(cmd)}")

        # Execute benchmark; run from the experiment directory so that
        # experiments.json is saved there.
        start_time = time.time()
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=600,  # 10 minute timeout
            cwd=str(experiment_dir)
        )
        execution_time = time.time() - start_time

        # Save stdout/stderr for post-mortem debugging.
        with open(experiment_dir / 'stdout.log', 'w') as f:
            f.write(result.stdout)
        with open(experiment_dir / 'stderr.log', 'w') as f:
            f.write(result.stderr)

        if result.returncode == 0:
            self.logger.info(f"✓ Completed: {run_id} ({execution_time:.2f}s)")

            # BUGFIX: the original looked up experiments.json here and then
            # silently discarded the result; surface a warning instead so a
            # run that produced no results file is visible in the logs.
            has_results = (experiment_dir / 'experiments.json').exists() or \
                bool(list(experiment_dir.glob('**/experiments.json')))
            if not has_results:
                self.logger.warning(f"No experiments.json produced for {run_id}")

            return True, str(experiment_dir), None
        else:
            error_msg = f"Failed with return code {result.returncode}"
            self.logger.error(f"✗ Failed: {run_id} - {error_msg}")
            self.logger.debug(f"Stderr: {result.stderr[:500]}")
            return False, str(experiment_dir), error_msg

    except subprocess.TimeoutExpired:
        error_msg = "Benchmark execution timed out"
        self.logger.error(f"✗ Timeout: {run_id}")
        return False, str(experiment_dir), error_msg
    except Exception as e:
        error_msg = f"Exception: {str(e)}"
        self.logger.error(f"✗ Error: {run_id} - {error_msg}")
        self.logger.debug(traceback.format_exc())
        return False, str(experiment_dir), error_msg
def run_comparison(
    self,
    benchmarks: List[str],
    platforms: List[str],
    languages: List[str],
    config_file: str,
    input_size: str = 'test',
    repetitions: int = 5,
    memory_sizes: Optional[List[int]] = None,
    architecture: str = 'x64',
    versions: Optional[Dict[str, List[str]]] = None
):
    """
    Run benchmarks across multiple configurations.

    Args:
        benchmarks: List of benchmark names (e.g., ['010.sleep', '110.dynamic-html'])
        platforms: List of platforms (e.g., ['aws', 'azure'])
        languages: List of languages (e.g., ['python', 'nodejs'])
        config_file: Path to base configuration file
        input_size: Benchmark input size
        repetitions: Number of repetitions per benchmark
        memory_sizes: List of memory configurations to test (default: [256])
        architecture: Target architecture (x64 or arm64)
        versions: Optional dict mapping language to specific versions

    Returns:
        The aggregated results dictionary (also written to comparison_results.json).
    """
    # BUGFIX: the original signature used a mutable list ([256]) as the
    # default argument, which is shared across calls; substitute it here.
    if memory_sizes is None:
        memory_sizes = [256]

    total_runs = 0
    successful_runs = 0
    failed_runs = 0

    for benchmark in benchmarks:
        self.results['benchmarks'][benchmark] = {}

        for platform in platforms:
            self.results['benchmarks'][benchmark][platform] = {}

            for language in languages:
                # Check if language is supported on this platform
                if language not in LANGUAGE_CONFIGS.get(platform, {}):
                    self.logger.warning(f"Skipping {language} on {platform} (not supported)")
                    continue

                # Get versions to test; default to the first supported one.
                if versions and language in versions:
                    lang_versions = versions[language]
                else:
                    lang_versions = [LANGUAGE_CONFIGS[platform][language][0]]

                self.results['benchmarks'][benchmark][platform][language] = {}

                for version in lang_versions:
                    # Verify version is supported
                    if version not in LANGUAGE_CONFIGS[platform][language]:
                        self.logger.warning(
                            f"Skipping {language} {version} on {platform} (version not supported)"
                        )
                        continue

                    self.results['benchmarks'][benchmark][platform][language][version] = {}

                    for memory in memory_sizes:
                        total_runs += 1

                        success, output_dir, error = self.run_single_benchmark(
                            benchmark=benchmark,
                            platform=platform,
                            language=language,
                            version=version,
                            config_file=config_file,
                            input_size=input_size,
                            repetitions=repetitions,
                            memory=memory,
                            architecture=architecture
                        )

                        result_entry = {
                            'success': success,
                            'output_directory': output_dir,
                            'memory_mb': memory,
                            'architecture': architecture,
                            'repetitions': repetitions,
                            'input_size': input_size
                        }

                        if success:
                            successful_runs += 1
                            # Try to extract metrics and full experiment data
                            try:
                                extracted = self._extract_metrics(output_dir)

                                # Store full experiments.json data if available
                                if 'full_experiment_data' in extracted:
                                    result_entry['experiment_data'] = extracted['full_experiment_data']
                                    # Also store summary metrics
                                    result_entry['metrics'] = {
                                        k: v for k, v in extracted.items()
                                        if k != 'full_experiment_data'
                                    }
                                else:
                                    result_entry['metrics'] = extracted
                            except Exception as e:
                                self.logger.warning(f"Could not extract metrics: {e}")
                        else:
                            failed_runs += 1
                            result_entry['error'] = error

                        self.results['benchmarks'][benchmark][platform][language][version][f'{memory}MB'] = result_entry

    # Update end time and summary
    self.results['metadata']['end_time'] = datetime.now().isoformat()
    self.results['metadata']['summary'] = {
        'total_runs': total_runs,
        'successful': successful_runs,
        'failed': failed_runs,
        'success_rate': f"{(successful_runs/total_runs*100):.1f}%" if total_runs > 0 else "N/A"
    }

    # Save results
    output_file = self.output_dir / 'comparison_results.json'
    with open(output_file, 'w') as f:
        json.dump(self.results, f, indent=2)

    self.logger.info(f"\n{'='*60}")
    self.logger.info(f"Benchmark Comparison Complete!")
    self.logger.info(f"{'='*60}")
    self.logger.info(f"Total runs: {total_runs}")
    self.logger.info(f"Successful: {successful_runs}")
    self.logger.info(f"Failed: {failed_runs}")
    self.logger.info(f"Results saved to: {output_file}")

    return self.results
exp_json.exists(): + with open(exp_json, 'r') as f: + data = json.load(f) + + # Store the full experiments.json data + metrics['full_experiment_data'] = data + + # Extract timing information from invocations for summary + if '_invocations' in data: + invocations = data['_invocations'] + + for func_name, func_data in invocations.items(): + execution_times = [] + cold_starts = 0 + warm_starts = 0 + + for inv_id, inv_data in func_data.items(): + if 'times' in inv_data: + if 'client' in inv_data['times']: + # Client time is in microseconds, convert to ms + execution_times.append(inv_data['times']['client'] / 1000) + + if 'stats' in inv_data: + if inv_data['stats'].get('cold_start'): + cold_starts += 1 + else: + warm_starts += 1 + + if execution_times: + metrics['execution_times_ms'] = execution_times + metrics['avg_execution_time_ms'] = sum(execution_times) / len(execution_times) + metrics['min_execution_time_ms'] = min(execution_times) + metrics['max_execution_time_ms'] = max(execution_times) + metrics['cold_starts'] = cold_starts + metrics['warm_starts'] = warm_starts + + break + + return metrics + + +def main(): + parser = argparse.ArgumentParser( + description='Run cross-platform benchmark comparisons', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Compare Python and Node.js on AWS and Azure + %(prog)s --benchmarks 010.sleep 110.dynamic-html \\ + --platforms aws azure \\ + --languages python nodejs \\ + --config config/example.json \\ + --output results/comparison_$(date +%%Y%%m%%d) + + # Compare specific Python versions on AWS + %(prog)s --benchmarks 501.graph-pagerank \\ + --platforms aws \\ + --languages python \\ + --python-versions 3.11 3.10 3.9 \\ + --memory 512 1024 \\ + --config config/example.json + """ + ) + + parser.add_argument('--benchmarks', nargs='+', required=True, + help='Benchmark names to run (e.g., 010.sleep 110.dynamic-html)') + parser.add_argument('--platforms', nargs='+', required=True, + choices=['aws', 
def main():
    """CLI entry point: parse arguments, run the comparison, report results.

    Returns a process exit code (0 on success, 130 on interrupt, 1 on error).
    """
    parser = argparse.ArgumentParser(
        description='Run cross-platform benchmark comparisons',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Compare Python and Node.js on AWS and Azure
  %(prog)s --benchmarks 010.sleep 110.dynamic-html \\
      --platforms aws azure \\
      --languages python nodejs \\
      --config config/example.json \\
      --output results/comparison_$(date +%%Y%%m%%d)

  # Compare specific Python versions on AWS
  %(prog)s --benchmarks 501.graph-pagerank \\
      --platforms aws \\
      --languages python \\
      --python-versions 3.11 3.10 3.9 \\
      --memory 512 1024 \\
      --config config/example.json
        """
    )

    parser.add_argument('--benchmarks', nargs='+', required=True,
                        help='Benchmark names to run (e.g., 010.sleep 110.dynamic-html)')
    parser.add_argument('--platforms', nargs='+', required=True,
                        choices=['aws', 'azure', 'gcp', 'local'],
                        help='Platforms to test on')
    parser.add_argument('--languages', nargs='+', required=True,
                        help='Languages to test (e.g., python nodejs rust java)')
    parser.add_argument('--config', required=True,
                        help='Base configuration file')
    parser.add_argument('--output', required=True,
                        help='Output directory for results')

    # Optional parameters
    parser.add_argument('--input-size', default='test',
                        choices=['test', 'small', 'large'],
                        help='Benchmark input size (default: test)')
    parser.add_argument('--repetitions', type=int, default=5,
                        help='Number of repetitions per benchmark (default: 5)')
    parser.add_argument('--memory', nargs='+', type=int, default=[256],
                        help='Memory sizes in MB to test (default: 256)')
    parser.add_argument('--architecture', default='x64',
                        choices=['x64', 'arm64'],
                        help='Target architecture (default: x64)')
    parser.add_argument('--cache', default='cache',
                        help='Cache directory (default: cache)')

    # Language-specific version overrides
    parser.add_argument('--python-versions', nargs='+',
                        help='Specific Python versions to test')
    parser.add_argument('--nodejs-versions', nargs='+',
                        help='Specific Node.js versions to test')
    parser.add_argument('--rust-versions', nargs='+',
                        help='Specific Rust versions to test')
    parser.add_argument('--java-versions', nargs='+',
                        help='Specific Java versions to test')
    # FIX: pypy is a supported language (see LANGUAGE_CONFIGS) but had no
    # version-override flag, unlike every other language.
    parser.add_argument('--pypy-versions', nargs='+',
                        help='Specific PyPy versions to test')

    parser.add_argument('--verbose', action='store_true',
                        help='Enable verbose output')

    args = parser.parse_args()

    # Build version overrides
    versions = {}
    if args.python_versions:
        versions['python'] = args.python_versions
    if args.nodejs_versions:
        versions['nodejs'] = args.nodejs_versions
    if args.rust_versions:
        versions['rust'] = args.rust_versions
    if args.java_versions:
        versions['java'] = args.java_versions
    if args.pypy_versions:
        versions['pypy'] = args.pypy_versions

    # Create runner
    runner = BenchmarkRunner(
        output_dir=args.output,
        cache_dir=args.cache,
        verbose=args.verbose
    )

    # Run comparison
    try:
        results = runner.run_comparison(
            benchmarks=args.benchmarks,
            platforms=args.platforms,
            languages=args.languages,
            config_file=args.config,
            input_size=args.input_size,
            repetitions=args.repetitions,
            memory_sizes=args.memory,
            architecture=args.architecture,
            versions=versions if versions else None
        )

        print("\n" + "="*60)
        print("✓ Benchmark comparison completed successfully!")
        print("="*60)
        print(f"Results: {args.output}/comparison_results.json")
        print(f"Logs: {args.output}/benchmark_run.log")

        return 0

    except KeyboardInterrupt:
        print("\n\nBenchmark interrupted by user")
        return 130
    except Exception as e:
        print(f"\n\nError during benchmark execution: {e}")
        traceback.print_exc()
        return 1


if __name__ == '__main__':
    sys.exit(main())
#!/bin/bash

# Convenience wrapper for running benchmark comparisons and generating plots.
# Fix over original: `echo "Running: ${CMD[@]}"` mixed a string with an array
# expansion (ShellCheck SC2145); use ${CMD[*]} to join into one string.

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Default values
BENCHMARKS="010.sleep"
PLATFORMS="aws"
LANGUAGES="python"
CONFIG="${PROJECT_ROOT}/config/example.json"
OUTPUT_DIR="${PROJECT_ROOT}/results/comparison_$(date +%Y%m%d_%H%M%S)"
REPETITIONS=5
MEMORY="256"
INPUT_SIZE="test"
GENERATE_PLOTS=true

# Print usage
usage() {
    cat << EOF
Usage: $(basename "$0") [OPTIONS]

Run cross-platform benchmark comparisons and generate plots.

Options:
  -b, --benchmarks NAMES     Benchmark names (space-separated, default: 010.sleep)
  -p, --platforms PLATFORMS  Platforms to test (space-separated, default: aws)
                             Available: aws azure gcp local
  -l, --languages LANGUAGES  Languages to test (space-separated, default: python)
                             Available: python nodejs rust java pypy
  -c, --config FILE          Configuration file (default: config/example.json)
  -o, --output DIR           Output directory (default: results/comparison_TIMESTAMP)
  -r, --repetitions NUM      Number of repetitions (default: 5)
  -m, --memory SIZES         Memory sizes in MB (space-separated, default: 256)
  -i, --input-size SIZE      Input size: test, small, large (default: test)
  --no-plots                 Skip plot generation
  --skip-benchmark           Skip benchmark run, only generate plots
  -h, --help                 Show this help message

Examples:
  # Compare Python and Node.js on AWS and Azure
  $(basename "$0") -b "010.sleep 110.dynamic-html" -p "aws azure" -l "python nodejs"

  # Test different memory configurations
  $(basename "$0") -b "501.graph-pagerank" -m "512 1024 2048" -r 10

  # Just generate plots from existing results
  $(basename "$0") --skip-benchmark -o results/comparison_20241212_120000

EOF
}

# Parse arguments
SKIP_BENCHMARK=false
while [[ $# -gt 0 ]]; do
    case $1 in
        -b|--benchmarks)
            BENCHMARKS="$2"
            shift 2
            ;;
        -p|--platforms)
            PLATFORMS="$2"
            shift 2
            ;;
        -l|--languages)
            LANGUAGES="$2"
            shift 2
            ;;
        -c|--config)
            CONFIG="$2"
            shift 2
            ;;
        -o|--output)
            OUTPUT_DIR="$2"
            shift 2
            ;;
        -r|--repetitions)
            REPETITIONS="$2"
            shift 2
            ;;
        -m|--memory)
            MEMORY="$2"
            shift 2
            ;;
        -i|--input-size)
            INPUT_SIZE="$2"
            shift 2
            ;;
        --no-plots)
            GENERATE_PLOTS=false
            shift
            ;;
        --skip-benchmark)
            SKIP_BENCHMARK=true
            shift
            ;;
        -h|--help)
            usage
            exit 0
            ;;
        *)
            echo -e "${RED}Error: Unknown option $1${NC}"
            usage
            exit 1
            ;;
    esac
done

echo "=================================="
echo "Benchmark Comparison Tool"
echo "=================================="
echo ""

# Check if config exists
if [ ! -f "$CONFIG" ]; then
    echo -e "${RED}Error: Configuration file not found: $CONFIG${NC}"
    exit 1
fi

# Run benchmarks unless skipped
if [ "$SKIP_BENCHMARK" = false ]; then
    echo -e "${GREEN}Step 1: Running Benchmarks${NC}"
    echo "  Benchmarks:  $BENCHMARKS"
    echo "  Platforms:   $PLATFORMS"
    echo "  Languages:   $LANGUAGES"
    echo "  Repetitions: $REPETITIONS"
    echo "  Memory:      $MEMORY MB"
    echo "  Input Size:  $INPUT_SIZE"
    echo "  Output:      $OUTPUT_DIR"
    echo ""

    # Build command; list variables are intentionally unquoted so they
    # word-split into separate arguments.
    CMD=(
        python3 "${SCRIPT_DIR}/cross_platform_benchmark.py"
        --benchmarks $BENCHMARKS
        --platforms $PLATFORMS
        --languages $LANGUAGES
        --config "$CONFIG"
        --output "$OUTPUT_DIR"
        --repetitions "$REPETITIONS"
        --memory $MEMORY
        --input-size "$INPUT_SIZE"
        --verbose
    )

    # SC2145 fix: join the array into a single word for display.
    echo "Running: ${CMD[*]}"
    echo ""

    if "${CMD[@]}"; then
        echo -e "${GREEN}✓ Benchmarks completed successfully!${NC}"
    else
        echo -e "${RED}✗ Benchmark execution failed!${NC}"
        exit 1
    fi
else
    echo -e "${YELLOW}Skipping benchmark execution${NC}"

    # Check if results file exists
    if [ ! -f "$OUTPUT_DIR/comparison_results.json" ]; then
        echo -e "${RED}Error: Results file not found: $OUTPUT_DIR/comparison_results.json${NC}"
        exit 1
    fi
fi

# Generate plots
if [ "$GENERATE_PLOTS" = true ]; then
    echo ""
    echo -e "${GREEN}Step 2: Generating Plots${NC}"
    echo ""

    PLOT_CMD=(
        python3 "${SCRIPT_DIR}/plot_comparison.py"
        "$OUTPUT_DIR/comparison_results.json"
        --output "$OUTPUT_DIR/plots"
    )

    if "${PLOT_CMD[@]}"; then
        echo -e "${GREEN}✓ Plots generated successfully!${NC}"
    else
        echo -e "${YELLOW}⚠ Plot generation failed (may need matplotlib/seaborn)${NC}"
    fi
fi

echo ""
echo "=================================="
echo -e "${GREEN}Comparison Complete!${NC}"
echo "=================================="
echo ""
echo "Results Location: $OUTPUT_DIR"
echo "  - comparison_results.json (raw results with full experiments.json data)"
echo "  - benchmark_run.log (execution log)"
if [ "$GENERATE_PLOTS" = true ]; then
    echo "  - plots/ (visualizations)"
fi
echo ""
echo "Useful commands:"
echo "  # Regenerate plots"
echo "  python3 ${SCRIPT_DIR}/plot_comparison.py $OUTPUT_DIR/comparison_results.json"
echo ""
echo "  # Extract individual experiments.json files"
echo "  python3 ${SCRIPT_DIR}/export_comparison_data.py $OUTPUT_DIR/comparison_results.json -e $OUTPUT_DIR/experiments/"
echo ""
echo "  # Create aggregated experiments.json"
echo "  python3 ${SCRIPT_DIR}/export_comparison_data.py $OUTPUT_DIR/comparison_results.json -a $OUTPUT_DIR/aggregated.json"
echo ""
echo "  # Export to CSV"
echo "  python3 ${SCRIPT_DIR}/export_comparison_data.py $OUTPUT_DIR/comparison_results.json -c $OUTPUT_DIR/summary.csv"
echo ""
# Color palettes for different entities
PLATFORM_COLORS = {
    'aws': '#FF9900',    # AWS Orange
    'azure': '#0089D6',  # Azure Blue
    'gcp': '#4285F4',    # Google Blue
    'local': '#808080'   # Gray
}

LANGUAGE_COLORS = {
    'python': '#3776AB',  # Python Blue
    'nodejs': '#339933',  # Node.js Green
    'rust': '#000000',    # Rust Black
    'java': '#007396',    # Java Blue
    'pypy': '#193440',    # PyPy Dark
    'cpp': '#00599C'      # C++ Blue
}


def __init__(self, results_file: str, output_dir: Optional[str] = None):
    """Load a comparison_results.json file and prepare the plot directory.

    If *output_dir* is omitted, plots go to a 'plots' directory next to
    the results file.
    """
    self.results_file = Path(results_file)
    if output_dir:
        self.output_dir = Path(output_dir)
    else:
        self.output_dir = self.results_file.parent / 'plots'
    self.output_dir.mkdir(parents=True, exist_ok=True)

    # Load results
    with open(self.results_file, 'r') as handle:
        self.results = json.load(handle)

    # Setup logging
    logging.basicConfig(
        level=logging.INFO,
        format='%(levelname)s: %(message)s'
    )
    self.logger = logging.getLogger(__name__)

    self.logger.info(f"Loaded results from {self.results_file}")
    self.logger.info(f"Plots will be saved to {self.output_dir}")


def extract_dataframe(self) -> pd.DataFrame:
    """Flatten the nested results dict into a pandas DataFrame.

    Columns: benchmark, platform, language, version, memory_mb, success,
    plus (when metrics are present) avg/min/max execution time in ms,
    cold/warm start counts and the raw per-invocation execution times.
    """
    rows = []

    for bench_name, by_platform in self.results['benchmarks'].items():
        for platform_name, by_language in by_platform.items():
            for lang_name, by_version in by_language.items():
                for ver, by_memory in by_version.items():
                    for _mem_label, entry in by_memory.items():
                        record = {
                            'benchmark': bench_name,
                            'platform': platform_name,
                            'language': lang_name,
                            'version': ver,
                            'memory_mb': entry.get('memory_mb', 0),
                            'success': entry.get('success', False),
                        }

                        # Summary metrics are only present on successful runs.
                        if 'metrics' in entry:
                            stats = entry['metrics']
                            record['avg_time_ms'] = stats.get('avg_execution_time_ms')
                            record['min_time_ms'] = stats.get('min_execution_time_ms')
                            record['max_time_ms'] = stats.get('max_execution_time_ms')
                            record['cold_starts'] = stats.get('cold_starts', 0)
                            record['warm_starts'] = stats.get('warm_starts', 0)
                            # Keep raw per-invocation times for detailed analysis.
                            if 'execution_times_ms' in stats:
                                record['execution_times'] = stats['execution_times_ms']

                        rows.append(record)

    frame = pd.DataFrame(rows)
    self.logger.info(f"Extracted {len(frame)} benchmark results")
    return frame
def plot_language_comparison(self, df: pd.DataFrame, benchmark: Optional[str] = None):
    """
    Create bar chart comparing languages across platforms.

    Args:
        df: DataFrame with benchmark results
        benchmark: Optional benchmark name to filter by
    """
    if benchmark:
        df = df[df['benchmark'] == benchmark]
        title_suffix = f" - {benchmark}"
    else:
        title_suffix = " - All Benchmarks"

    # Keep only successful runs.
    mask = df['success'] == True
    df = df.loc[mask].copy()

    if df.empty:
        self.logger.warning(f"No successful runs for language comparison{title_suffix}")
        return

    fig, ax = plt.subplots(figsize=(12, 6))

    # Mean execution time per (platform, language), pivoted so each
    # language is a group of platform bars.
    means = df.groupby(['platform', 'language'])['avg_time_ms'].mean().reset_index()
    table = means.pivot(index='language', columns='platform', values='avg_time_ms')

    bar_colors = [PLATFORM_COLORS.get(p, '#888888') for p in table.columns]
    table.plot(kind='bar', ax=ax, color=bar_colors)

    ax.set_xlabel('Language')
    ax.set_ylabel('Average Execution Time (ms)')
    ax.set_title(f'Language Performance Comparison{title_suffix}')
    ax.legend(title='Platform')
    ax.grid(axis='y', alpha=0.3)

    plt.xticks(rotation=45)
    plt.tight_layout()

    filename = f"language_comparison{'_' + benchmark if benchmark else ''}.png"
    filepath = self.output_dir / filename
    plt.savefig(filepath, dpi=300, bbox_inches='tight')
    self.logger.info(f"Saved: {filepath}")
    plt.close()
def plot_platform_comparison(self, df: pd.DataFrame, language: Optional[str] = None):
    """
    Create bar chart comparing platforms for a specific language.

    Args:
        df: DataFrame with benchmark results
        language: Optional language to filter by
    """
    if language:
        df = df[df['language'] == language]
        title_suffix = f" - {language.title()}"
    else:
        title_suffix = ""

    # Keep only successful runs.
    mask = df['success'] == True
    df = df.loc[mask].copy()

    if df.empty:
        self.logger.warning(f"No successful runs for platform comparison{title_suffix}")
        return

    fig, ax = plt.subplots(figsize=(12, 6))

    # Mean execution time per (benchmark, platform), pivoted so each
    # benchmark is a group of platform bars.
    means = df.groupby(['benchmark', 'platform'])['avg_time_ms'].mean().reset_index()
    table = means.pivot(index='benchmark', columns='platform', values='avg_time_ms')

    bar_colors = [PLATFORM_COLORS.get(p, '#888888') for p in table.columns]
    table.plot(kind='bar', ax=ax, color=bar_colors)

    ax.set_xlabel('Benchmark')
    ax.set_ylabel('Average Execution Time (ms)')
    ax.set_title(f'Platform Performance Comparison{title_suffix}')
    ax.legend(title='Platform')
    ax.grid(axis='y', alpha=0.3)

    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()

    filename = f"platform_comparison{'_' + language if language else ''}.png"
    filepath = self.output_dir / filename
    plt.savefig(filepath, dpi=300, bbox_inches='tight')
    self.logger.info(f"Saved: {filepath}")
    plt.close()
+ + Args: + df: DataFrame with benchmark results + benchmark: Optional benchmark to filter by + """ + if benchmark: + df = df[df['benchmark'] == benchmark] + title_suffix = f" - {benchmark}" + else: + title_suffix = "" + + # Filter successful runs only + df = df[df['success'] == True].copy() + + if df.empty or df['memory_mb'].nunique() < 2: + self.logger.warning(f"Insufficient data for memory scaling plot{title_suffix}") + return + + fig, ax = plt.subplots(figsize=(12, 6)) + + # Plot for each language-platform combination + for (language, platform), group in df.groupby(['language', 'platform']): + group_sorted = group.sort_values('memory_mb') + label = f"{language} ({platform})" + color = LANGUAGE_COLORS.get(language, '#888888') + linestyle = '-' if platform == 'aws' else '--' if platform == 'azure' else '-.' + + ax.plot( + group_sorted['memory_mb'], + group_sorted['avg_time_ms'], + marker='o', + label=label, + color=color, + linestyle=linestyle, + linewidth=2 + ) + + ax.set_xlabel('Memory (MB)') + ax.set_ylabel('Average Execution Time (ms)') + ax.set_title(f'Performance vs Memory{title_suffix}') + ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left') + ax.grid(alpha=0.3) + + plt.tight_layout() + + filename = f"memory_scaling{'_' + benchmark if benchmark else ''}.png" + filepath = self.output_dir / filename + plt.savefig(filepath, dpi=300, bbox_inches='tight') + self.logger.info(f"Saved: {filepath}") + plt.close() + + def plot_cold_vs_warm(self, df: pd.DataFrame): + """ + Create stacked bar chart showing cold vs warm start distribution. 
+ + Args: + df: DataFrame with benchmark results + """ + # Filter successful runs only + df = df[df['success'] == True].copy() + + if df.empty or 'cold_starts' not in df.columns: + self.logger.warning("No cold start data available") + return + + # Calculate totals + df['total_invocations'] = df['cold_starts'] + df['warm_starts'] + + # Filter out rows with no invocations + df = df[df['total_invocations'] > 0] + + if df.empty: + self.logger.warning("No invocation data for cold vs warm plot") + return + + fig, ax = plt.subplots(figsize=(14, 6)) + + # Group by language and platform + grouped = df.groupby(['language', 'platform']).agg({ + 'cold_starts': 'sum', + 'warm_starts': 'sum' + }).reset_index() + + # Create labels + grouped['label'] = grouped['language'] + '\n(' + grouped['platform'] + ')' + + # Create stacked bar chart + x = np.arange(len(grouped)) + width = 0.6 + + p1 = ax.bar(x, grouped['cold_starts'], width, label='Cold Starts', color='#d62728') + p2 = ax.bar(x, grouped['warm_starts'], width, bottom=grouped['cold_starts'], + label='Warm Starts', color='#2ca02c') + + ax.set_xlabel('Language (Platform)') + ax.set_ylabel('Number of Invocations') + ax.set_title('Cold vs Warm Start Distribution') + ax.set_xticks(x) + ax.set_xticklabels(grouped['label'], rotation=45, ha='right') + ax.legend() + ax.grid(axis='y', alpha=0.3) + + plt.tight_layout() + + filepath = self.output_dir / "cold_vs_warm_starts.png" + plt.savefig(filepath, dpi=300, bbox_inches='tight') + self.logger.info(f"Saved: {filepath}") + plt.close() + + def plot_heatmap(self, df: pd.DataFrame, metric: str = 'avg_time_ms'): + """ + Create heatmap showing performance across platforms and languages. 
+ + Args: + df: DataFrame with benchmark results + metric: Metric to visualize + """ + # Filter successful runs only + df = df[df['success'] == True].copy() + + if df.empty or metric not in df.columns: + self.logger.warning(f"No data available for heatmap with metric: {metric}") + return + + # Aggregate by platform and language + pivot = df.groupby(['platform', 'language'])[metric].mean().reset_index() + pivot_table = pivot.pivot(index='platform', columns='language', values=metric) + + if pivot_table.empty: + self.logger.warning("No data for heatmap") + return + + fig, ax = plt.subplots(figsize=(10, 6)) + + sns.heatmap( + pivot_table, + annot=True, + fmt='.2f', + cmap='YlOrRd', + ax=ax, + cbar_kws={'label': 'Avg Execution Time (ms)'} + ) + + ax.set_title(f'Performance Heatmap - {metric.replace("_", " ").title()}') + ax.set_xlabel('Language') + ax.set_ylabel('Platform') + + plt.tight_layout() + + filepath = self.output_dir / f"heatmap_{metric}.png" + plt.savefig(filepath, dpi=300, bbox_inches='tight') + self.logger.info(f"Saved: {filepath}") + plt.close() + + def plot_version_comparison(self, df: pd.DataFrame, language: str): + """ + Compare different versions of the same language. 
+ + Args: + df: DataFrame with benchmark results + language: Language to compare versions for + """ + df = df[df['language'] == language] + df = df[df['success'] == True].copy() + + if df.empty or df['version'].nunique() < 2: + self.logger.warning(f"Insufficient version data for {language}") + return + + fig, ax = plt.subplots(figsize=(12, 6)) + + # Group by version and platform + grouped = df.groupby(['version', 'platform'])['avg_time_ms'].mean().reset_index() + pivot = grouped.pivot(index='version', columns='platform', values='avg_time_ms') + + pivot.plot(kind='bar', ax=ax, color=[PLATFORM_COLORS.get(p, '#888888') for p in pivot.columns]) + + ax.set_xlabel(f'{language.title()} Version') + ax.set_ylabel('Average Execution Time (ms)') + ax.set_title(f'{language.title()} Version Performance Comparison') + ax.legend(title='Platform') + ax.grid(axis='y', alpha=0.3) + + plt.xticks(rotation=0) + plt.tight_layout() + + filepath = self.output_dir / f"version_comparison_{language}.png" + plt.savefig(filepath, dpi=300, bbox_inches='tight') + self.logger.info(f"Saved: {filepath}") + plt.close() + + def create_summary_report(self, df: pd.DataFrame): + """ + Create a text summary report of the benchmark results. 
+ + Args: + df: DataFrame with benchmark results + """ + report_lines = [] + report_lines.append("="*80) + report_lines.append("BENCHMARK COMPARISON SUMMARY REPORT") + report_lines.append("="*80) + report_lines.append("") + + # Metadata + metadata = self.results.get('metadata', {}) + report_lines.append(f"Start Time: {metadata.get('start_time', 'N/A')}") + report_lines.append(f"End Time: {metadata.get('end_time', 'N/A')}") + report_lines.append("") + + # Summary statistics + if 'summary' in metadata: + summary = metadata['summary'] + report_lines.append("Overall Statistics:") + report_lines.append(f" Total Runs: {summary.get('total_runs', 0)}") + report_lines.append(f" Successful: {summary.get('successful', 0)}") + report_lines.append(f" Failed: {summary.get('failed', 0)}") + report_lines.append(f" Success Rate: {summary.get('success_rate', 'N/A')}") + report_lines.append("") + + # Successful runs only + df_success = df[df['success'] == True].copy() + + if not df_success.empty and 'avg_time_ms' in df_success.columns: + report_lines.append("Performance by Platform:") + for platform in sorted(df_success['platform'].unique()): + platform_df = df_success[df_success['platform'] == platform] + avg_time = platform_df['avg_time_ms'].mean() + report_lines.append(f" {platform.upper()}: {avg_time:.2f} ms (avg)") + report_lines.append("") + + report_lines.append("Performance by Language:") + for language in sorted(df_success['language'].unique()): + lang_df = df_success[df_success['language'] == language] + avg_time = lang_df['avg_time_ms'].mean() + report_lines.append(f" {language}: {avg_time:.2f} ms (avg)") + report_lines.append("") + + # Best performers + report_lines.append("Best Performers:") + # Check if we have valid data + if not df_success['avg_time_ms'].isna().all(): + best_overall = df_success.loc[df_success['avg_time_ms'].idxmin()] + report_lines.append( + f" Fastest Overall: {best_overall['language']} on {best_overall['platform']} " + 
f"({best_overall['avg_time_ms']:.2f} ms)" + ) + else: + report_lines.append(" No valid performance data available") + + for platform in df_success['platform'].unique(): + platform_df = df_success[df_success['platform'] == platform] + if not platform_df.empty and not platform_df['avg_time_ms'].isna().all(): + best = platform_df.loc[platform_df['avg_time_ms'].idxmin()] + report_lines.append( + f" Fastest on {platform}: {best['language']} v{best['version']} " + f"({best['avg_time_ms']:.2f} ms)" + ) + report_lines.append("") + + report_lines.append("="*80) + + # Write report + report_text = "\n".join(report_lines) + filepath = self.output_dir / "summary_report.txt" + with open(filepath, 'w') as f: + f.write(report_text) + + self.logger.info(f"Saved: {filepath}") + print("\n" + report_text) + + def create_all_plots(self): + """Generate all available plots from the benchmark results.""" + self.logger.info("Generating all plots...") + + df = self.extract_dataframe() + + if df.empty: + self.logger.error("No data to plot!") + return + + # Create summary report + self.create_summary_report(df) + + # Language comparison + self.plot_language_comparison(df) + + # Platform comparison + self.plot_platform_comparison(df) + + # Memory scaling + if df['memory_mb'].nunique() > 1: + self.plot_memory_scaling(df) + + # Cold vs warm starts + if 'cold_starts' in df.columns: + self.plot_cold_vs_warm(df) + + # Heatmap + self.plot_heatmap(df) + + # Version comparisons for each language + for language in df['language'].unique(): + if df[df['language'] == language]['version'].nunique() > 1: + self.plot_version_comparison(df, language) + + # Per-benchmark plots + for benchmark in df['benchmark'].unique(): + self.plot_language_comparison(df, benchmark=benchmark) + + self.logger.info(f"\n✓ All plots generated in: {self.output_dir}") + + +def main(): + parser = argparse.ArgumentParser( + description='Visualize cross-platform benchmark comparison results', + 
formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Generate all plots + %(prog)s results/comparison_20241212/comparison_results.json + + # Specify output directory + %(prog)s results/comparison_20241212/comparison_results.json --output plots/ + + # Generate specific plot types + %(prog)s results.json --plot-type language_comparison platform_comparison + """ + ) + + parser.add_argument('results_file', + help='Path to comparison_results.json file') + parser.add_argument('--output', '-o', + help='Output directory for plots (default: results_dir/plots)') + parser.add_argument('--plot-type', nargs='+', + choices=['language_comparison', 'platform_comparison', + 'memory_scaling', 'cold_warm', 'heatmap', 'versions', 'all'], + default=['all'], + help='Types of plots to generate (default: all)') + parser.add_argument('--language', '-l', + help='Filter by specific language') + parser.add_argument('--benchmark', '-b', + help='Filter by specific benchmark') + parser.add_argument('--format', choices=['png', 'pdf', 'svg'], default='png', + help='Output format for plots (default: png)') + + args = parser.parse_args() + + try: + visualizer = BenchmarkVisualizer(args.results_file, args.output) + + if 'all' in args.plot_type: + visualizer.create_all_plots() + else: + df = visualizer.extract_dataframe() + + if 'language_comparison' in args.plot_type: + visualizer.plot_language_comparison(df, benchmark=args.benchmark) + + if 'platform_comparison' in args.plot_type: + visualizer.plot_platform_comparison(df, language=args.language) + + if 'memory_scaling' in args.plot_type: + visualizer.plot_memory_scaling(df, benchmark=args.benchmark) + + if 'cold_warm' in args.plot_type: + visualizer.plot_cold_vs_warm(df) + + if 'heatmap' in args.plot_type: + visualizer.plot_heatmap(df) + + if 'versions' in args.plot_type and args.language: + visualizer.plot_version_comparison(df, args.language) + + visualizer.create_summary_report(df) + + print(f"\n✓ Visualization 
complete! Plots saved to: {visualizer.output_dir}") + return 0 + + except FileNotFoundError: + print(f"Error: Results file not found: {args.results_file}") + return 1 + except json.JSONDecodeError: + print(f"Error: Invalid JSON in results file: {args.results_file}") + return 1 + except Exception as e: + print(f"Error: {e}") + import traceback + traceback.print_exc() + return 1 + + +if __name__ == '__main__': + sys.exit(main()) + From a6543d6aa20c24e9f5e3dfa5b477dec4ffcaa8da Mon Sep 17 00:00:00 2001 From: toooadi Date: Tue, 16 Dec 2025 21:15:20 +0100 Subject: [PATCH 26/31] Add architecture and container-deployment options --- scripts/cross_platform_benchmark.py | 19 +++++++++++++------ scripts/run_comparison.sh | 19 +++++++++++++++++++ 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/scripts/cross_platform_benchmark.py b/scripts/cross_platform_benchmark.py index 5a4d32412..31432ac78 100644 --- a/scripts/cross_platform_benchmark.py +++ b/scripts/cross_platform_benchmark.py @@ -89,7 +89,8 @@ def run_single_benchmark( input_size: str = 'test', repetitions: int = 5, memory: int = 256, - architecture: str = 'x64' + architecture: str = 'x64', + container_deployment: bool = False ) -> Tuple[bool, Optional[str], Optional[Dict]]: """ Run a single benchmark configuration. @@ -142,8 +143,8 @@ def run_single_benchmark( '--cache', self.cache_dir ] - # Add --container-deployment for AWS PyPy as it is required - if platform == 'aws' and language == 'pypy': + # Add --container-deployment if requested or required + if container_deployment or ((platform == 'aws' or platform == 'gcp') and language == 'pypy'): cmd.append('--container-deployment') if self.verbose: @@ -206,7 +207,8 @@ def run_comparison( repetitions: int = 5, memory_sizes: List[int] = [256], architecture: str = 'x64', - versions: Optional[Dict[str, List[str]]] = None + versions: Optional[Dict[str, List[str]]] = None, + container_deployment: bool = False ): """ Run benchmarks across multiple configurations. 
@@ -269,7 +271,8 @@ def run_comparison( input_size=input_size, repetitions=repetitions, memory=memory, - architecture=architecture + architecture=architecture, + container_deployment=container_deployment ) result_entry = { @@ -442,6 +445,9 @@ def main(): parser.add_argument('--verbose', action='store_true', help='Enable verbose output') + + parser.add_argument('--container-deployment', action='store_true', + help='Run functions as containers') args = parser.parse_args() @@ -474,7 +480,8 @@ def main(): repetitions=args.repetitions, memory_sizes=args.memory, architecture=args.architecture, - versions=versions if versions else None + versions=versions if versions else None, + container_deployment=args.container_deployment ) print("\n" + "="*60) diff --git a/scripts/run_comparison.sh b/scripts/run_comparison.sh index b6206e519..64ef993ce 100755 --- a/scripts/run_comparison.sh +++ b/scripts/run_comparison.sh @@ -22,7 +22,9 @@ OUTPUT_DIR="${PROJECT_ROOT}/results/comparison_$(date +%Y%m%d_%H%M%S)" REPETITIONS=5 MEMORY="256" INPUT_SIZE="test" +ARCHITECTURE="x64" GENERATE_PLOTS=true +CONTAINER_DEPLOYMENT=false # Print usage usage() { @@ -42,6 +44,8 @@ Options: -r, --repetitions NUM Number of repetitions (default: 5) -m, --memory SIZES Memory sizes in MB (space-separated, default: 256) -i, --input-size SIZE Input size: test, small, large (default: test) + -a, --architecture ARCH Architecture: x64, arm64 (default: x64) + --container-deployment Run functions as containers --no-plots Skip plot generation --skip-benchmark Skip benchmark run, only generate plots -h, --help Show this help message @@ -95,6 +99,14 @@ while [[ $# -gt 0 ]]; do INPUT_SIZE="$2" shift 2 ;; + -a|--architecture) + ARCHITECTURE="$2" + shift 2 + ;; + --container-deployment) + CONTAINER_DEPLOYMENT=true + shift + ;; --no-plots) GENERATE_PLOTS=false shift @@ -135,6 +147,8 @@ if [ "$SKIP_BENCHMARK" = false ]; then echo " Repetitions: $REPETITIONS" echo " Memory: $MEMORY MB" echo " Input Size: $INPUT_SIZE" + echo 
" Architecture: $ARCHITECTURE" + echo " Container Deployment: $CONTAINER_DEPLOYMENT" echo " Output: $OUTPUT_DIR" echo "" @@ -149,8 +163,13 @@ if [ "$SKIP_BENCHMARK" = false ]; then --repetitions "$REPETITIONS" --memory $MEMORY --input-size "$INPUT_SIZE" + --architecture "$ARCHITECTURE" --verbose ) + + if [ "$CONTAINER_DEPLOYMENT" = true ]; then + CMD+=(--container-deployment) + fi echo "Running: ${CMD[@]}" echo "" From d55e8b46edc5b71dc011028a9834dfb2389bb557 Mon Sep 17 00:00:00 2001 From: Alexander Schlieper Date: Thu, 8 Jan 2026 14:38:01 +0100 Subject: [PATCH 27/31] Feature: Multi-language Support (Rust, PyPy, Java) & Enhanced Benchmarking Tools - Added Rust runtime support (AWS) - Added PyPy support (AWS, Azure, GCP, Local) - Added Java runtime support (AWS, Azure) - Added cross-platform benchmarking scripts - Enhanced plotting and visualization tools - Updated Dockerfiles and system configurations - Removed sensitive credentials from config files --- .../010.sleep/rust/Cargo.toml | 12 +- .../010.sleep/rust/src/function.rs | 24 + .../010.sleep/rust/src/main.rs | 61 -- .../100.webapps/110.dynamic-html/config.json | 2 +- .../100.webapps/110.dynamic-html/java/pom.xml | 10 + .../110.dynamic-html/rust/Cargo.toml | 16 + .../110.dynamic-html/rust/src/function.rs | 52 ++ .../rust/src/templates/template.html | 27 + .../rust/templates/template.html | 27 + benchmarks/wrappers/aws/rust/Cargo.toml | 23 + benchmarks/wrappers/aws/rust/src/main.rs | 87 ++ benchmarks/wrappers/aws/rust/src/nosql.rs | 88 ++ benchmarks/wrappers/aws/rust/src/storage.rs | 84 ++ config/example.json | 21 +- config/systems.json | 23 +- dockerfiles/aws/java/Dockerfile.build | 7 +- dockerfiles/aws/java/Dockerfile.function | 18 +- dockerfiles/aws/rust/Dockerfile.function | 22 +- dockerfiles/java_installer.sh | 24 +- experiments.json | 120 +++ scripts/cross_platform_benchmark.py | 82 +- scripts/plot_comparison.py | 814 +++++++++++++++--- scripts/run_comparison.sh | 70 +- scripts/run_sebs_with_plots.sh | 218 
+++++ sebs/aws/aws.py | 42 +- sebs/benchmark.py | 198 ++++- sebs/faas/container.py | 9 + third-party/pypapi | 1 - 28 files changed, 1876 insertions(+), 306 deletions(-) create mode 100644 benchmarks/000.microbenchmarks/010.sleep/rust/src/function.rs delete mode 100644 benchmarks/000.microbenchmarks/010.sleep/rust/src/main.rs create mode 100644 benchmarks/100.webapps/110.dynamic-html/rust/Cargo.toml create mode 100644 benchmarks/100.webapps/110.dynamic-html/rust/src/function.rs create mode 100644 benchmarks/100.webapps/110.dynamic-html/rust/src/templates/template.html create mode 100644 benchmarks/100.webapps/110.dynamic-html/rust/templates/template.html create mode 100644 benchmarks/wrappers/aws/rust/Cargo.toml create mode 100644 benchmarks/wrappers/aws/rust/src/main.rs create mode 100644 benchmarks/wrappers/aws/rust/src/nosql.rs create mode 100644 benchmarks/wrappers/aws/rust/src/storage.rs create mode 100644 experiments.json create mode 100755 scripts/run_sebs_with_plots.sh delete mode 160000 third-party/pypapi diff --git a/benchmarks/000.microbenchmarks/010.sleep/rust/Cargo.toml b/benchmarks/000.microbenchmarks/010.sleep/rust/Cargo.toml index 0c55df5e9..67b85cb29 100644 --- a/benchmarks/000.microbenchmarks/010.sleep/rust/Cargo.toml +++ b/benchmarks/000.microbenchmarks/010.sleep/rust/Cargo.toml @@ -2,13 +2,13 @@ name = "sleep-benchmark" version = "0.1.0" edition = "2021" +rust-version = "1.88" -[[bin]] -name = "bootstrap" -path = "src/main.rs" +# Note: This Cargo.toml only contains benchmark-specific dependencies. +# Wrapper dependencies (lambda_http, aws-sdk-*, etc.) are provided by the wrapper Cargo.toml +# and will be merged during the build process. 
[dependencies] -lambda_runtime = "0.13" +# Benchmark-specific dependencies only +# serde is already in wrapper, but we can override features if needed serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" -tokio = { version = "1", features = ["full"] } diff --git a/benchmarks/000.microbenchmarks/010.sleep/rust/src/function.rs b/benchmarks/000.microbenchmarks/010.sleep/rust/src/function.rs new file mode 100644 index 000000000..64a34cd3c --- /dev/null +++ b/benchmarks/000.microbenchmarks/010.sleep/rust/src/function.rs @@ -0,0 +1,24 @@ +use serde::{Deserialize, Serialize}; +use std::thread; +use std::time::Duration; + +#[derive(Deserialize)] +pub struct RequestPayload { + pub sleep: Option, +} + +#[derive(Serialize)] +pub struct FunctionResponse { + pub result: f64, +} + +pub fn handler(event: RequestPayload) -> FunctionResponse { + let sleep_time = event.sleep.unwrap_or(0.0); + if sleep_time > 0.0 { + thread::sleep(Duration::from_secs_f64(sleep_time)); + } + + FunctionResponse { + result: sleep_time, + } +} diff --git a/benchmarks/000.microbenchmarks/010.sleep/rust/src/main.rs b/benchmarks/000.microbenchmarks/010.sleep/rust/src/main.rs deleted file mode 100644 index 86308c155..000000000 --- a/benchmarks/000.microbenchmarks/010.sleep/rust/src/main.rs +++ /dev/null @@ -1,61 +0,0 @@ -use lambda_runtime::{service_fn, Error, LambdaEvent}; -use serde::{Deserialize, Serialize}; -use std::time::{Duration, SystemTime, UNIX_EPOCH}; - -#[derive(Deserialize)] -struct Request { - sleep: Option, -} - -#[derive(Serialize)] -struct Response { - result: f64, - begin: f64, - end: f64, - is_cold: bool, - request_id: String, -} - -static mut IS_COLD: bool = true; - -async fn handler(event: LambdaEvent) -> Result { - let (payload, context) = event.into_parts(); - - let begin = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs_f64(); - - // Get the cold start status - let is_cold = unsafe { - let cold = IS_COLD; - IS_COLD = false; - cold - }; - - // 
Get sleep time from event - let sleep_time = payload.sleep.unwrap_or(0.0); - - // Sleep for the specified time - if sleep_time > 0.0 { - tokio::time::sleep(Duration::from_secs_f64(sleep_time)).await; - } - - let end = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs_f64(); - - Ok(Response { - result: sleep_time, - begin, - end, - is_cold, - request_id: context.request_id, - }) -} - -#[tokio::main] -async fn main() -> Result<(), Error> { - lambda_runtime::run(service_fn(handler)).await -} diff --git a/benchmarks/100.webapps/110.dynamic-html/config.json b/benchmarks/100.webapps/110.dynamic-html/config.json index 5ef745120..cdeb1aa30 100644 --- a/benchmarks/100.webapps/110.dynamic-html/config.json +++ b/benchmarks/100.webapps/110.dynamic-html/config.json @@ -1,6 +1,6 @@ { "timeout": 10, "memory": 128, - "languages": ["python", "nodejs", "java", "pypy"], + "languages": ["python", "nodejs", "java", "rust", "pypy"], "modules": [] } diff --git a/benchmarks/100.webapps/110.dynamic-html/java/pom.xml b/benchmarks/100.webapps/110.dynamic-html/java/pom.xml index fb7e685e6..d46a81c75 100644 --- a/benchmarks/100.webapps/110.dynamic-html/java/pom.xml +++ b/benchmarks/100.webapps/110.dynamic-html/java/pom.xml @@ -18,6 +18,12 @@ compiler 0.9.10 + + + com.amazonaws + aws-lambda-java-core + 1.2.3 + com.microsoft.azure.functions @@ -32,6 +38,7 @@ + function ${project.basedir}/templates @@ -61,6 +68,9 @@ META-INF/*.SF META-INF/*.RSA META-INF/*.DSA + module-info.class + META-INF/versions/*/module-info.class + META-INF/versions/**/module-info.class diff --git a/benchmarks/100.webapps/110.dynamic-html/rust/Cargo.toml b/benchmarks/100.webapps/110.dynamic-html/rust/Cargo.toml new file mode 100644 index 000000000..02fb138eb --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/rust/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "dynamic-html-benchmark" +version = "0.1.0" +edition = "2021" +rust-version = "1.88" + +# Note: This Cargo.toml only contains 
benchmark-specific dependencies. +# Wrapper dependencies (lambda_http, aws-sdk-*, etc.) are provided by the wrapper Cargo.toml +# and will be merged during the build process. + +[dependencies] +# Benchmark-specific dependencies only +serde = { version = "1.0", features = ["derive"] } +rand = "0.8" +chrono = "0.4" + diff --git a/benchmarks/100.webapps/110.dynamic-html/rust/src/function.rs b/benchmarks/100.webapps/110.dynamic-html/rust/src/function.rs new file mode 100644 index 000000000..e644f12ce --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/rust/src/function.rs @@ -0,0 +1,52 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Deserialize)] +pub struct RequestPayload { + pub username: String, + pub random_len: usize, +} + +#[derive(Serialize)] +pub struct FunctionResponse { + pub result: String, +} + +pub fn handler(event: RequestPayload) -> FunctionResponse { + // Generate random numbers + use rand::Rng; + let mut rng = rand::thread_rng(); + let random_numbers: Vec = (0..event.random_len) + .map(|_| rng.gen_range(0..1_000_000)) + .collect(); + + // Get current time formatted as locale string + use chrono::Local; + let cur_time = Local::now().format("%Y-%m-%d %H:%M:%S").to_string(); + + // Use embedded template (compiled into binary) + // This is more reliable than reading from filesystem in Lambda + let template_content = include_str!("templates/template.html"); + + // Simple template rendering (replace placeholders) + // Generate list items for random numbers + let list_items: String = random_numbers + .iter() + .map(|n| format!("
  • {}
  • ", n)) + .collect::>() + .join("\n"); + + // Replace template variables + let html = template_content + .replace("{{username}}", &event.username) + .replace("{{cur_time}}", &cur_time) + // Replace the entire loop block with generated list items + .replace( + " {% for n in random_numbers %}\n
  • {{n}}
  • \n {% endfor %}", + &list_items, + ); + + FunctionResponse { + result: html, + } +} + diff --git a/benchmarks/100.webapps/110.dynamic-html/rust/src/templates/template.html b/benchmarks/100.webapps/110.dynamic-html/rust/src/templates/template.html new file mode 100644 index 000000000..284499ded --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/rust/src/templates/template.html @@ -0,0 +1,27 @@ + + + + Randomly generated data. + + + + + +
    +

    Welcome {{username}}!

    +

    Data generated at: {{cur_time}}!

    +

    Requested random numbers:

    +
      + {% for n in random_numbers %} +
    • {{n}}
    • + {% endfor %} +
    +
    + + + diff --git a/benchmarks/100.webapps/110.dynamic-html/rust/templates/template.html b/benchmarks/100.webapps/110.dynamic-html/rust/templates/template.html new file mode 100644 index 000000000..284499ded --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/rust/templates/template.html @@ -0,0 +1,27 @@ + + + + Randomly generated data. + + + + + +
    +

    Welcome {{username}}!

    +

    Data generated at: {{cur_time}}!

    +

    Requested random numbers:

    +
      + {% for n in random_numbers %} +
    • {{n}}
    • + {% endfor %} +
    +
    + + + diff --git a/benchmarks/wrappers/aws/rust/Cargo.toml b/benchmarks/wrappers/aws/rust/Cargo.toml new file mode 100644 index 000000000..a8c5d7a9e --- /dev/null +++ b/benchmarks/wrappers/aws/rust/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "aws-lambda-wrapper" +version = "0.1.0" +edition = "2021" +rust-version = "1.88" + +[[bin]] +name = "bootstrap" +path = "src/main.rs" + +[dependencies] +lambda_http = "0.9" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +tokio = { version = "1", features = ["full"] } +# Pin AWS SDK versions to support Rust 1.87 +# Versions 1.1.x are the last to support Rust 1.87, 1.2+ requires Rust 1.88 +# Use version range that excludes 1.2+ (which requires Rust 1.88) +aws-config = ">=1.0, <1.2" +aws-sdk-s3 = ">=1.0, <1.2" +aws-sdk-dynamodb = ">=1.0, <1.2" +uuid = { version = "1.4", features = ["v4"] } + diff --git a/benchmarks/wrappers/aws/rust/src/main.rs b/benchmarks/wrappers/aws/rust/src/main.rs new file mode 100644 index 000000000..b9b9fd29f --- /dev/null +++ b/benchmarks/wrappers/aws/rust/src/main.rs @@ -0,0 +1,87 @@ +use lambda_http::{run, service_fn, Body, Error, Request, RequestExt, RequestPayloadExt, Response}; +use serde::Serialize; +use std::time::{SystemTime, UNIX_EPOCH}; + +mod function; +pub mod storage; +pub mod nosql; + +use function::{RequestPayload, FunctionResponse}; + +#[derive(Serialize)] +struct ResponsePayload { + result: serde_json::Value, + begin: f64, + end: f64, + is_cold: bool, + request_id: String, +} + +static mut IS_COLD: bool = true; + +async fn handler(event: Request) -> Result, Error> { + let begin = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs_f64(); + + let is_cold = unsafe { + let cold = IS_COLD; + IS_COLD = false; + cold + }; + + let request_id = event + .lambda_context_ref() + .map(|ctx| ctx.request_id.clone()) + .unwrap_or_else(|| "unknown".to_string()); + + // Parse Body + let payload: RequestPayload = match event.payload() { + Ok(Some(p)) => p, 
+ Ok(None) => { + return Ok(Response::builder() + .status(400) + .body(Body::from("Missing request body")) + .unwrap()); + } + Err(e) => { + return Ok(Response::builder() + .status(400) + .body(Body::from(format!("Invalid JSON: {}", e))) + .unwrap()); + } + }; + + // Call the benchmark function (sync) + let function_result: FunctionResponse = function::handler(payload); + + let end = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs_f64(); + + // Convert result to Value + let result_value = serde_json::to_value(&function_result.result)?; + + let response_payload = ResponsePayload { + result: result_value, + begin, + end, + is_cold, + request_id, + }; + + let response_json = serde_json::to_string(&response_payload)?; + + Ok(Response::builder() + .status(200) + .header("Content-Type", "application/json") + .body(Body::from(response_json)) + .unwrap()) +} + +#[tokio::main] +async fn main() -> Result<(), Error> { + run(service_fn(handler)).await +} diff --git a/benchmarks/wrappers/aws/rust/src/nosql.rs b/benchmarks/wrappers/aws/rust/src/nosql.rs new file mode 100644 index 000000000..9cc3a64a8 --- /dev/null +++ b/benchmarks/wrappers/aws/rust/src/nosql.rs @@ -0,0 +1,88 @@ +use aws_sdk_dynamodb::{Client, types::AttributeValue}; +use std::collections::HashMap; +use std::env; + +pub struct NoSQL { + client: Client, + tables: std::sync::Mutex>, +} + +impl NoSQL { + pub async fn get_instance() -> Self { + let config = aws_config::load_defaults(aws_config::BehaviorVersion::latest()).await; + let client = Client::new(&config); + NoSQL { + client, + tables: std::sync::Mutex::new(HashMap::new()), + } + } + + fn get_table_name(&self, table_name: &str) -> Result { + let mut tables = self.tables.lock().unwrap(); + if let Some(name) = tables.get(table_name) { + return Ok(name.clone()); + } + + let env_name = format!("NOSQL_STORAGE_TABLE_{}", table_name); + match env::var(&env_name) { + Ok(aws_name) => { + tables.insert(table_name.to_string(), aws_name.clone()); + 
Ok(aws_name) + } + Err(_) => Err(format!("Couldn't find environment variable {} for table {}", env_name, table_name)), + } + } + + // Helper to convert HashMap to generic JSON/Map + // For simplicity in this wrapper, we accept/return HashMap<String, AttributeValue> + // or we could use serde_dynamo if added as dependency. + // Following the python "dict" approach, we'll try to use AttributeValue directly or simple conversion. + // For now, let's expose the raw AttributeValue or simple helpers. + + pub async fn insert( + &self, + table_name: &str, + primary_key: (&str, &str), + secondary_key: (&str, &str), + mut data: HashMap<String, AttributeValue>, + ) -> Result<(), Box<dyn std::error::Error>> { + let aws_table_name = self.get_table_name(table_name)?; + + data.insert(primary_key.0.to_string(), AttributeValue::S(primary_key.1.to_string())); + data.insert(secondary_key.0.to_string(), AttributeValue::S(secondary_key.1.to_string())); + + self.client + .put_item() + .table_name(aws_table_name) + .set_item(Some(data)) + .send() + .await?; + + Ok(()) + } + + pub async fn get( + &self, + table_name: &str, + primary_key: (&str, &str), + secondary_key: (&str, &str), + ) -> Result<HashMap<String, AttributeValue>, Box<dyn std::error::Error>> { + let aws_table_name = self.get_table_name(table_name)?; + + let mut key = HashMap::new(); + key.insert(primary_key.0.to_string(), AttributeValue::S(primary_key.1.to_string())); + key.insert(secondary_key.0.to_string(), AttributeValue::S(secondary_key.1.to_string())); + + let resp = self.client + .get_item() + .table_name(aws_table_name) + .set_key(Some(key)) + .send() + .await?; + + Ok(resp.item.unwrap_or_default()) + } + + // Minimal implementation matching the python basics. + // update/query/delete can be added similarly. 
+} diff --git a/benchmarks/wrappers/aws/rust/src/storage.rs b/benchmarks/wrappers/aws/rust/src/storage.rs new file mode 100644 index 000000000..904bb531e --- /dev/null +++ b/benchmarks/wrappers/aws/rust/src/storage.rs @@ -0,0 +1,84 @@ +use aws_sdk_s3::Client; +use aws_sdk_s3::primitives::ByteStream; +use std::path::Path; +use uuid::Uuid; +use std::fs; +use std::io::Write; + +pub struct Storage { + client: Client, +} + +impl Storage { + pub async fn get_instance() -> Self { + let config = aws_config::load_defaults(aws_config::BehaviorVersion::latest()).await; + let client = Client::new(&config); + Storage { client } + } + + fn unique_name(name: &str) -> String { + let path = Path::new(name); + let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or(name); + let ext = path.extension().and_then(|s| s.to_str()).map(|e| format!(".{}", e)).unwrap_or_default(); + let uuid = Uuid::new_v4().to_string(); + let uuid_short = uuid.split('-').next().unwrap_or(&uuid); + format!("{}.{}{}", stem, uuid_short, ext) + } + + pub async fn upload(&self, bucket: &str, file: &str, filepath: &str) -> Result> { + let key_name = Self::unique_name(file); + let body = ByteStream::from_path(Path::new(filepath)).await?; + + self.client + .put_object() + .bucket(bucket) + .key(&key_name) + .body(body) + .send() + .await?; + + Ok(key_name) + } + + pub async fn download(&self, bucket: &str, file: &str, filepath: &str) -> Result<(), Box> { + let resp = self.client + .get_object() + .bucket(bucket) + .key(file) + .send() + .await?; + + let data = resp.body.collect().await?; + let mut file = fs::File::create(filepath)?; + file.write_all(&data.into_bytes())?; + + Ok(()) + } + + pub async fn upload_stream(&self, bucket: &str, file: &str, data: Vec) -> Result> { + let key_name = Self::unique_name(file); + let body = ByteStream::from(data); + + self.client + .put_object() + .bucket(bucket) + .key(&key_name) + .body(body) + .send() + .await?; + + Ok(key_name) + } + + pub async fn 
download_stream(&self, bucket: &str, file: &str) -> Result, Box> { + let resp = self.client + .get_object() + .bucket(bucket) + .key(file) + .send() + .await?; + + let data = resp.body.collect().await?; + Ok(data.into_bytes().to_vec()) + } +} diff --git a/config/example.json b/config/example.json index ea62910dc..313925cbf 100644 --- a/config/example.json +++ b/config/example.json @@ -7,12 +7,12 @@ "architecture": "arm64", "container_deployment": true, "runtime": { - "language": "python", - "version": "3.8" + "language": "rust", + "version": "1.88" }, "type": "invocation-overhead", "perf-cost": { - "benchmark": "110.dynamic-html", + "benchmark": "010.sleep", "experiments": ["cold", "warm", "burst", "sequential"], "input-size": "test", "repetitions": 50, @@ -45,11 +45,20 @@ "deployment": { "name": "aws", "aws": { - "region": "us-east-1", - "lambda-role": "" + "region": "eu-north-1", + "lambda-role": "sebs-lambda-role", + "credentials": { + "access_key": "", + "secret_key": "" + } }, "azure": { - "region": "westeurope" + "region": "westeurope", + "credentials": { + "appID": "", + "tenant": "", + "password": "" + } }, "gcp": { "region": "europe-west1", diff --git a/config/systems.json b/config/systems.json index 0f87abbab..770773fed 100644 --- a/config/systems.json +++ b/config/systems.json @@ -156,31 +156,20 @@ "rust": { "base_images": { "x64": { - "1.75": "amazonlinux:2023", - "1.76": "amazonlinux:2023", - "1.77": "amazonlinux:2023", - "1.78": "amazonlinux:2023", - "1.79": "amazonlinux:2023", - "1.80": "amazonlinux:2023", - "1.81": "amazonlinux:2023", - "1.82": "amazonlinux:2023" + "1.88": "amazonlinux:2023" }, "arm64": { - "1.75": "amazonlinux:2023", - "1.76": "amazonlinux:2023", - "1.77": "amazonlinux:2023", - "1.78": "amazonlinux:2023", - "1.79": "amazonlinux:2023", - "1.80": "amazonlinux:2023", - "1.81": "amazonlinux:2023", - "1.82": "amazonlinux:2023" + "1.88": "amazonlinux:2023" } }, "images": [ "build" ], "deployment": { - "files": [], + "files": [ + "src", + 
"Cargo.toml" + ], "packages": {} } }, diff --git a/dockerfiles/aws/java/Dockerfile.build b/dockerfiles/aws/java/Dockerfile.build index 2990a296b..bd977d694 100644 --- a/dockerfiles/aws/java/Dockerfile.build +++ b/dockerfiles/aws/java/Dockerfile.build @@ -4,7 +4,12 @@ ARG VERSION ENV JAVA_VERSION=${VERSION} # useradd, groupmod, build tooling -RUN yum install -y shadow-utils unzip tar gzip maven zip +RUN yum install -y shadow-utils unzip tar gzip zip +# Install Maven 3.x (maven package may be old, install from Apache directly) +RUN curl -fsSL https://archive.apache.org/dist/maven/maven-3/3.9.6/binaries/apache-maven-3.9.6-bin.tar.gz | tar -xz -C /opt && \ + ln -s /opt/apache-maven-3.9.6 /opt/maven && \ + ln -s /opt/maven/bin/mvn /usr/local/bin/mvn +ENV PATH=/opt/maven/bin:$PATH ENV GOSU_VERSION 1.14 # https://github.com/tianon/gosu/releases/tag/1.14 # key https://keys.openpgp.org/search?q=tianon%40debian.org diff --git a/dockerfiles/aws/java/Dockerfile.function b/dockerfiles/aws/java/Dockerfile.function index 07ae2f1c7..bc20eb685 100644 --- a/dockerfiles/aws/java/Dockerfile.function +++ b/dockerfiles/aws/java/Dockerfile.function @@ -5,11 +5,23 @@ ENV JAVA_VERSION=${VERSION} ARG TARGET_ARCHITECTURE COPY . 
function/ -WORKDIR /function # Ensure packaged jar is present for the Lambda base image -RUN if [ -d "target" ] && ls target/*.jar >/dev/null 2>&1; then \ - cp target/*.jar function.jar; \ +# function.jar should exist (created by java_installer.sh), but if not, copy from target/ +# Prefer the shaded/fat JAR (exclude "original" JARs created by maven-shade-plugin) +RUN if [ -f "function/function.jar" ]; then \ + cp function/function.jar function.jar; \ + elif [ -d "function/target" ] && ls function/target/*.jar >/dev/null 2>&1; then \ + JAR_FILE=$(ls function/target/*.jar 2>/dev/null | grep -v "original-" | head -n1); \ + if [ -z "$JAR_FILE" ]; then \ + JAR_FILE=$(ls function/target/*.jar | head -n1); \ + fi; \ + cp "$JAR_FILE" function.jar; \ + else \ + echo "Error: function.jar not found"; \ + echo "Contents of function/:"; \ + ls -la function/ 2>/dev/null || true; \ + exit 1; \ fi \ && test -f function.jar diff --git a/dockerfiles/aws/rust/Dockerfile.function b/dockerfiles/aws/rust/Dockerfile.function index 2c662310a..c7523e48d 100644 --- a/dockerfiles/aws/rust/Dockerfile.function +++ b/dockerfiles/aws/rust/Dockerfile.function @@ -1,24 +1,12 @@ ARG BASE_IMAGE FROM $BASE_IMAGE -# Install Rust -ARG RUST_VERSION -ENV RUSTUP_HOME=/usr/local/rustup \ - CARGO_HOME=/usr/local/cargo \ - PATH=/usr/local/cargo/bin:$PATH +# Copy the pre-built bootstrap binary +# The binary is already built by the build container +COPY bootstrap /var/runtime/bootstrap -RUN yum install -y gcc openssl-devel && \ - curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain ${RUST_VERSION} && \ - chmod -R a+w $RUSTUP_HOME $CARGO_HOME - -# Copy function code -COPY . 
/var/task/ - -# Build the function -WORKDIR /var/task -RUN cargo build --release && \ - cp target/release/bootstrap /var/runtime/bootstrap || \ - cp target/release/handler /var/runtime/bootstrap +# Make sure it's executable +RUN chmod +x /var/runtime/bootstrap # Set the CMD to the handler (AWS Lambda will execute this) CMD ["/var/runtime/bootstrap"] diff --git a/dockerfiles/java_installer.sh b/dockerfiles/java_installer.sh index 2fd4fc1b2..59abeb5e7 100644 --- a/dockerfiles/java_installer.sh +++ b/dockerfiles/java_installer.sh @@ -4,14 +4,32 @@ set -euo pipefail cd /mnt/function -if [[ -f "pom.xml" ]]; then +# Find pom.xml recursively +POM_PATH=$(find . -maxdepth 3 -name "pom.xml" | head -n1) + +if [[ -n "${POM_PATH}" ]]; then + echo "Found pom.xml at ${POM_PATH}" + POM_DIR=$(dirname "${POM_PATH}") + cd "${POM_DIR}" + # Note: -q flag causes issues in Docker, removed for reliable builds mvn -DskipTests package if ls target/*.jar >/dev/null 2>&1; then - JAR_PATH=$(ls target/*.jar | head -n1) - cp "${JAR_PATH}" function.jar + # Prefer the shaded/fat JAR (exclude "original" JARs created by maven-shade-plugin) + # The shaded JAR contains all dependencies and is the one we want to use + JAR_PATH=$(ls target/*.jar 2>/dev/null | grep -v "original-" | head -n1) + if [[ -z "${JAR_PATH}" ]]; then + # Fallback to any JAR if no non-original JAR found + JAR_PATH=$(ls target/*.jar | head -n1) + fi + echo "Found built jar at ${JAR_PATH}" + cp "${JAR_PATH}" /mnt/function/function.jar fi + + cd /mnt/function +else + echo "No pom.xml found!" 
fi if [[ -f "${SCRIPT_FILE:-}" ]]; then diff --git a/experiments.json b/experiments.json new file mode 100644 index 000000000..53eb51946 --- /dev/null +++ b/experiments.json @@ -0,0 +1,120 @@ +{ + "_invocations": { + "sebs_bf659d4e_110_dynamic_html_rust_1_87_x64_docker": { + "unknown": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": { + "begin": 1767609674.294732, + "end": 1767609674.2947633, + "is_cold": false, + "request_id": "unknown", + "result": "\n\n\n \n Randomly generated data.\n \n \n \n \n \n
    \n

    Welcome testname!

    \n

    Data generated at: 2026-01-05 10:41:14.294 UTC!

    \n

    Requested random numbers:

    \n
      \n \n
    • 903514
    • \n \n
    • 445253
    • \n \n
    • 723144
    • \n \n
    • 167336
    • \n \n
    • 998709
    • \n \n
    • 523251
    • \n \n
    • 807246
    • \n \n
    • 350991
    • \n \n
    • 429441
    • \n \n
    • 321223
    • \n \n
    \n
    \n \n\n" + }, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "unknown", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 31, + "client": 132183, + "client_begin": "2026-01-05 11:41:14.159588", + "client_end": "2026-01-05 11:41:14.291771", + "http_first_byte_return": 0.132009, + "http_startup": 0.072621, + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767609665.552273, + "config": { + "deployment": { + "credentials": { + "account_id": "119764645837" + }, + "name": "aws", + "region": "eu-north-1", + "resources": { + "benchmarks": "sebs-benchmarks-bf659d4e", + "container_repository": "sebs-benchmarks-bf659d4e", + "docker": { + "registry": "119764645837.dkr.ecr.eu-north-1.amazonaws.com/sebs-benchmarks-bf659d4e", + "username": "AWS" + }, + "http-apis": { + "sebs_bf659d4e_110_dynamic_html_rust_1_87_x64_docker-http-api": { + "arn": "arn:aws:execute-api:eu-north-1:119764645837:m4mj1j425a", + "endpoint": "https://m4mj1j425a.execute-api.eu-north-1.amazonaws.com" + } + }, + "lambda-role": "arn:aws:iam::119764645837:role/sebs-lambda-role", + "resources_id": "bf659d4e" + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": true, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + 
}, + "flags": {}, + "runtime": { + "language": "rust", + "version": "1.87" + }, + "update_code": true, + "update_storage": false + } + }, + "end_time": 1767609674.294158, + "result_bucket": null +} \ No newline at end of file diff --git a/scripts/cross_platform_benchmark.py b/scripts/cross_platform_benchmark.py index 31432ac78..9ac83f1d0 100644 --- a/scripts/cross_platform_benchmark.py +++ b/scripts/cross_platform_benchmark.py @@ -51,10 +51,11 @@ class BenchmarkRunner: """Orchestrates benchmark execution across platforms and languages.""" - def __init__(self, output_dir: str, cache_dir: str = 'cache', verbose: bool = False): - self.output_dir = Path(output_dir) + def __init__(self, output_dir: str, cache_dir: str = 'cache', verbose: bool = False, container_deployment_for: Optional[List[str]] = None): + self.output_dir = Path(output_dir).resolve() self.cache_dir = cache_dir self.verbose = verbose + self.container_deployment_for = set(container_deployment_for or []) self.results = { 'metadata': { 'start_time': datetime.now().isoformat(), @@ -99,10 +100,19 @@ def run_single_benchmark( (success, output_file, error_message) """ run_id = f"{benchmark}_{platform}_{language}_{version}_{memory}MB" - self.logger.info(f"Starting: {run_id}") - # Create experiment output directory - experiment_dir = self.output_dir / run_id + # Determine deployment type for logging + should_use_container = ( + container_deployment or + platform in self.container_deployment_for or + ((platform == 'aws' or platform == 'gcp') and language == 'pypy') + ) + deployment_type = "container" if should_use_container else "package" + + self.logger.info(f"Starting: {run_id} (deployment: {deployment_type})") + + # Create experiment output directory (use absolute path) + experiment_dir = (self.output_dir / run_id).resolve() experiment_dir.mkdir(parents=True, exist_ok=True) # Update config for this run @@ -144,7 +154,13 @@ def run_single_benchmark( ] # Add --container-deployment if requested or required - if 
container_deployment or ((platform == 'aws' or platform == 'gcp') and language == 'pypy'): + # Priority: explicit flag > per-platform setting > automatic for PyPy on AWS/GCP + should_use_container = ( + container_deployment or + platform in self.container_deployment_for or + ((platform == 'aws' or platform == 'gcp') and language == 'pypy') + ) + if should_use_container: cmd.append('--container-deployment') if self.verbose: @@ -152,17 +168,20 @@ def run_single_benchmark( self.logger.debug(f"Command: {' '.join(cmd)}") - # Execute benchmark (run from experiment directory so experiments.json is saved there) + # Execute benchmark (run from project root for proper path resolution) start_time = time.time() result = subprocess.run( cmd, capture_output=True, text=True, timeout=600, # 10 minute timeout - cwd=str(experiment_dir) # Run from experiment directory + cwd=PROJECT_ROOT # Run from project root ) execution_time = time.time() - start_time + # Ensure the directory still exists (sebs.py might have cleaned it up on error) + experiment_dir.mkdir(parents=True, exist_ok=True) + # Save stdout/stderr with open(experiment_dir / 'stdout.log', 'w') as f: f.write(result.stdout) @@ -390,12 +409,22 @@ def main(): formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: - # Compare Python and Node.js on AWS and Azure + # Compare Python and Node.js on AWS and Azure (with auto-plotting) %(prog)s --benchmarks 010.sleep 110.dynamic-html \\ --platforms aws azure \\ --languages python nodejs \\ --config config/example.json \\ - --output results/comparison_$(date +%%Y%%m%%d) + --output results/comparison_$(date +%%Y%%m%%d) \\ + --plot + + # Compare AWS (container) vs Azure (package deployment) + %(prog)s --benchmarks 010.sleep \\ + --platforms aws azure \\ + --languages python \\ + --container-deployment-for aws \\ + --config config/example.json \\ + --output results/aws_container_vs_azure_package \\ + --plot # Compare specific Python versions on AWS %(prog)s --benchmarks 
501.graph-pagerank \\ @@ -403,7 +432,8 @@ def main(): --languages python \\ --python-versions 3.11 3.10 3.9 \\ --memory 512 1024 \\ - --config config/example.json + --config config/example.json \\ + --plot """ ) @@ -447,7 +477,12 @@ def main(): help='Enable verbose output') parser.add_argument('--container-deployment', action='store_true', - help='Run functions as containers') + help='Run functions as containers (all platforms)') + parser.add_argument('--container-deployment-for', nargs='+', + help='Specific platforms to use container deployment (e.g., aws gcp)') + + parser.add_argument('--plot', action='store_true', + help='Automatically generate plots after benchmarking') args = parser.parse_args() @@ -466,7 +501,8 @@ def main(): runner = BenchmarkRunner( output_dir=args.output, cache_dir=args.cache, - verbose=args.verbose + verbose=args.verbose, + container_deployment_for=args.container_deployment_for ) # Run comparison @@ -490,6 +526,26 @@ def main(): print(f"Results: {args.output}/comparison_results.json") print(f"Logs: {args.output}/benchmark_run.log") + # Auto-generate plots if requested + if args.plot: + print("\n" + "="*60) + print("Generating plots...") + print("="*60) + try: + # Suppress matplotlib debug output + logging.getLogger('matplotlib').setLevel(logging.WARNING) + logging.getLogger('PIL').setLevel(logging.WARNING) + + from plot_comparison import BenchmarkVisualizer + results_file = f"{args.output}/comparison_results.json" + visualizer = BenchmarkVisualizer(results_file) + visualizer.create_all_plots() + print(f"\n✓ Plots saved to: {visualizer.output_dir}") + except Exception as e: + print(f"Warning: Failed to generate plots: {e}") + print("You can generate plots manually with:") + print(f" python scripts/plot_comparison.py {args.output}/comparison_results.json") + return 0 except KeyboardInterrupt: diff --git a/scripts/plot_comparison.py b/scripts/plot_comparison.py index 738164804..077041dbf 100644 --- a/scripts/plot_comparison.py +++ 
b/scripts/plot_comparison.py @@ -11,14 +11,20 @@ import logging import sys from pathlib import Path -from typing import Dict, List, Optional +from typing import Optional import matplotlib.pyplot as plt -import matplotlib.patches as mpatches import numpy as np import pandas as pd import seaborn as sns +# Suppress matplotlib debug output +import matplotlib +matplotlib_logger = logging.getLogger('matplotlib') +matplotlib_logger.setLevel(logging.WARNING) +pil_logger = logging.getLogger('PIL') +pil_logger.setLevel(logging.WARNING) + # Set style sns.set_style("whitegrid") sns.set_context("paper", font_scale=1.2) @@ -49,10 +55,6 @@ def __init__(self, results_file: str, output_dir: Optional[str] = None): self.output_dir = Path(output_dir) if output_dir else self.results_file.parent / 'plots' self.output_dir.mkdir(parents=True, exist_ok=True) - # Load results - with open(self.results_file, 'r') as f: - self.results = json.load(f) - # Setup logging logging.basicConfig( level=logging.INFO, @@ -60,26 +62,44 @@ def __init__(self, results_file: str, output_dir: Optional[str] = None): ) self.logger = logging.getLogger(__name__) - self.logger.info(f"Loaded results from {self.results_file}") + # Detect file type and load results + if self.results_file.suffix == '.csv': + self.results = None # CSV doesn't use the results dict + self.is_csv = True + self.logger.info(f"Loaded CSV from {self.results_file}") + else: + with open(self.results_file, 'r') as f: + self.results = json.load(f) + self.is_csv = False + self.logger.info(f"Loaded results from {self.results_file}") + self.logger.info(f"Plots will be saved to {self.output_dir}") def extract_dataframe(self) -> pd.DataFrame: """ Extract benchmark results into a pandas DataFrame. 
- Returns a DataFrame with columns: - - benchmark: benchmark name - - platform: cloud platform - - language: programming language - - version: language version - - memory_mb: memory configuration - - avg_time_ms: average execution time - - min_time_ms: minimum execution time - - max_time_ms: maximum execution time - - cold_starts: number of cold starts - - warm_starts: number of warm starts - - success: whether the run succeeded + For CSV files (SeBS perf-cost format): + - Reads directly from CSV with columns: memory, type, is_cold, exec_time, + client_time, provider_time, mem_used + + For JSON files (cross_platform_benchmark format): + - Returns a DataFrame with columns: benchmark, platform, language, version, + memory_mb, avg_time_ms, cold_starts, warm_starts, success, etc. """ + if self.is_csv: + # Read SeBS perf-cost CSV format + df = pd.read_csv(self.results_file) + + # Convert microseconds to milliseconds + df['client_time_ms'] = df['client_time'] / 1000.0 + df['provider_time_ms'] = df['provider_time'] / 1000.0 + df['exec_time_ms'] = df['exec_time'] / 1000.0 + + self.logger.info(f"Loaded {len(df)} measurements from CSV") + return df + + # JSON format (original code) rows = [] for benchmark, bench_data in self.results['benchmarks'].items(): @@ -142,16 +162,16 @@ def plot_language_comparison(self, df: pd.DataFrame, benchmark: Optional[str] = # Group by platform and language grouped = df.groupby(['platform', 'language'])['avg_time_ms'].mean().reset_index() - # Pivot for plotting - pivot = grouped.pivot(index='language', columns='platform', values='avg_time_ms') + # Pivot for plotting - SWAPPED: platforms on X-axis, languages as colors + pivot = grouped.pivot(index='platform', columns='language', values='avg_time_ms') - # Create bar chart - pivot.plot(kind='bar', ax=ax, color=[PLATFORM_COLORS.get(p, '#888888') for p in pivot.columns]) + # Create bar chart with language colors + pivot.plot(kind='bar', ax=ax, color=[LANGUAGE_COLORS.get(lang, '#888888') for lang in 
pivot.columns]) - ax.set_xlabel('Language') + ax.set_xlabel('Platform') ax.set_ylabel('Average Execution Time (ms)') - ax.set_title(f'Language Performance Comparison{title_suffix}') - ax.legend(title='Platform') + ax.set_title(f'Platform Performance Comparison by Language{title_suffix}') + ax.legend(title='Language') ax.grid(axis='y', alpha=0.3) plt.xticks(rotation=45) @@ -304,9 +324,9 @@ def plot_cold_vs_warm(self, df: pd.DataFrame): x = np.arange(len(grouped)) width = 0.6 - p1 = ax.bar(x, grouped['cold_starts'], width, label='Cold Starts', color='#d62728') - p2 = ax.bar(x, grouped['warm_starts'], width, bottom=grouped['cold_starts'], - label='Warm Starts', color='#2ca02c') + ax.bar(x, grouped['cold_starts'], width, label='Cold Starts', color='#d62728') + ax.bar(x, grouped['warm_starts'], width, bottom=grouped['cold_starts'], + label='Warm Starts', color='#2ca02c') ax.set_xlabel('Language (Platform)') ax.set_ylabel('Number of Invocations') @@ -323,6 +343,263 @@ def plot_cold_vs_warm(self, df: pd.DataFrame): self.logger.info(f"Saved: {filepath}") plt.close() + def plot_cold_warm_comparison_boxplot(self, df: pd.DataFrame, benchmark: Optional[str] = None, + language: Optional[str] = None, version: Optional[str] = None): + """ + Create side-by-side boxplot comparison of cold vs warm performance. + Similar to the friend's plot style with better visual separation. 
+ + Args: + df: DataFrame with benchmark results + benchmark: Optional benchmark name to filter by + language: Optional language to filter by + version: Optional version to filter by + """ + # Filter data + if benchmark: + df = df[df['benchmark'] == benchmark] + if language: + df = df[df['language'] == language] + if version: + df = df[df['version'] == version] + + df = df[df['success'] == True].copy() + + if df.empty or 'execution_times' not in df.columns: + self.logger.warning("No execution time data for cold/warm boxplot comparison") + return + + # Expand execution times into separate rows with cold/warm labels + rows = [] + for _, row in df.iterrows(): + if 'execution_times' in row and row['execution_times']: + exec_times = row['execution_times'] + cold_count = row.get('cold_starts', 0) + + # Mark first cold_count as cold, rest as warm + for i, time_ms in enumerate(exec_times): + rows.append({ + 'benchmark': row['benchmark'], + 'language': row['language'], + 'version': row['version'], + 'memory': row['memory_mb'], + 'time_ms': time_ms, + 'type': 'cold' if i < cold_count else 'warm' + }) + + if not rows: + self.logger.warning("No execution time data to plot") + return + + expanded_df = pd.DataFrame(rows) + + # Create figure with subplots + fig, axes = plt.subplots(1, 2, figsize=(12, 5), sharey=False) + + # Build title + title_parts = [] + if benchmark: + title_parts.append(benchmark) + if language: + lang_display = f"{language.title()}" + if version: + lang_display += f" {version}" + title_parts.append(lang_display) + title_parts.append("Runtime Performance: Cold vs Warm") + fig.suptitle(' '.join(title_parts), fontsize=14, fontweight='bold') + + # Cold Start Plot + cold_data = expanded_df[expanded_df['type'] == 'cold'] + if not cold_data.empty: + sns.boxplot(ax=axes[0], x="memory", y="time_ms", data=cold_data, color="skyblue") + axes[0].set_title("Cold Start Latency") + axes[0].set_ylabel("Time (ms)") + axes[0].set_xlabel("Memory (MB)") + + # Warm Execution Plot 
+ warm_data = expanded_df[expanded_df['type'] == 'warm'] + if not warm_data.empty: + sns.boxplot(ax=axes[1], x="memory", y="time_ms", data=warm_data, color="orange") + axes[1].set_title("Warm Execution Latency") + axes[1].set_ylabel("Time (ms)") + axes[1].set_xlabel("Memory (MB)") + + plt.tight_layout() + + # Create filename + filename_parts = ["cold_warm_boxplot"] + if benchmark: + filename_parts.append(benchmark.replace(".", "_")) + if language: + filename_parts.append(language) + if version: + filename_parts.append(version.replace(".", "_")) + filename = "_".join(filename_parts) + ".png" + + filepath = self.output_dir / filename + plt.savefig(filepath, dpi=300, bbox_inches='tight') + self.logger.info(f"Saved: {filepath}") + plt.close() + + def plot_overhead_breakdown(self, df: pd.DataFrame, benchmark: Optional[str] = None, + language: Optional[str] = None, start_type: str = 'cold'): + """ + Create bar chart showing overhead breakdown (client_time, provider_time, exec_time). + + Args: + df: DataFrame with benchmark results + benchmark: Optional benchmark name to filter by + language: Optional language to filter by + start_type: 'cold' or 'warm' to filter by startup type + """ + # Filter data + if benchmark: + df = df[df['benchmark'] == benchmark] + if language: + df = df[df['language'] == language] + + df = df[df['success'] == True].copy() + + # Check if we have the required timing breakdown data + # This assumes metrics might have these fields + if df.empty: + self.logger.warning(f"No data for overhead breakdown") + return + + # Expand the data to get individual measurements + rows = [] + for _, row in df.iterrows(): + if 'execution_times' in row and row['execution_times']: + exec_times = row['execution_times'] + cold_count = row.get('cold_starts', 0) + + for i, exec_time_ms in enumerate(exec_times): + is_cold = i < cold_count + if (start_type == 'cold' and is_cold) or (start_type == 'warm' and not is_cold): + # For now, we'll use execution time as a proxy + # 
In a real scenario, you'd have client_time, provider_time, exec_time separately + rows.append({ + 'memory': row['memory_mb'], + 'client_time_ms': exec_time_ms, # Would be actual client_time + 'provider_time_ms': exec_time_ms * 0.8, # Placeholder - provider overhead + 'exec_time_ms': exec_time_ms * 0.7 # Placeholder - actual execution + }) + + if not rows: + self.logger.warning(f"No {start_type} start data for overhead breakdown") + return + + breakdown_df = pd.DataFrame(rows) + + # Melt for seaborn + melted = breakdown_df.melt( + id_vars=['memory'], + value_vars=['client_time_ms', 'provider_time_ms', 'exec_time_ms'], + var_name='Metric', + value_name='Time' + ) + + plt.figure(figsize=(10, 6)) + sns.barplot(x="memory", y="Time", hue="Metric", data=melted, errorbar='sd', palette="muted") + + # Build title + title_parts = [] + if benchmark: + title_parts.append(benchmark) + if language: + title_parts.append(language.title()) + title_parts.append(f"Overhead ({start_type.title()} Start)") + + plt.title(' '.join(title_parts), fontweight='bold') + plt.ylabel("Time (ms)") + plt.xlabel("Memory (MB)") + plt.tight_layout() + + # Create filename + filename_parts = [f"{start_type}_overhead"] + if benchmark: + filename_parts.append(benchmark.replace(".", "_")) + if language: + filename_parts.append(language) + filename = "_".join(filename_parts) + ".png" + + filepath = self.output_dir / filename + plt.savefig(filepath, dpi=300, bbox_inches='tight') + self.logger.info(f"Saved: {filepath}") + plt.close() + + def plot_memory_usage_distribution(self, df: pd.DataFrame, benchmark: Optional[str] = None, + language: Optional[str] = None): + """ + Create boxplot showing memory usage distribution across memory configurations. 
+ + Args: + df: DataFrame with benchmark results + benchmark: Optional benchmark name to filter by + language: Optional language to filter by + """ + # Filter data + if benchmark: + df = df[df['benchmark'] == benchmark] + if language: + df = df[df['language'] == language] + + df = df[df['success'] == True].copy() + + # Check if we have memory usage data in metrics + if df.empty: + self.logger.warning("No data for memory usage distribution") + return + + # Try to extract memory usage from metrics if available + rows = [] + for _, row in df.iterrows(): + if 'metrics' in row: + metrics = row['metrics'] + if isinstance(metrics, dict) and 'memory_used_mb' in metrics: + rows.append({ + 'memory': row['memory_mb'], + 'mem_used': metrics['memory_used_mb'] + }) + + if not rows: + self.logger.warning("No memory usage data available in metrics") + return + + mem_df = pd.DataFrame(rows) + + plt.figure(figsize=(8, 6)) + sns.boxplot(x="memory", y="mem_used", data=mem_df, color="lightgreen") + + if not mem_df.empty and mem_df['mem_used'].max() > 0: + plt.ylim(0, mem_df['mem_used'].max() * 1.2) + + # Build title + title_parts = [] + if benchmark: + title_parts.append(benchmark) + if language: + title_parts.append(language.title()) + title_parts.append("Memory Usage Distribution") + + plt.title(' '.join(title_parts), fontweight='bold') + plt.ylabel("Used Memory (MB)") + plt.xlabel("Allocated Memory (MB)") + plt.tight_layout() + + # Create filename + filename_parts = ["memory_usage"] + if benchmark: + filename_parts.append(benchmark.replace(".", "_")) + if language: + filename_parts.append(language) + filename = "_".join(filename_parts) + ".png" + + filepath = self.output_dir / filename + plt.savefig(filepath, dpi=300, bbox_inches='tight') + self.logger.info(f"Saved: {filepath}") + plt.close() + def plot_heatmap(self, df: pd.DataFrame, metric: str = 'avg_time_ms'): """ Create heatmap showing performance across platforms and languages. 
@@ -338,35 +615,48 @@ def plot_heatmap(self, df: pd.DataFrame, metric: str = 'avg_time_ms'): self.logger.warning(f"No data available for heatmap with metric: {metric}") return + # Drop rows where the metric is NaN + df = df.dropna(subset=[metric]) + + if df.empty: + self.logger.warning(f"No valid numeric data for heatmap with metric: {metric}") + return + # Aggregate by platform and language pivot = df.groupby(['platform', 'language'])[metric].mean().reset_index() pivot_table = pivot.pivot(index='platform', columns='language', values=metric) - if pivot_table.empty: + if pivot_table.empty or pivot_table.isna().all().all(): self.logger.warning("No data for heatmap") return fig, ax = plt.subplots(figsize=(10, 6)) - sns.heatmap( - pivot_table, - annot=True, - fmt='.2f', - cmap='YlOrRd', - ax=ax, - cbar_kws={'label': 'Avg Execution Time (ms)'} - ) - - ax.set_title(f'Performance Heatmap - {metric.replace("_", " ").title()}') - ax.set_xlabel('Language') - ax.set_ylabel('Platform') - - plt.tight_layout() - - filepath = self.output_dir / f"heatmap_{metric}.png" - plt.savefig(filepath, dpi=300, bbox_inches='tight') - self.logger.info(f"Saved: {filepath}") - plt.close() + # Only annotate if we have valid numeric data + try: + sns.heatmap( + pivot_table, + annot=True, + fmt='.2f', + cmap='YlOrRd', + ax=ax, + cbar_kws={'label': 'Avg Execution Time (ms)'}, + mask=pivot_table.isna() # Mask NaN values + ) + + ax.set_title(f'Performance Heatmap - {metric.replace("_", " ").title()}') + ax.set_xlabel('Language') + ax.set_ylabel('Platform') + + plt.tight_layout() + + filepath = self.output_dir / f"heatmap_{metric}.png" + plt.savefig(filepath, dpi=300, bbox_inches='tight') + self.logger.info(f"Saved: {filepath}") + except (ValueError, TypeError) as e: + self.logger.warning(f"Could not generate heatmap: {e}") + finally: + plt.close() def plot_version_comparison(self, df: pd.DataFrame, language: str): """ @@ -418,61 +708,69 @@ def create_summary_report(self, df: pd.DataFrame): 
report_lines.append("="*80) report_lines.append("") - # Metadata - metadata = self.results.get('metadata', {}) - report_lines.append(f"Start Time: {metadata.get('start_time', 'N/A')}") - report_lines.append(f"End Time: {metadata.get('end_time', 'N/A')}") - report_lines.append("") - - # Summary statistics - if 'summary' in metadata: - summary = metadata['summary'] - report_lines.append("Overall Statistics:") - report_lines.append(f" Total Runs: {summary.get('total_runs', 0)}") - report_lines.append(f" Successful: {summary.get('successful', 0)}") - report_lines.append(f" Failed: {summary.get('failed', 0)}") - report_lines.append(f" Success Rate: {summary.get('success_rate', 'N/A')}") + # Metadata (only for JSON results, not CSV) + if self.results: + metadata = self.results.get('metadata', {}) + report_lines.append(f"Start Time: {metadata.get('start_time', 'N/A')}") + report_lines.append(f"End Time: {metadata.get('end_time', 'N/A')}") report_lines.append("") - - # Successful runs only - df_success = df[df['success'] == True].copy() + + # Summary statistics + if 'summary' in metadata: + summary = metadata['summary'] + report_lines.append("Overall Statistics:") + report_lines.append(f" Total Runs: {summary.get('total_runs', 0)}") + report_lines.append(f" Successful: {summary.get('successful', 0)}") + report_lines.append(f" Failed: {summary.get('failed', 0)}") + report_lines.append(f" Success Rate: {summary.get('success_rate', 'N/A')}") + report_lines.append("") + + # Successful runs only (filter by success column if it exists) + if 'success' in df.columns: + df_success = df[df['success'] == True].copy() + else: + df_success = df.copy() if not df_success.empty and 'avg_time_ms' in df_success.columns: - report_lines.append("Performance by Platform:") - for platform in sorted(df_success['platform'].unique()): - platform_df = df_success[df_success['platform'] == platform] - avg_time = platform_df['avg_time_ms'].mean() - report_lines.append(f" {platform.upper()}: 
{avg_time:.2f} ms (avg)") - report_lines.append("") + if 'platform' in df_success.columns: + report_lines.append("Performance by Platform:") + for platform in sorted(df_success['platform'].unique()): + platform_df = df_success[df_success['platform'] == platform] + avg_time = platform_df['avg_time_ms'].mean() + report_lines.append(f" {platform.upper()}: {avg_time:.2f} ms (avg)") + report_lines.append("") - report_lines.append("Performance by Language:") - for language in sorted(df_success['language'].unique()): - lang_df = df_success[df_success['language'] == language] - avg_time = lang_df['avg_time_ms'].mean() - report_lines.append(f" {language}: {avg_time:.2f} ms (avg)") - report_lines.append("") + if 'language' in df_success.columns: + report_lines.append("Performance by Language:") + for language in sorted(df_success['language'].unique()): + lang_df = df_success[df_success['language'] == language] + avg_time = lang_df['avg_time_ms'].mean() + report_lines.append(f" {language}: {avg_time:.2f} ms (avg)") + report_lines.append("") # Best performers - report_lines.append("Best Performers:") - # Check if we have valid data - if not df_success['avg_time_ms'].isna().all(): - best_overall = df_success.loc[df_success['avg_time_ms'].idxmin()] - report_lines.append( - f" Fastest Overall: {best_overall['language']} on {best_overall['platform']} " - f"({best_overall['avg_time_ms']:.2f} ms)" - ) - else: - report_lines.append(" No valid performance data available") - - for platform in df_success['platform'].unique(): - platform_df = df_success[df_success['platform'] == platform] - if not platform_df.empty and not platform_df['avg_time_ms'].isna().all(): - best = platform_df.loc[platform_df['avg_time_ms'].idxmin()] + if 'language' in df_success.columns and 'platform' in df_success.columns: + report_lines.append("Best Performers:") + # Check if we have valid data + if not df_success['avg_time_ms'].isna().all(): + best_overall = df_success.loc[df_success['avg_time_ms'].idxmin()] 
report_lines.append( - f" Fastest on {platform}: {best['language']} v{best['version']} " - f"({best['avg_time_ms']:.2f} ms)" + f" Fastest Overall: {best_overall['language']} on {best_overall['platform']} " + f"({best_overall['avg_time_ms']:.2f} ms)" ) - report_lines.append("") + else: + report_lines.append(" No valid performance data available") + + for platform in df_success['platform'].unique(): + platform_df = df_success[df_success['platform'] == platform] + if not platform_df.empty and not platform_df['avg_time_ms'].isna().all(): + best = platform_df.loc[platform_df['avg_time_ms'].idxmin()] + version_str = f" v{best['version']}" if 'version' in best else "" + report_lines.append( + f" Fastest on {platform}: {best['language']}{version_str} " + f"({best['avg_time_ms']:.2f} ms)" + ) + report_lines.append("") report_lines.append("="*80) @@ -485,6 +783,173 @@ def create_summary_report(self, df: pd.DataFrame): self.logger.info(f"Saved: {filepath}") print("\n" + report_text) + def plot_sebs_cold_warm_comparison(self, df: pd.DataFrame, benchmark_name: str = "", + language_name: str = "", title_comment: str = ""): + """ + Create SeBS-style side-by-side cold vs warm comparison from CSV data. + This matches the friends' plotting style exactly. 
+ + Args: + df: DataFrame from SeBS CSV (must have 'type', 'memory', 'client_time_ms' columns) + benchmark_name: Name of benchmark for title + language_name: Language name for title + title_comment: Additional comment for title + """ + if not self.is_csv: + self.logger.warning("This plot type requires CSV input from SeBS perf-cost") + return + + fig, axes = plt.subplots(1, 2, figsize=(12, 5), sharey=False) + + title_parts = [benchmark_name, language_name, title_comment, "Runtime Performance: Cold vs Warm"] + title = ' '.join([p for p in title_parts if p]) + fig.suptitle(title, fontsize=14, fontweight='bold') + + # Cold Start Plot + cold_data = df[df['type'] == 'cold'] + if not cold_data.empty: + sns.boxplot(ax=axes[0], x="memory", y="client_time_ms", data=cold_data, color="skyblue") + axes[0].set_title("Cold Start Latency") + axes[0].set_ylabel("Time (ms)") + axes[0].set_xlabel("Memory (MB)") + + # Warm Execution Plot + warm_data = df[df['type'] == 'warm'] + if not warm_data.empty: + sns.boxplot(ax=axes[1], x="memory", y="client_time_ms", data=warm_data, color="orange") + axes[1].set_title("Warm Execution Latency") + axes[1].set_ylabel("Time (ms)") + axes[1].set_xlabel("Memory (MB)") + + plt.tight_layout() + + filepath = self.output_dir / "sebs_cold_warm_comparison.png" + plt.savefig(filepath, dpi=300, bbox_inches='tight') + self.logger.info(f"Saved: {filepath}") + plt.close() + + def plot_sebs_overhead_breakdown(self, df: pd.DataFrame, start_type: str = 'cold', + benchmark_name: str = "", language_name: str = "", + title_comment: str = ""): + """ + Create SeBS-style overhead breakdown showing client/provider/exec times. 
+ + Args: + df: DataFrame from SeBS CSV + start_type: 'cold' or 'warm' + benchmark_name: Name of benchmark for title + language_name: Language name for title + title_comment: Additional comment for title + """ + if not self.is_csv: + self.logger.warning("This plot type requires CSV input from SeBS perf-cost") + return + + # Filter by type + filtered_df = df[df['type'] == start_type].copy() + + if filtered_df.empty: + self.logger.warning(f"No {start_type} data for overhead breakdown") + return + + # For cold starts, filter out entries with no provider time + if start_type == 'cold': + filtered_df = filtered_df[filtered_df['provider_time'] > 0] + + # Melt for seaborn + melted = filtered_df.melt( + id_vars=['memory'], + value_vars=['client_time_ms', 'provider_time_ms', 'exec_time_ms'], + var_name='Metric', + value_name='Time' + ) + + plt.figure(figsize=(10, 6)) + sns.barplot(x="memory", y="Time", hue="Metric", data=melted, errorbar='sd', palette="muted") + + title_parts = [benchmark_name, language_name, title_comment, + f"Overhead ({start_type.title()} Start)"] + title = ' '.join([p for p in title_parts if p]) + + plt.title(title, fontweight='bold') + plt.ylabel("Time (ms)") + plt.xlabel("Memory (MB)") + plt.tight_layout() + + filepath = self.output_dir / f"sebs_{start_type}_overhead.png" + plt.savefig(filepath, dpi=300, bbox_inches='tight') + self.logger.info(f"Saved: {filepath}") + plt.close() + + def plot_sebs_memory_usage(self, df: pd.DataFrame, benchmark_name: str = "", + language_name: str = "", title_comment: str = ""): + """ + Create SeBS-style memory usage distribution plot. 
+ + Args: + df: DataFrame from SeBS CSV + benchmark_name: Name of benchmark for title + language_name: Language name for title + title_comment: Additional comment for title + """ + if not self.is_csv: + self.logger.warning("This plot type requires CSV input from SeBS perf-cost") + return + + plt.figure(figsize=(8, 6)) + sns.boxplot(x="memory", y="mem_used", data=df, color="lightgreen") + + if not df.empty and df['mem_used'].max() > 0: + plt.ylim(0, df['mem_used'].max() * 1.2) + + title_parts = [benchmark_name, language_name, title_comment, "Memory Usage Distribution"] + title = ' '.join([p for p in title_parts if p]) + + plt.title(title, fontweight='bold') + plt.ylabel("Used Memory (MB)") + plt.xlabel("Allocated Memory (MB)") + plt.tight_layout() + + filepath = self.output_dir / "sebs_memory_usage.png" + plt.savefig(filepath, dpi=300, bbox_inches='tight') + self.logger.info(f"Saved: {filepath}") + plt.close() + + def create_sebs_plots(self, benchmark_name: str = "", language_name: str = "", + title_comment: str = ""): + """ + Generate all SeBS-style plots from CSV data. + This creates plots matching your friends' style. + + Args: + benchmark_name: Name of benchmark (e.g., "110.html") + language_name: Language name (e.g., "Python 3.11") + title_comment: Additional comment (e.g., "codepackage", "-O2") + """ + if not self.is_csv: + self.logger.error("SeBS plots require CSV input. 
Use JSON input for standard plots.") + return + + self.logger.info("Generating SeBS-style plots...") + + df = self.extract_dataframe() + + if df.empty: + self.logger.error("No data to plot!") + return + + # Cold vs Warm comparison + self.plot_sebs_cold_warm_comparison(df, benchmark_name, language_name, title_comment) + + # Overhead breakdowns + self.plot_sebs_overhead_breakdown(df, 'cold', benchmark_name, language_name, title_comment) + self.plot_sebs_overhead_breakdown(df, 'warm', benchmark_name, language_name, title_comment) + + # Memory usage + self.plot_sebs_memory_usage(df, benchmark_name, language_name, title_comment) + + self.logger.info(f"\n✓ All SeBS plots generated in: {self.output_dir}") + def create_all_plots(self): """Generate all available plots from the benchmark results.""" self.logger.info("Generating all plots...") @@ -495,36 +960,109 @@ def create_all_plots(self): self.logger.error("No data to plot!") return + plot_count = 0 + error_count = 0 + # Create summary report - self.create_summary_report(df) + try: + self.create_summary_report(df) + plot_count += 1 + except Exception as e: + self.logger.warning(f"Could not create summary report: {e}") + error_count += 1 # Language comparison - self.plot_language_comparison(df) - - # Platform comparison - self.plot_platform_comparison(df) + try: + self.plot_language_comparison(df) + plot_count += 1 + except Exception as e: + self.logger.warning(f"Could not create language comparison: {e}") + error_count += 1 # Memory scaling if df['memory_mb'].nunique() > 1: - self.plot_memory_scaling(df) - - # Cold vs warm starts - if 'cold_starts' in df.columns: - self.plot_cold_vs_warm(df) + try: + self.plot_memory_scaling(df) + plot_count += 1 + except Exception as e: + self.logger.warning(f"Could not create memory scaling plot: {e}") + error_count += 1 + + # Cold vs warm starts (original stacked bar) + if 'cold_starts' in df.columns and 'warm_starts' in df.columns: + try: + self.plot_cold_vs_warm(df) + plot_count += 
1 + except Exception as e: + self.logger.warning(f"Could not create cold vs warm plot: {e}") + error_count += 1 # Heatmap - self.plot_heatmap(df) + try: + self.plot_heatmap(df) + plot_count += 1 + except Exception as e: + self.logger.warning(f"Could not create heatmap: {e}") + error_count += 1 # Version comparisons for each language for language in df['language'].unique(): if df[df['language'] == language]['version'].nunique() > 1: - self.plot_version_comparison(df, language) + try: + self.plot_version_comparison(df, language) + plot_count += 1 + except Exception as e: + self.logger.warning(f"Could not create version comparison for {language}: {e}") + error_count += 1 # Per-benchmark plots for benchmark in df['benchmark'].unique(): - self.plot_language_comparison(df, benchmark=benchmark) + try: + self.plot_language_comparison(df, benchmark=benchmark) + plot_count += 1 + except Exception as e: + self.logger.warning(f"Could not create language comparison for {benchmark}: {e}") + error_count += 1 + + # New enhanced plots per benchmark and language + for language in df[df['benchmark'] == benchmark]['language'].unique(): + lang_data = df[(df['benchmark'] == benchmark) & (df['language'] == language)] + + # Cold/Warm boxplot comparison + for version in lang_data['version'].unique(): + try: + self.plot_cold_warm_comparison_boxplot(df, benchmark, language, version) + plot_count += 1 + except Exception as e: + self.logger.debug(f"Could not create boxplot for {benchmark}/{language}/{version}: {e}") + error_count += 1 + + # Overhead breakdowns + try: + self.plot_overhead_breakdown(df, benchmark, language, start_type='cold') + plot_count += 1 + except Exception as e: + self.logger.debug(f"Could not create cold overhead for {benchmark}/{language}: {e}") + error_count += 1 + + try: + self.plot_overhead_breakdown(df, benchmark, language, start_type='warm') + plot_count += 1 + except Exception as e: + self.logger.debug(f"Could not create warm overhead for {benchmark}/{language}: 
{e}") + error_count += 1 + + # Memory usage + try: + self.plot_memory_usage_distribution(df, benchmark, language) + plot_count += 1 + except Exception as e: + self.logger.debug(f"Could not create memory usage for {benchmark}/{language}: {e}") + error_count += 1 - self.logger.info(f"\n✓ All plots generated in: {self.output_dir}") + self.logger.info(f"\n✓ Generated {plot_count} plots in: {self.output_dir}") + if error_count > 0: + self.logger.info(f" ({error_count} plots skipped due to insufficient data)") def main(): @@ -533,11 +1071,14 @@ def main(): formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: - # Generate all plots + # Generate all plots from JSON (cross_platform_benchmark) %(prog)s results/comparison_20241212/comparison_results.json + # Generate SeBS-style plots from CSV (perf-cost result) + %(prog)s measurements/110-html/perf-cost/result.csv --plot-type sebs --benchmark-name "110.html" --language-name "Python 3.11" --title-comment "codepackage" + # Specify output directory - %(prog)s results/comparison_20241212/comparison_results.json --output plots/ + %(prog)s results/comparison_results.json --output plots/ # Generate specific plot types %(prog)s results.json --plot-type language_comparison platform_comparison @@ -545,14 +1086,15 @@ def main(): ) parser.add_argument('results_file', - help='Path to comparison_results.json file') + help='Path to comparison_results.json or result.csv file') parser.add_argument('--output', '-o', help='Output directory for plots (default: results_dir/plots)') parser.add_argument('--plot-type', nargs='+', choices=['language_comparison', 'platform_comparison', - 'memory_scaling', 'cold_warm', 'heatmap', 'versions', 'all'], + 'memory_scaling', 'cold_warm', 'cold_warm_boxplot', + 'overhead', 'memory_usage', 'heatmap', 'versions', 'all', 'sebs'], default=['all'], - help='Types of plots to generate (default: all)') + help='Types of plots to generate (default: all, use sebs for CSV input)') 
parser.add_argument('--language', '-l', help='Filter by specific language') parser.add_argument('--benchmark', '-b', @@ -560,12 +1102,34 @@ def main(): parser.add_argument('--format', choices=['png', 'pdf', 'svg'], default='png', help='Output format for plots (default: png)') + # SeBS-specific arguments + parser.add_argument('--benchmark-name', + help='Benchmark name for SeBS plots (e.g., "110.html")') + parser.add_argument('--language-name', + help='Language name for SeBS plots (e.g., "Python 3.11")') + parser.add_argument('--title-comment', + help='Additional title comment for SeBS plots (e.g., "codepackage", "-O2")') + args = parser.parse_args() try: visualizer = BenchmarkVisualizer(args.results_file, args.output) - if 'all' in args.plot_type: + # Handle CSV files (SeBS perf-cost format) + if visualizer.is_csv: + if 'sebs' in args.plot_type or 'all' in args.plot_type: + visualizer.create_sebs_plots( + benchmark_name=args.benchmark_name or "", + language_name=args.language_name or "", + title_comment=args.title_comment or "" + ) + else: + print("CSV input detected. 
Use --plot-type sebs to generate SeBS-style plots.") + print("Example: python plot_comparison.py result.csv --plot-type sebs --benchmark-name '110.html' --language-name 'Python 3.11'") + return 1 + + # Handle JSON files (cross_platform_benchmark format) + elif 'all' in args.plot_type: visualizer.create_all_plots() else: df = visualizer.extract_dataframe() @@ -582,6 +1146,24 @@ def main(): if 'cold_warm' in args.plot_type: visualizer.plot_cold_vs_warm(df) + if 'cold_warm_boxplot' in args.plot_type: + visualizer.plot_cold_warm_comparison_boxplot( + df, benchmark=args.benchmark, language=args.language + ) + + if 'overhead' in args.plot_type: + visualizer.plot_overhead_breakdown( + df, benchmark=args.benchmark, language=args.language, start_type='cold' + ) + visualizer.plot_overhead_breakdown( + df, benchmark=args.benchmark, language=args.language, start_type='warm' + ) + + if 'memory_usage' in args.plot_type: + visualizer.plot_memory_usage_distribution( + df, benchmark=args.benchmark, language=args.language + ) + if 'heatmap' in args.plot_type: visualizer.plot_heatmap(df) diff --git a/scripts/run_comparison.sh b/scripts/run_comparison.sh index 64ef993ce..1fde995e0 100755 --- a/scripts/run_comparison.sh +++ b/scripts/run_comparison.sh @@ -25,6 +25,7 @@ INPUT_SIZE="test" ARCHITECTURE="x64" GENERATE_PLOTS=true CONTAINER_DEPLOYMENT=false +CONTAINER_DEPLOYMENT_FOR="" # Print usage usage() { @@ -45,7 +46,9 @@ Options: -m, --memory SIZES Memory sizes in MB (space-separated, default: 256) -i, --input-size SIZE Input size: test, small, large (default: test) -a, --architecture ARCH Architecture: x64, arm64 (default: x64) - --container-deployment Run functions as containers + --container-deployment Run functions as containers (all platforms) + --container-deployment-for Platforms to use container deployment (space-separated) + Example: --container-deployment-for "aws gcp" --no-plots Skip plot generation --skip-benchmark Skip benchmark run, only generate plots -h, --help Show 
this help message @@ -54,6 +57,9 @@ Examples: # Compare Python and Node.js on AWS and Azure $(basename "$0") -b "010.sleep 110.dynamic-html" -p "aws azure" -l "python nodejs" + # Compare AWS (container) vs Azure (package deployment) + $(basename "$0") -b "010.sleep" -p "aws azure" -l "python" --container-deployment-for "aws" + # Test different memory configurations $(basename "$0") -b "501.graph-pagerank" -m "512 1024 2048" -r 10 @@ -107,6 +113,10 @@ while [[ $# -gt 0 ]]; do CONTAINER_DEPLOYMENT=true shift ;; + --container-deployment-for) + CONTAINER_DEPLOYMENT_FOR="$2" + shift 2 + ;; --no-plots) GENERATE_PLOTS=false shift @@ -149,6 +159,9 @@ if [ "$SKIP_BENCHMARK" = false ]; then echo " Input Size: $INPUT_SIZE" echo " Architecture: $ARCHITECTURE" echo " Container Deployment: $CONTAINER_DEPLOYMENT" + if [ -n "$CONTAINER_DEPLOYMENT_FOR" ]; then + echo " Container Deployment For: $CONTAINER_DEPLOYMENT_FOR" + fi echo " Output: $OUTPUT_DIR" echo "" @@ -171,6 +184,15 @@ if [ "$SKIP_BENCHMARK" = false ]; then CMD+=(--container-deployment) fi + if [ -n "$CONTAINER_DEPLOYMENT_FOR" ]; then + CMD+=(--container-deployment-for $CONTAINER_DEPLOYMENT_FOR) + fi + + # Add --plot flag if plots are enabled (uses integrated plotting) + if [ "$GENERATE_PLOTS" = true ]; then + CMD+=(--plot) + fi + echo "Running: ${CMD[@]}" echo "" @@ -188,24 +210,24 @@ else echo -e "${RED}Error: Results file not found: $OUTPUT_DIR/comparison_results.json${NC}" exit 1 fi -fi - -# Generate plots -if [ "$GENERATE_PLOTS" = true ]; then - echo "" - echo -e "${GREEN}Step 2: Generating Plots${NC}" - echo "" - PLOT_CMD=( - python3 "${SCRIPT_DIR}/plot_comparison.py" - "$OUTPUT_DIR/comparison_results.json" - --output "$OUTPUT_DIR/plots" - ) - - if "${PLOT_CMD[@]}"; then - echo -e "${GREEN}✓ Plots generated successfully!${NC}" - else - echo -e "${YELLOW}⚠ Plot generation failed (may need matplotlib/seaborn)${NC}" + # Generate plots from existing results if requested + if [ "$GENERATE_PLOTS" = true ]; then + echo 
"" + echo -e "${GREEN}Generating Plots from Existing Results${NC}" + echo "" + + PLOT_CMD=( + python3 "${SCRIPT_DIR}/plot_comparison.py" + "$OUTPUT_DIR/comparison_results.json" + --output "$OUTPUT_DIR/plots" + ) + + if "${PLOT_CMD[@]}"; then + echo -e "${GREEN}✓ Plots generated successfully!${NC}" + else + echo -e "${YELLOW}⚠ Plot generation failed (may need matplotlib/seaborn)${NC}" + fi fi fi @@ -215,7 +237,7 @@ echo -e "${GREEN}Comparison Complete!${NC}" echo "==================================" echo "" echo "Results Location: $OUTPUT_DIR" -echo " - comparison_results.json (raw results with full experiments.json data)" +echo " - comparison_results.json (raw results)" echo " - benchmark_run.log (execution log)" if [ "$GENERATE_PLOTS" = true ]; then echo " - plots/ (visualizations)" @@ -225,13 +247,7 @@ echo "Useful commands:" echo " # Regenerate plots" echo " python3 ${SCRIPT_DIR}/plot_comparison.py $OUTPUT_DIR/comparison_results.json" echo "" -echo " # Extract individual experiments.json files" -echo " python3 ${SCRIPT_DIR}/export_comparison_data.py $OUTPUT_DIR/comparison_results.json -e $OUTPUT_DIR/experiments/" -echo "" -echo " # Create aggregated experiments.json" -echo " python3 ${SCRIPT_DIR}/export_comparison_data.py $OUTPUT_DIR/comparison_results.json -a $OUTPUT_DIR/aggregated.json" -echo "" -echo " # Export to CSV" -echo " python3 ${SCRIPT_DIR}/export_comparison_data.py $OUTPUT_DIR/comparison_results.json -c $OUTPUT_DIR/summary.csv" +echo " # Regenerate with specific plot types" +echo " python3 ${SCRIPT_DIR}/plot_comparison.py $OUTPUT_DIR/comparison_results.json --plot-type cold_warm_boxplot memory_scaling" echo "" diff --git a/scripts/run_sebs_with_plots.sh b/scripts/run_sebs_with_plots.sh new file mode 100755 index 000000000..c5e04c8d6 --- /dev/null +++ b/scripts/run_sebs_with_plots.sh @@ -0,0 +1,218 @@ +#!/bin/bash +# Wrapper script to run SeBS perf-cost experiment and generate plots in one command + +set -e # Exit on error + +# Color output 
+RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +usage() { + cat << EOF +Usage: $0 [OPTIONS] + +Run SeBS perf-cost experiment and automatically generate plots. + +Required Options: + --config CONFIG Path to SeBS config file + --deployment PLATFORM Deployment platform (aws, azure, gcp) + --output-dir DIR Output directory for measurements + +Optional Options: + --benchmark-name NAME Benchmark name for plot titles (e.g., "110.html") + --language-name LANG Language name for plot titles (e.g., "Python 3.11") + --title-comment TEXT Additional comment for titles (e.g., "codepackage", "-O2") + --skip-invoke Skip the invoke step (only process existing data) + --skip-process Skip the process step (only invoke) + --update-code Update function code before running + --help Show this help message + +Examples: + # Run full workflow (invoke + process + plot) + $0 --config config/aws_110_html_python_128_512_2048.json \\ + --deployment aws \\ + --output-dir measurements/110-html-python \\ + --benchmark-name "110.html" \\ + --language-name "Python 3.11" \\ + --title-comment "codepackage" + + # Just process existing data and plot + $0 --config config/aws_110_html_cpp.json \\ + --deployment aws \\ + --output-dir measurements/110-html-cpp \\ + --benchmark-name "110.html" \\ + --language-name "C++ -O2" \\ + --skip-invoke + +EOF + exit 1 +} + +# Default values +SKIP_INVOKE=false +SKIP_PROCESS=false +UPDATE_CODE="" +BENCHMARK_NAME="" +LANGUAGE_NAME="" +TITLE_COMMENT="" + +# Parse arguments +while [[ $# -gt 0 ]]; do + case $1 in + --config) + CONFIG="$2" + shift 2 + ;; + --deployment) + DEPLOYMENT="$2" + shift 2 + ;; + --output-dir) + OUTPUT_DIR="$2" + shift 2 + ;; + --benchmark-name) + BENCHMARK_NAME="$2" + shift 2 + ;; + --language-name) + LANGUAGE_NAME="$2" + shift 2 + ;; + --title-comment) + TITLE_COMMENT="$2" + shift 2 + ;; + --skip-invoke) + SKIP_INVOKE=true + shift + ;; + --skip-process) + SKIP_PROCESS=true + shift + ;; + 
--update-code) + UPDATE_CODE="--update-code" + shift + ;; + --help) + usage + ;; + *) + echo -e "${RED}Error: Unknown option $1${NC}" + usage + ;; + esac +done + +# Validate required arguments +if [[ -z "$CONFIG" ]] || [[ -z "$DEPLOYMENT" ]] || [[ -z "$OUTPUT_DIR" ]]; then + echo -e "${RED}Error: Missing required arguments${NC}" + usage +fi + +# Get script directory +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +PROJECT_ROOT="$(dirname "$SCRIPT_DIR")" + +echo -e "${BLUE}======================================${NC}" +echo -e "${BLUE}SeBS Perf-Cost Experiment with Plots${NC}" +echo -e "${BLUE}======================================${NC}" +echo "" +echo -e "Config: ${GREEN}$CONFIG${NC}" +echo -e "Deployment: ${GREEN}$DEPLOYMENT${NC}" +echo -e "Output: ${GREEN}$OUTPUT_DIR${NC}" +echo "" + +# Step 1: Invoke experiment +if [[ "$SKIP_INVOKE" == false ]]; then + echo -e "${YELLOW}Step 1: Running experiment (invoke perf-cost)...${NC}" + cd "$PROJECT_ROOT" + ./sebs.py experiment invoke perf-cost \ + --config "$CONFIG" \ + --deployment "$DEPLOYMENT" \ + --output-dir "$OUTPUT_DIR" \ + $UPDATE_CODE + + if [[ $? -eq 0 ]]; then + echo -e "${GREEN}✓ Experiment completed successfully${NC}" + else + echo -e "${RED}✗ Experiment failed${NC}" + exit 1 + fi + echo "" +else + echo -e "${YELLOW}Step 1: Skipping invoke step${NC}" + echo "" +fi + +# Step 2: Process experiment +if [[ "$SKIP_PROCESS" == false ]]; then + echo -e "${YELLOW}Step 2: Processing results...${NC}" + cd "$PROJECT_ROOT" + ./sebs.py experiment process perf-cost \ + --config "$CONFIG" \ + --deployment "$DEPLOYMENT" \ + --output-dir "$OUTPUT_DIR" + + if [[ $? 
-eq 0 ]]; then + echo -e "${GREEN}✓ Processing completed successfully${NC}" + else + echo -e "${RED}✗ Processing failed${NC}" + exit 1 + fi + echo "" +else + echo -e "${YELLOW}Step 2: Skipping process step${NC}" + echo "" +fi + +# Step 3: Generate plots +echo -e "${YELLOW}Step 3: Generating plots...${NC}" + +CSV_FILE="$OUTPUT_DIR/perf-cost/result.csv" + +if [[ ! -f "$CSV_FILE" ]]; then + echo -e "${RED}✗ CSV file not found: $CSV_FILE${NC}" + echo -e "${RED} Make sure the process step completed successfully${NC}" + exit 1 +fi + +# Build plot command +PLOT_CMD="python3 $SCRIPT_DIR/plot_comparison.py $CSV_FILE --plot-type sebs" + +if [[ -n "$BENCHMARK_NAME" ]]; then + PLOT_CMD="$PLOT_CMD --benchmark-name \"$BENCHMARK_NAME\"" +fi + +if [[ -n "$LANGUAGE_NAME" ]]; then + PLOT_CMD="$PLOT_CMD --language-name \"$LANGUAGE_NAME\"" +fi + +if [[ -n "$TITLE_COMMENT" ]]; then + PLOT_CMD="$PLOT_CMD --title-comment \"$TITLE_COMMENT\"" +fi + +eval $PLOT_CMD + +if [[ $? -eq 0 ]]; then + echo -e "${GREEN}✓ Plots generated successfully${NC}" + echo -e "${GREEN} Output: $OUTPUT_DIR/perf-cost/plots/${NC}" +else + echo -e "${RED}✗ Plot generation failed${NC}" + exit 1 +fi + +echo "" +echo -e "${BLUE}======================================${NC}" +echo -e "${GREEN}✓ All steps completed successfully!${NC}" +echo -e "${BLUE}======================================${NC}" +echo "" +echo -e "Results:" +echo -e " CSV: ${GREEN}$CSV_FILE${NC}" +echo -e " Plots: ${GREEN}$OUTPUT_DIR/perf-cost/plots/${NC}" +echo "" + diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 930dfe2af..c1c2d3430 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -137,7 +137,8 @@ def package_code( CONFIG_FILES = { "python": ["handler.py", "requirements.txt", ".python_packages"], "nodejs": ["handler.js", "package.json", "node_modules"], - "pypy": ["handler.py", "requirements.txt", ".python_packages"], + "rust": ["bootstrap", "Cargo.toml", "Cargo.lock", "target"], + "java": ["function.jar"], } package_config = 
CONFIG_FILES[language_name] function_dir = os.path.join(directory, "function") @@ -153,21 +154,15 @@ def package_code( benchmark_archive = "{}.zip".format(os.path.join(directory, benchmark)) self.logging.info("Created {} archive".format(benchmark_archive)) - bytes_size = os.path.getsize(benchmark_archive) + bytes_size = os.path.getsize(os.path.join(directory, benchmark_archive)) mbytes = bytes_size / 1024.0 / 1024.0 self.logging.info("Zip archive size {:2f} MB".format(mbytes)) return ( - benchmark_archive, + os.path.join(directory, "{}.zip".format(benchmark)), bytes_size, container_uri, ) - def _default_handler(self, language: str) -> str: - - if language == "java": - return "org.serverlessbench.Handler::handleRequest" - return "handler.handler" - def _map_architecture(self, architecture: str) -> str: @@ -179,12 +174,10 @@ def _map_language_runtime(self, language: str, runtime: str): # AWS uses different naming scheme for Node.js versions # For example, it's 12.x instead of 12. - # We use a OS-only runtime for PyPy if language == "nodejs": - return f"{language}{runtime}.x" - elif language == "python": - return f"{language}{runtime}" - elif language == "pypy": + return f"{runtime}.x" + # Rust uses provided.al2023 runtime (custom runtime) + elif language == "rust": return "provided.al2023" return runtime @@ -263,10 +256,10 @@ def create_function( "S3Key": code_prefix, } - # PyPy uses custom runtime (provided.al2023) since there's no native PyPy runtime - if language == "pypy": - create_function_params["Runtime"] = "provided.al2023" - create_function_params["Handler"] = "handler.handler" + # Rust uses custom runtime with different handler + if language == "rust": + create_function_params["Runtime"] = self._map_language_runtime(language, language_runtime) + create_function_params["Handler"] = "bootstrap" else: create_function_params["Runtime"] = "{}{}".format( language, self._map_language_runtime(language, language_runtime) @@ -418,26 +411,15 @@ def 
update_function_configuration( self.wait_function_updated(function) self.logging.info(f"Updated configuration of {function.name} function. ") - def get_real_language_name(self, language_name: str) -> str: - LANGUAGE_NAMES = { - "python": "python", - "pypy": "python", - "nodejs": "nodejs", - } - return LANGUAGE_NAMES.get(language_name) - # @staticmethod def default_function_name( self, code_package: Benchmark, resources: Optional[Resources] = None ) -> str: # Create function name resource_id = resources.resources_id if resources else self.config.resources.resources_id - func_name = "sebs-{}-{}-{}-{}-{}".format( resource_id, code_package.benchmark, - # see which works - #self.get_real_language_name(code_package.language_name), code_package.language_name, code_package.language_version, code_package.architecture, @@ -669,4 +651,4 @@ def wait_function_updated(self, func: LambdaFunction): self.logging.info("Lambda function has been updated.") def disable_rich_output(self): - self.ecr_client.disable_rich_output = True + self.ecr_client.disable_rich_output = True \ No newline at end of file diff --git a/sebs/benchmark.py b/sebs/benchmark.py index 59887d6cd..3c0deb9dd 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -352,9 +352,14 @@ def copy_code(self, output_dir): if self.language_name == "java": shutil.copytree(path, output_dir, dirs_exist_ok=True) return + self.logging.info(f"copy_code: Looking for files in {path} for language {self.language_name}") for file_type in FILES[self.language_name]: - for f in glob.glob(os.path.join(path, file_type)): - shutil.copy2(os.path.join(path, f), output_dir) + matches = glob.glob(os.path.join(path, file_type)) + self.logging.info(f"copy_code: Pattern {file_type} matched {len(matches)} files: {matches}") + for f in matches: + dest = os.path.join(output_dir, os.path.basename(f)) + self.logging.info(f"copy_code: Copying {f} to {dest}") + shutil.copy2(f, dest) # For Rust, copy the entire src directory if self.language_name == 
"rust": @@ -389,6 +394,106 @@ def add_benchmark_data(self, output_dir): stderr=subprocess.STDOUT, ) + def _merge_rust_cargo_toml(self, wrapper_cargo_path: str, benchmark_cargo_path: str, output_dir: str): + """ + Merge benchmark Cargo.toml dependencies into wrapper Cargo.toml. + The wrapper Cargo.toml is the base, and benchmark dependencies are added/merged. + Uses simple string-based approach to extract and merge [dependencies] sections. + """ + import re + + # Ensure output_dir is absolute for consistent path handling + output_dir = os.path.abspath(output_dir) + + with open(wrapper_cargo_path, 'r') as f: + wrapper_content = f.read() + + with open(benchmark_cargo_path, 'r') as f: + benchmark_content = f.read() + + # Extract dependencies from benchmark Cargo.toml + deps_match = re.search(r'\[dependencies\](.*?)(?=\n\[|\Z)', benchmark_content, re.DOTALL) + if not deps_match: + # No dependencies in benchmark, just copy wrapper + output_cargo = os.path.join(output_dir, "Cargo.toml") + with open(output_cargo, 'w') as f: + f.write(wrapper_content) + return + + benchmark_deps_lines = deps_match.group(1).strip().split('\n') + + # Extract existing dependency names from wrapper to avoid duplicates + wrapper_deps_match = re.search(r'\[dependencies\](.*?)(?=\n\[|\Z)', wrapper_content, re.DOTALL) + existing_deps = set() + if wrapper_deps_match: + for line in wrapper_deps_match.group(1).split('\n'): + line = line.strip() + if line and not line.startswith('#'): + # Extract dependency name (before = or {) + dep_name = re.split(r'[=\s{]+', line)[0].strip() + if dep_name: + existing_deps.add(dep_name) + + # Add benchmark dependencies that aren't already in wrapper + new_deps = [] + for line in benchmark_deps_lines: + line = line.strip() + if line and not line.startswith('#'): + dep_name = re.split(r'[=\s{]+', line)[0].strip() + if dep_name and dep_name not in existing_deps: + new_deps.append(line) + existing_deps.add(dep_name) + + # Merge dependencies into wrapper content + if 
new_deps: + if wrapper_deps_match: + # Insert new dependencies before the end of [dependencies] section + deps_section_start = wrapper_deps_match.start() + deps_section_end = wrapper_deps_match.end() + deps_content = wrapper_deps_match.group(1) + + # Build merged dependencies section + merged_deps = deps_content.rstrip() + for dep_line in new_deps: + merged_deps += '\n' + dep_line + merged_deps += '\n' + + # Reconstruct wrapper content with merged dependencies + merged_content = ( + wrapper_content[:deps_section_start] + + '[dependencies]' + merged_deps + + wrapper_content[deps_section_end:] + ) + else: + # Add [dependencies] section if it doesn't exist + if not wrapper_content.endswith('\n'): + wrapper_content += '\n' + merged_content = wrapper_content + '\n[dependencies]\n' + for dep_line in new_deps: + merged_content += dep_line + '\n' + else: + merged_content = wrapper_content + + # Write merged Cargo.toml (output_dir is already absolute) + output_cargo = os.path.join(output_dir, "Cargo.toml") + # Ensure directory exists + os.makedirs(output_dir, exist_ok=True) + with open(output_cargo, 'w') as f: + f.write(merged_content) + f.flush() + os.fsync(f.fileno()) # Force write to disk + # Verify it was written (with a small delay for filesystem sync) + import time + time.sleep(0.01) # Small delay for filesystem to sync + if not os.path.exists(output_cargo): + # Try to get more info about what went wrong + parent_dir = os.path.dirname(output_cargo) + raise RuntimeError( + f"Failed to write merged Cargo.toml to {output_cargo}. 
" + f"Parent directory exists: {os.path.exists(parent_dir)}, " + f"Parent directory contents: {os.listdir(parent_dir) if os.path.exists(parent_dir) else 'N/A'}" + ) + def add_deployment_files(self, output_dir): handlers_dir = project_absolute_path( "benchmarks", "wrappers", self._deployment_name, self.language_name @@ -399,13 +504,59 @@ def add_deployment_files(self, output_dir): self._deployment_name, self.language_name ) ] + + # Copy wrapper files first (except Cargo.toml for Rust, which we'll merge) for file in handlers: destination = os.path.join(output_dir, os.path.basename(file)) + if os.path.basename(file) == "Cargo.toml" and self.language_name == "rust": + # Skip copying wrapper Cargo.toml directly - we'll merge it instead + continue if os.path.isdir(file): shutil.copytree(file, destination, dirs_exist_ok=True) else: if not os.path.exists(destination): shutil.copy2(file, destination) + + # For Rust, merge Cargo.toml files after copying other wrapper files + if self.language_name == "rust": + # Ensure output_dir is absolute for consistent path handling + output_dir_abs = os.path.abspath(output_dir) + wrapper_cargo = os.path.join(handlers_dir, "Cargo.toml") + benchmark_cargo = os.path.join(output_dir_abs, "Cargo.toml") + self.logging.info(f"Rust Cargo.toml merge: wrapper={wrapper_cargo} (exists: {os.path.exists(wrapper_cargo)}), benchmark={benchmark_cargo} (exists: {os.path.exists(benchmark_cargo)})") + if os.path.exists(wrapper_cargo) and os.path.exists(benchmark_cargo): + # Merge dependencies from benchmark Cargo.toml into wrapper Cargo.toml + self.logging.info("Merging Rust Cargo.toml files") + # The merge function reads benchmark_cargo and writes merged content to output_dir/Cargo.toml + # Since benchmark_cargo IS output_dir/Cargo.toml, the merge overwrites it + # So we don't need to remove benchmark_cargo - it's already been overwritten with merged content + self._merge_rust_cargo_toml(wrapper_cargo, benchmark_cargo, output_dir_abs) + merged_path = 
os.path.join(output_dir_abs, "Cargo.toml") + # The merge function should have raised an error if it failed, but verify anyway + if not os.path.exists(merged_path): + # List directory contents for debugging + dir_contents = os.listdir(output_dir_abs) if os.path.exists(output_dir_abs) else [] + raise RuntimeError( + f"Merged Cargo.toml was not created at {merged_path}. " + f"Directory contents: {dir_contents}" + ) + self.logging.info(f"Merged Cargo.toml successfully written to {merged_path}") + elif os.path.exists(wrapper_cargo): + # Only wrapper Cargo.toml exists, just copy it + wrapper_dest = os.path.join(output_dir_abs, "Cargo.toml") + self.logging.info(f"Only wrapper Cargo.toml exists, copying to {wrapper_dest}") + shutil.copy2(wrapper_cargo, wrapper_dest) + elif os.path.exists(benchmark_cargo): + # Only benchmark Cargo.toml exists, copy it (shouldn't happen normally) + benchmark_dest = os.path.join(output_dir_abs, "Cargo.toml") + self.logging.warning(f"Only benchmark Cargo.toml exists, copying to {benchmark_dest}") + # Keep it as-is since wrapper should always exist + else: + self.logging.error(f"Neither wrapper nor benchmark Cargo.toml found! Wrapper: {wrapper_cargo}, Benchmark: {benchmark_cargo}") + raise RuntimeError( + f"Cargo.toml not found: wrapper at {wrapper_cargo} or benchmark at {benchmark_cargo}. " + "Both should exist for Rust builds." + ) def add_deployment_package_python(self, output_dir): @@ -478,15 +629,26 @@ def directory_size(directory: str): def install_dependencies(self, output_dir): # do we have docker image for this run and language? 
- if "build" not in self._system_config.docker_image_types( + image_types = self._system_config.docker_image_types( self._deployment_name, self.language_name - ): + ) + self.logging.info( + f"Docker image types for {self._deployment_name}/{self.language_name}: {image_types}" + ) + if "build" not in image_types: self.logging.info( ( "There is no Docker build image for {deployment} run in {language}, " "thus skipping the Docker-based installation of dependencies." ).format(deployment=self._deployment_name, language=self.language_name) ) + # For Rust, this is a fatal error - we need the build image + if self.language_name == "rust": + raise RuntimeError( + f"Docker build image is required for Rust but not configured for " + f"{self._deployment_name}/{self.language_name}. " + "Please ensure 'build' is in the 'images' list in config/systems.json" + ) else: repo_name = self._system_config.docker_repository() unversioned_image_name = "build.{deployment}.{language}.{runtime}".format( @@ -544,7 +706,17 @@ def ensure_image(name: str) -> None: # run Docker container to install packages PACKAGE_FILES = {"python": "requirements.txt", "nodejs": "package.json", "rust": "Cargo.toml", "java": "pom.xml", "pypy": "requirements.txt"} file = os.path.join(output_dir, PACKAGE_FILES[self.language_name]) + + # For Java, check recursively if pom.xml exists + if self.language_name == "java" and not os.path.exists(file): + for root, _, files in os.walk(output_dir): + if "pom.xml" in files: + file = os.path.join(root, "pom.xml") + break + + self.logging.info(f"Checking for package file: {file} (exists: {os.path.exists(file)})") if os.path.exists(file): + self.logging.info(f"Found package file {file}, proceeding with Docker build") try: self.logging.info( "Docker build of benchmark dependencies in container " @@ -672,7 +844,25 @@ def build( self.add_benchmark_data(self._output_dir) self.add_deployment_files(self._output_dir) self.add_deployment_package(self._output_dir) + + # For Rust, remove 
any existing Cargo.lock to ensure it's regenerated with correct constraints + if self.language_name == "rust": + cargo_lock = os.path.join(self._output_dir, "Cargo.lock") + if os.path.exists(cargo_lock): + self.logging.info(f"Removing existing Cargo.lock at {cargo_lock} to ensure regeneration with correct dependency versions") + os.remove(cargo_lock) + self.install_dependencies(self._output_dir) + + # For Rust, verify bootstrap binary exists after dependency installation + if self.language_name == "rust": + bootstrap_path = os.path.join(self._output_dir, "bootstrap") + if not os.path.exists(bootstrap_path): + self.logging.error(f"Rust bootstrap binary not found at {bootstrap_path} after install_dependencies!") + raise RuntimeError( + f"Rust build failed: bootstrap binary not created at {bootstrap_path}. " + "Check Docker build logs above for compilation errors." + ) self._code_location, self._code_size, self._container_uri = deployment_build_step( os.path.abspath(self._output_dir), diff --git a/sebs/faas/container.py b/sebs/faas/container.py index b17525f7b..cc4b136c9 100644 --- a/sebs/faas/container.py +++ b/sebs/faas/container.py @@ -169,6 +169,15 @@ def build_base_image( build_dir = os.path.join(directory, "build") os.makedirs(build_dir, exist_ok=True) + # For Rust, ensure bootstrap binary exists before building container image + if language_name == "rust": + bootstrap_path = os.path.join(directory, "bootstrap") + if not os.path.exists(bootstrap_path): + raise RuntimeError( + f"Rust bootstrap binary not found at {bootstrap_path}. " + "The Rust build must complete successfully before container image creation." 
+ ) + shutil.copy( os.path.join(DOCKER_DIR, self.name(), language_name, "Dockerfile.function"), os.path.join(build_dir, "Dockerfile"), diff --git a/third-party/pypapi b/third-party/pypapi deleted file mode 160000 index 2188acab3..000000000 --- a/third-party/pypapi +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 2188acab38a28af0a70a3bd2f36060fa9984e096 From 2d361483ca9d2f6f4e1876c29c4f7cedcd894db2 Mon Sep 17 00:00:00 2001 From: Alexander Schlieper Date: Thu, 8 Jan 2026 14:39:45 +0100 Subject: [PATCH 28/31] Remove plotting and benchmarking scripts - Deleted `plot_comparison.py` and associated plotting functions. - Removed `run_comparison.sh` and `run_sebs_with_plots.sh` scripts for benchmark execution and plot generation. - Cleaned up the repository by eliminating unused files related to benchmarking and visualization. --- scripts/plot_comparison.py | 1193 -------------------------------- scripts/run_comparison.sh | 253 ------- scripts/run_sebs_with_plots.sh | 218 ------ 3 files changed, 1664 deletions(-) delete mode 100644 scripts/plot_comparison.py delete mode 100755 scripts/run_comparison.sh delete mode 100755 scripts/run_sebs_with_plots.sh diff --git a/scripts/plot_comparison.py b/scripts/plot_comparison.py deleted file mode 100644 index 077041dbf..000000000 --- a/scripts/plot_comparison.py +++ /dev/null @@ -1,1193 +0,0 @@ -#!/usr/bin/env python3 - -""" -Visualization tool for cross-platform benchmark comparisons. -Creates publication-quality plots comparing performance across -languages, platforms, and configurations. 
-""" - -import argparse -import json -import logging -import sys -from pathlib import Path -from typing import Optional - -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import seaborn as sns - -# Suppress matplotlib debug output -import matplotlib -matplotlib_logger = logging.getLogger('matplotlib') -matplotlib_logger.setLevel(logging.WARNING) -pil_logger = logging.getLogger('PIL') -pil_logger.setLevel(logging.WARNING) - -# Set style -sns.set_style("whitegrid") -sns.set_context("paper", font_scale=1.2) - -# Color palettes for different entities -PLATFORM_COLORS = { - 'aws': '#FF9900', # AWS Orange - 'azure': '#0089D6', # Azure Blue - 'gcp': '#4285F4', # Google Blue - 'local': '#808080' # Gray -} - -LANGUAGE_COLORS = { - 'python': '#3776AB', # Python Blue - 'nodejs': '#339933', # Node.js Green - 'rust': '#000000', # Rust Black - 'java': '#007396', # Java Blue - 'pypy': '#193440', # PyPy Dark - 'cpp': '#00599C' # C++ Blue -} - - -class BenchmarkVisualizer: - """Creates visualizations from benchmark comparison results.""" - - def __init__(self, results_file: str, output_dir: Optional[str] = None): - self.results_file = Path(results_file) - self.output_dir = Path(output_dir) if output_dir else self.results_file.parent / 'plots' - self.output_dir.mkdir(parents=True, exist_ok=True) - - # Setup logging - logging.basicConfig( - level=logging.INFO, - format='%(levelname)s: %(message)s' - ) - self.logger = logging.getLogger(__name__) - - # Detect file type and load results - if self.results_file.suffix == '.csv': - self.results = None # CSV doesn't use the results dict - self.is_csv = True - self.logger.info(f"Loaded CSV from {self.results_file}") - else: - with open(self.results_file, 'r') as f: - self.results = json.load(f) - self.is_csv = False - self.logger.info(f"Loaded results from {self.results_file}") - - self.logger.info(f"Plots will be saved to {self.output_dir}") - - def extract_dataframe(self) -> pd.DataFrame: - """ - Extract benchmark 
results into a pandas DataFrame. - - For CSV files (SeBS perf-cost format): - - Reads directly from CSV with columns: memory, type, is_cold, exec_time, - client_time, provider_time, mem_used - - For JSON files (cross_platform_benchmark format): - - Returns a DataFrame with columns: benchmark, platform, language, version, - memory_mb, avg_time_ms, cold_starts, warm_starts, success, etc. - """ - if self.is_csv: - # Read SeBS perf-cost CSV format - df = pd.read_csv(self.results_file) - - # Convert microseconds to milliseconds - df['client_time_ms'] = df['client_time'] / 1000.0 - df['provider_time_ms'] = df['provider_time'] / 1000.0 - df['exec_time_ms'] = df['exec_time'] / 1000.0 - - self.logger.info(f"Loaded {len(df)} measurements from CSV") - return df - - # JSON format (original code) - rows = [] - - for benchmark, bench_data in self.results['benchmarks'].items(): - for platform, platform_data in bench_data.items(): - for language, lang_data in platform_data.items(): - for version, version_data in lang_data.items(): - for memory_config, result in version_data.items(): - row = { - 'benchmark': benchmark, - 'platform': platform, - 'language': language, - 'version': version, - 'memory_mb': result.get('memory_mb', 0), - 'success': result.get('success', False) - } - - # Extract metrics if available - if 'metrics' in result: - metrics = result['metrics'] - row['avg_time_ms'] = metrics.get('avg_execution_time_ms') - row['min_time_ms'] = metrics.get('min_execution_time_ms') - row['max_time_ms'] = metrics.get('max_execution_time_ms') - row['cold_starts'] = metrics.get('cold_starts', 0) - row['warm_starts'] = metrics.get('warm_starts', 0) - - # Store all execution times for detailed analysis - if 'execution_times_ms' in metrics: - row['execution_times'] = metrics['execution_times_ms'] - - rows.append(row) - - df = pd.DataFrame(rows) - self.logger.info(f"Extracted {len(df)} benchmark results") - return df - - def plot_language_comparison(self, df: pd.DataFrame, benchmark: 
Optional[str] = None): - """ - Create bar chart comparing languages across platforms. - - Args: - df: DataFrame with benchmark results - benchmark: Optional benchmark name to filter by - """ - if benchmark: - df = df[df['benchmark'] == benchmark] - title_suffix = f" - {benchmark}" - else: - title_suffix = " - All Benchmarks" - - # Filter successful runs only - df = df[df['success'] == True].copy() - - if df.empty: - self.logger.warning(f"No successful runs for language comparison{title_suffix}") - return - - # Create grouped bar chart - fig, ax = plt.subplots(figsize=(12, 6)) - - # Group by platform and language - grouped = df.groupby(['platform', 'language'])['avg_time_ms'].mean().reset_index() - - # Pivot for plotting - SWAPPED: platforms on X-axis, languages as colors - pivot = grouped.pivot(index='platform', columns='language', values='avg_time_ms') - - # Create bar chart with language colors - pivot.plot(kind='bar', ax=ax, color=[LANGUAGE_COLORS.get(lang, '#888888') for lang in pivot.columns]) - - ax.set_xlabel('Platform') - ax.set_ylabel('Average Execution Time (ms)') - ax.set_title(f'Platform Performance Comparison by Language{title_suffix}') - ax.legend(title='Language') - ax.grid(axis='y', alpha=0.3) - - plt.xticks(rotation=45) - plt.tight_layout() - - filename = f"language_comparison{'_' + benchmark if benchmark else ''}.png" - filepath = self.output_dir / filename - plt.savefig(filepath, dpi=300, bbox_inches='tight') - self.logger.info(f"Saved: {filepath}") - plt.close() - - def plot_platform_comparison(self, df: pd.DataFrame, language: Optional[str] = None): - """ - Create bar chart comparing platforms for a specific language. 
- - Args: - df: DataFrame with benchmark results - language: Optional language to filter by - """ - if language: - df = df[df['language'] == language] - title_suffix = f" - {language.title()}" - else: - title_suffix = "" - - # Filter successful runs only - df = df[df['success'] == True].copy() - - if df.empty: - self.logger.warning(f"No successful runs for platform comparison{title_suffix}") - return - - # Create grouped bar chart - fig, ax = plt.subplots(figsize=(12, 6)) - - # Group by platform and benchmark - grouped = df.groupby(['benchmark', 'platform'])['avg_time_ms'].mean().reset_index() - - # Pivot for plotting - pivot = grouped.pivot(index='benchmark', columns='platform', values='avg_time_ms') - - # Create bar chart - pivot.plot(kind='bar', ax=ax, color=[PLATFORM_COLORS.get(p, '#888888') for p in pivot.columns]) - - ax.set_xlabel('Benchmark') - ax.set_ylabel('Average Execution Time (ms)') - ax.set_title(f'Platform Performance Comparison{title_suffix}') - ax.legend(title='Platform') - ax.grid(axis='y', alpha=0.3) - - plt.xticks(rotation=45, ha='right') - plt.tight_layout() - - filename = f"platform_comparison{'_' + language if language else ''}.png" - filepath = self.output_dir / filename - plt.savefig(filepath, dpi=300, bbox_inches='tight') - self.logger.info(f"Saved: {filepath}") - plt.close() - - def plot_memory_scaling(self, df: pd.DataFrame, benchmark: Optional[str] = None): - """ - Create line plot showing how performance scales with memory. 
- - Args: - df: DataFrame with benchmark results - benchmark: Optional benchmark to filter by - """ - if benchmark: - df = df[df['benchmark'] == benchmark] - title_suffix = f" - {benchmark}" - else: - title_suffix = "" - - # Filter successful runs only - df = df[df['success'] == True].copy() - - if df.empty or df['memory_mb'].nunique() < 2: - self.logger.warning(f"Insufficient data for memory scaling plot{title_suffix}") - return - - fig, ax = plt.subplots(figsize=(12, 6)) - - # Plot for each language-platform combination - for (language, platform), group in df.groupby(['language', 'platform']): - group_sorted = group.sort_values('memory_mb') - label = f"{language} ({platform})" - color = LANGUAGE_COLORS.get(language, '#888888') - linestyle = '-' if platform == 'aws' else '--' if platform == 'azure' else '-.' - - ax.plot( - group_sorted['memory_mb'], - group_sorted['avg_time_ms'], - marker='o', - label=label, - color=color, - linestyle=linestyle, - linewidth=2 - ) - - ax.set_xlabel('Memory (MB)') - ax.set_ylabel('Average Execution Time (ms)') - ax.set_title(f'Performance vs Memory{title_suffix}') - ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left') - ax.grid(alpha=0.3) - - plt.tight_layout() - - filename = f"memory_scaling{'_' + benchmark if benchmark else ''}.png" - filepath = self.output_dir / filename - plt.savefig(filepath, dpi=300, bbox_inches='tight') - self.logger.info(f"Saved: {filepath}") - plt.close() - - def plot_cold_vs_warm(self, df: pd.DataFrame): - """ - Create stacked bar chart showing cold vs warm start distribution. 
- - Args: - df: DataFrame with benchmark results - """ - # Filter successful runs only - df = df[df['success'] == True].copy() - - if df.empty or 'cold_starts' not in df.columns: - self.logger.warning("No cold start data available") - return - - # Calculate totals - df['total_invocations'] = df['cold_starts'] + df['warm_starts'] - - # Filter out rows with no invocations - df = df[df['total_invocations'] > 0] - - if df.empty: - self.logger.warning("No invocation data for cold vs warm plot") - return - - fig, ax = plt.subplots(figsize=(14, 6)) - - # Group by language and platform - grouped = df.groupby(['language', 'platform']).agg({ - 'cold_starts': 'sum', - 'warm_starts': 'sum' - }).reset_index() - - # Create labels - grouped['label'] = grouped['language'] + '\n(' + grouped['platform'] + ')' - - # Create stacked bar chart - x = np.arange(len(grouped)) - width = 0.6 - - ax.bar(x, grouped['cold_starts'], width, label='Cold Starts', color='#d62728') - ax.bar(x, grouped['warm_starts'], width, bottom=grouped['cold_starts'], - label='Warm Starts', color='#2ca02c') - - ax.set_xlabel('Language (Platform)') - ax.set_ylabel('Number of Invocations') - ax.set_title('Cold vs Warm Start Distribution') - ax.set_xticks(x) - ax.set_xticklabels(grouped['label'], rotation=45, ha='right') - ax.legend() - ax.grid(axis='y', alpha=0.3) - - plt.tight_layout() - - filepath = self.output_dir / "cold_vs_warm_starts.png" - plt.savefig(filepath, dpi=300, bbox_inches='tight') - self.logger.info(f"Saved: {filepath}") - plt.close() - - def plot_cold_warm_comparison_boxplot(self, df: pd.DataFrame, benchmark: Optional[str] = None, - language: Optional[str] = None, version: Optional[str] = None): - """ - Create side-by-side boxplot comparison of cold vs warm performance. - Similar to the friend's plot style with better visual separation. 
- - Args: - df: DataFrame with benchmark results - benchmark: Optional benchmark name to filter by - language: Optional language to filter by - version: Optional version to filter by - """ - # Filter data - if benchmark: - df = df[df['benchmark'] == benchmark] - if language: - df = df[df['language'] == language] - if version: - df = df[df['version'] == version] - - df = df[df['success'] == True].copy() - - if df.empty or 'execution_times' not in df.columns: - self.logger.warning("No execution time data for cold/warm boxplot comparison") - return - - # Expand execution times into separate rows with cold/warm labels - rows = [] - for _, row in df.iterrows(): - if 'execution_times' in row and row['execution_times']: - exec_times = row['execution_times'] - cold_count = row.get('cold_starts', 0) - - # Mark first cold_count as cold, rest as warm - for i, time_ms in enumerate(exec_times): - rows.append({ - 'benchmark': row['benchmark'], - 'language': row['language'], - 'version': row['version'], - 'memory': row['memory_mb'], - 'time_ms': time_ms, - 'type': 'cold' if i < cold_count else 'warm' - }) - - if not rows: - self.logger.warning("No execution time data to plot") - return - - expanded_df = pd.DataFrame(rows) - - # Create figure with subplots - fig, axes = plt.subplots(1, 2, figsize=(12, 5), sharey=False) - - # Build title - title_parts = [] - if benchmark: - title_parts.append(benchmark) - if language: - lang_display = f"{language.title()}" - if version: - lang_display += f" {version}" - title_parts.append(lang_display) - title_parts.append("Runtime Performance: Cold vs Warm") - fig.suptitle(' '.join(title_parts), fontsize=14, fontweight='bold') - - # Cold Start Plot - cold_data = expanded_df[expanded_df['type'] == 'cold'] - if not cold_data.empty: - sns.boxplot(ax=axes[0], x="memory", y="time_ms", data=cold_data, color="skyblue") - axes[0].set_title("Cold Start Latency") - axes[0].set_ylabel("Time (ms)") - axes[0].set_xlabel("Memory (MB)") - - # Warm Execution Plot 
- warm_data = expanded_df[expanded_df['type'] == 'warm'] - if not warm_data.empty: - sns.boxplot(ax=axes[1], x="memory", y="time_ms", data=warm_data, color="orange") - axes[1].set_title("Warm Execution Latency") - axes[1].set_ylabel("Time (ms)") - axes[1].set_xlabel("Memory (MB)") - - plt.tight_layout() - - # Create filename - filename_parts = ["cold_warm_boxplot"] - if benchmark: - filename_parts.append(benchmark.replace(".", "_")) - if language: - filename_parts.append(language) - if version: - filename_parts.append(version.replace(".", "_")) - filename = "_".join(filename_parts) + ".png" - - filepath = self.output_dir / filename - plt.savefig(filepath, dpi=300, bbox_inches='tight') - self.logger.info(f"Saved: {filepath}") - plt.close() - - def plot_overhead_breakdown(self, df: pd.DataFrame, benchmark: Optional[str] = None, - language: Optional[str] = None, start_type: str = 'cold'): - """ - Create bar chart showing overhead breakdown (client_time, provider_time, exec_time). - - Args: - df: DataFrame with benchmark results - benchmark: Optional benchmark name to filter by - language: Optional language to filter by - start_type: 'cold' or 'warm' to filter by startup type - """ - # Filter data - if benchmark: - df = df[df['benchmark'] == benchmark] - if language: - df = df[df['language'] == language] - - df = df[df['success'] == True].copy() - - # Check if we have the required timing breakdown data - # This assumes metrics might have these fields - if df.empty: - self.logger.warning(f"No data for overhead breakdown") - return - - # Expand the data to get individual measurements - rows = [] - for _, row in df.iterrows(): - if 'execution_times' in row and row['execution_times']: - exec_times = row['execution_times'] - cold_count = row.get('cold_starts', 0) - - for i, exec_time_ms in enumerate(exec_times): - is_cold = i < cold_count - if (start_type == 'cold' and is_cold) or (start_type == 'warm' and not is_cold): - # For now, we'll use execution time as a proxy - # 
In a real scenario, you'd have client_time, provider_time, exec_time separately - rows.append({ - 'memory': row['memory_mb'], - 'client_time_ms': exec_time_ms, # Would be actual client_time - 'provider_time_ms': exec_time_ms * 0.8, # Placeholder - provider overhead - 'exec_time_ms': exec_time_ms * 0.7 # Placeholder - actual execution - }) - - if not rows: - self.logger.warning(f"No {start_type} start data for overhead breakdown") - return - - breakdown_df = pd.DataFrame(rows) - - # Melt for seaborn - melted = breakdown_df.melt( - id_vars=['memory'], - value_vars=['client_time_ms', 'provider_time_ms', 'exec_time_ms'], - var_name='Metric', - value_name='Time' - ) - - plt.figure(figsize=(10, 6)) - sns.barplot(x="memory", y="Time", hue="Metric", data=melted, errorbar='sd', palette="muted") - - # Build title - title_parts = [] - if benchmark: - title_parts.append(benchmark) - if language: - title_parts.append(language.title()) - title_parts.append(f"Overhead ({start_type.title()} Start)") - - plt.title(' '.join(title_parts), fontweight='bold') - plt.ylabel("Time (ms)") - plt.xlabel("Memory (MB)") - plt.tight_layout() - - # Create filename - filename_parts = [f"{start_type}_overhead"] - if benchmark: - filename_parts.append(benchmark.replace(".", "_")) - if language: - filename_parts.append(language) - filename = "_".join(filename_parts) + ".png" - - filepath = self.output_dir / filename - plt.savefig(filepath, dpi=300, bbox_inches='tight') - self.logger.info(f"Saved: {filepath}") - plt.close() - - def plot_memory_usage_distribution(self, df: pd.DataFrame, benchmark: Optional[str] = None, - language: Optional[str] = None): - """ - Create boxplot showing memory usage distribution across memory configurations. 
- - Args: - df: DataFrame with benchmark results - benchmark: Optional benchmark name to filter by - language: Optional language to filter by - """ - # Filter data - if benchmark: - df = df[df['benchmark'] == benchmark] - if language: - df = df[df['language'] == language] - - df = df[df['success'] == True].copy() - - # Check if we have memory usage data in metrics - if df.empty: - self.logger.warning("No data for memory usage distribution") - return - - # Try to extract memory usage from metrics if available - rows = [] - for _, row in df.iterrows(): - if 'metrics' in row: - metrics = row['metrics'] - if isinstance(metrics, dict) and 'memory_used_mb' in metrics: - rows.append({ - 'memory': row['memory_mb'], - 'mem_used': metrics['memory_used_mb'] - }) - - if not rows: - self.logger.warning("No memory usage data available in metrics") - return - - mem_df = pd.DataFrame(rows) - - plt.figure(figsize=(8, 6)) - sns.boxplot(x="memory", y="mem_used", data=mem_df, color="lightgreen") - - if not mem_df.empty and mem_df['mem_used'].max() > 0: - plt.ylim(0, mem_df['mem_used'].max() * 1.2) - - # Build title - title_parts = [] - if benchmark: - title_parts.append(benchmark) - if language: - title_parts.append(language.title()) - title_parts.append("Memory Usage Distribution") - - plt.title(' '.join(title_parts), fontweight='bold') - plt.ylabel("Used Memory (MB)") - plt.xlabel("Allocated Memory (MB)") - plt.tight_layout() - - # Create filename - filename_parts = ["memory_usage"] - if benchmark: - filename_parts.append(benchmark.replace(".", "_")) - if language: - filename_parts.append(language) - filename = "_".join(filename_parts) + ".png" - - filepath = self.output_dir / filename - plt.savefig(filepath, dpi=300, bbox_inches='tight') - self.logger.info(f"Saved: {filepath}") - plt.close() - - def plot_heatmap(self, df: pd.DataFrame, metric: str = 'avg_time_ms'): - """ - Create heatmap showing performance across platforms and languages. 
- - Args: - df: DataFrame with benchmark results - metric: Metric to visualize - """ - # Filter successful runs only - df = df[df['success'] == True].copy() - - if df.empty or metric not in df.columns: - self.logger.warning(f"No data available for heatmap with metric: {metric}") - return - - # Drop rows where the metric is NaN - df = df.dropna(subset=[metric]) - - if df.empty: - self.logger.warning(f"No valid numeric data for heatmap with metric: {metric}") - return - - # Aggregate by platform and language - pivot = df.groupby(['platform', 'language'])[metric].mean().reset_index() - pivot_table = pivot.pivot(index='platform', columns='language', values=metric) - - if pivot_table.empty or pivot_table.isna().all().all(): - self.logger.warning("No data for heatmap") - return - - fig, ax = plt.subplots(figsize=(10, 6)) - - # Only annotate if we have valid numeric data - try: - sns.heatmap( - pivot_table, - annot=True, - fmt='.2f', - cmap='YlOrRd', - ax=ax, - cbar_kws={'label': 'Avg Execution Time (ms)'}, - mask=pivot_table.isna() # Mask NaN values - ) - - ax.set_title(f'Performance Heatmap - {metric.replace("_", " ").title()}') - ax.set_xlabel('Language') - ax.set_ylabel('Platform') - - plt.tight_layout() - - filepath = self.output_dir / f"heatmap_{metric}.png" - plt.savefig(filepath, dpi=300, bbox_inches='tight') - self.logger.info(f"Saved: {filepath}") - except (ValueError, TypeError) as e: - self.logger.warning(f"Could not generate heatmap: {e}") - finally: - plt.close() - - def plot_version_comparison(self, df: pd.DataFrame, language: str): - """ - Compare different versions of the same language. 
- - Args: - df: DataFrame with benchmark results - language: Language to compare versions for - """ - df = df[df['language'] == language] - df = df[df['success'] == True].copy() - - if df.empty or df['version'].nunique() < 2: - self.logger.warning(f"Insufficient version data for {language}") - return - - fig, ax = plt.subplots(figsize=(12, 6)) - - # Group by version and platform - grouped = df.groupby(['version', 'platform'])['avg_time_ms'].mean().reset_index() - pivot = grouped.pivot(index='version', columns='platform', values='avg_time_ms') - - pivot.plot(kind='bar', ax=ax, color=[PLATFORM_COLORS.get(p, '#888888') for p in pivot.columns]) - - ax.set_xlabel(f'{language.title()} Version') - ax.set_ylabel('Average Execution Time (ms)') - ax.set_title(f'{language.title()} Version Performance Comparison') - ax.legend(title='Platform') - ax.grid(axis='y', alpha=0.3) - - plt.xticks(rotation=0) - plt.tight_layout() - - filepath = self.output_dir / f"version_comparison_{language}.png" - plt.savefig(filepath, dpi=300, bbox_inches='tight') - self.logger.info(f"Saved: {filepath}") - plt.close() - - def create_summary_report(self, df: pd.DataFrame): - """ - Create a text summary report of the benchmark results. 
- - Args: - df: DataFrame with benchmark results - """ - report_lines = [] - report_lines.append("="*80) - report_lines.append("BENCHMARK COMPARISON SUMMARY REPORT") - report_lines.append("="*80) - report_lines.append("") - - # Metadata (only for JSON results, not CSV) - if self.results: - metadata = self.results.get('metadata', {}) - report_lines.append(f"Start Time: {metadata.get('start_time', 'N/A')}") - report_lines.append(f"End Time: {metadata.get('end_time', 'N/A')}") - report_lines.append("") - - # Summary statistics - if 'summary' in metadata: - summary = metadata['summary'] - report_lines.append("Overall Statistics:") - report_lines.append(f" Total Runs: {summary.get('total_runs', 0)}") - report_lines.append(f" Successful: {summary.get('successful', 0)}") - report_lines.append(f" Failed: {summary.get('failed', 0)}") - report_lines.append(f" Success Rate: {summary.get('success_rate', 'N/A')}") - report_lines.append("") - - # Successful runs only (filter by success column if it exists) - if 'success' in df.columns: - df_success = df[df['success'] == True].copy() - else: - df_success = df.copy() - - if not df_success.empty and 'avg_time_ms' in df_success.columns: - if 'platform' in df_success.columns: - report_lines.append("Performance by Platform:") - for platform in sorted(df_success['platform'].unique()): - platform_df = df_success[df_success['platform'] == platform] - avg_time = platform_df['avg_time_ms'].mean() - report_lines.append(f" {platform.upper()}: {avg_time:.2f} ms (avg)") - report_lines.append("") - - if 'language' in df_success.columns: - report_lines.append("Performance by Language:") - for language in sorted(df_success['language'].unique()): - lang_df = df_success[df_success['language'] == language] - avg_time = lang_df['avg_time_ms'].mean() - report_lines.append(f" {language}: {avg_time:.2f} ms (avg)") - report_lines.append("") - - # Best performers - if 'language' in df_success.columns and 'platform' in df_success.columns: - 
report_lines.append("Best Performers:") - # Check if we have valid data - if not df_success['avg_time_ms'].isna().all(): - best_overall = df_success.loc[df_success['avg_time_ms'].idxmin()] - report_lines.append( - f" Fastest Overall: {best_overall['language']} on {best_overall['platform']} " - f"({best_overall['avg_time_ms']:.2f} ms)" - ) - else: - report_lines.append(" No valid performance data available") - - for platform in df_success['platform'].unique(): - platform_df = df_success[df_success['platform'] == platform] - if not platform_df.empty and not platform_df['avg_time_ms'].isna().all(): - best = platform_df.loc[platform_df['avg_time_ms'].idxmin()] - version_str = f" v{best['version']}" if 'version' in best else "" - report_lines.append( - f" Fastest on {platform}: {best['language']}{version_str} " - f"({best['avg_time_ms']:.2f} ms)" - ) - report_lines.append("") - - report_lines.append("="*80) - - # Write report - report_text = "\n".join(report_lines) - filepath = self.output_dir / "summary_report.txt" - with open(filepath, 'w') as f: - f.write(report_text) - - self.logger.info(f"Saved: {filepath}") - print("\n" + report_text) - - def plot_sebs_cold_warm_comparison(self, df: pd.DataFrame, benchmark_name: str = "", - language_name: str = "", title_comment: str = ""): - """ - Create SeBS-style side-by-side cold vs warm comparison from CSV data. - This matches the friends' plotting style exactly. 
- - Args: - df: DataFrame from SeBS CSV (must have 'type', 'memory', 'client_time_ms' columns) - benchmark_name: Name of benchmark for title - language_name: Language name for title - title_comment: Additional comment for title - """ - if not self.is_csv: - self.logger.warning("This plot type requires CSV input from SeBS perf-cost") - return - - fig, axes = plt.subplots(1, 2, figsize=(12, 5), sharey=False) - - title_parts = [benchmark_name, language_name, title_comment, "Runtime Performance: Cold vs Warm"] - title = ' '.join([p for p in title_parts if p]) - fig.suptitle(title, fontsize=14, fontweight='bold') - - # Cold Start Plot - cold_data = df[df['type'] == 'cold'] - if not cold_data.empty: - sns.boxplot(ax=axes[0], x="memory", y="client_time_ms", data=cold_data, color="skyblue") - axes[0].set_title("Cold Start Latency") - axes[0].set_ylabel("Time (ms)") - axes[0].set_xlabel("Memory (MB)") - - # Warm Execution Plot - warm_data = df[df['type'] == 'warm'] - if not warm_data.empty: - sns.boxplot(ax=axes[1], x="memory", y="client_time_ms", data=warm_data, color="orange") - axes[1].set_title("Warm Execution Latency") - axes[1].set_ylabel("Time (ms)") - axes[1].set_xlabel("Memory (MB)") - - plt.tight_layout() - - filepath = self.output_dir / "sebs_cold_warm_comparison.png" - plt.savefig(filepath, dpi=300, bbox_inches='tight') - self.logger.info(f"Saved: {filepath}") - plt.close() - - def plot_sebs_overhead_breakdown(self, df: pd.DataFrame, start_type: str = 'cold', - benchmark_name: str = "", language_name: str = "", - title_comment: str = ""): - """ - Create SeBS-style overhead breakdown showing client/provider/exec times. 
- - Args: - df: DataFrame from SeBS CSV - start_type: 'cold' or 'warm' - benchmark_name: Name of benchmark for title - language_name: Language name for title - title_comment: Additional comment for title - """ - if not self.is_csv: - self.logger.warning("This plot type requires CSV input from SeBS perf-cost") - return - - # Filter by type - filtered_df = df[df['type'] == start_type].copy() - - if filtered_df.empty: - self.logger.warning(f"No {start_type} data for overhead breakdown") - return - - # For cold starts, filter out entries with no provider time - if start_type == 'cold': - filtered_df = filtered_df[filtered_df['provider_time'] > 0] - - # Melt for seaborn - melted = filtered_df.melt( - id_vars=['memory'], - value_vars=['client_time_ms', 'provider_time_ms', 'exec_time_ms'], - var_name='Metric', - value_name='Time' - ) - - plt.figure(figsize=(10, 6)) - sns.barplot(x="memory", y="Time", hue="Metric", data=melted, errorbar='sd', palette="muted") - - title_parts = [benchmark_name, language_name, title_comment, - f"Overhead ({start_type.title()} Start)"] - title = ' '.join([p for p in title_parts if p]) - - plt.title(title, fontweight='bold') - plt.ylabel("Time (ms)") - plt.xlabel("Memory (MB)") - plt.tight_layout() - - filepath = self.output_dir / f"sebs_{start_type}_overhead.png" - plt.savefig(filepath, dpi=300, bbox_inches='tight') - self.logger.info(f"Saved: {filepath}") - plt.close() - - def plot_sebs_memory_usage(self, df: pd.DataFrame, benchmark_name: str = "", - language_name: str = "", title_comment: str = ""): - """ - Create SeBS-style memory usage distribution plot. 
- - Args: - df: DataFrame from SeBS CSV - benchmark_name: Name of benchmark for title - language_name: Language name for title - title_comment: Additional comment for title - """ - if not self.is_csv: - self.logger.warning("This plot type requires CSV input from SeBS perf-cost") - return - - plt.figure(figsize=(8, 6)) - sns.boxplot(x="memory", y="mem_used", data=df, color="lightgreen") - - if not df.empty and df['mem_used'].max() > 0: - plt.ylim(0, df['mem_used'].max() * 1.2) - - title_parts = [benchmark_name, language_name, title_comment, "Memory Usage Distribution"] - title = ' '.join([p for p in title_parts if p]) - - plt.title(title, fontweight='bold') - plt.ylabel("Used Memory (MB)") - plt.xlabel("Allocated Memory (MB)") - plt.tight_layout() - - filepath = self.output_dir / "sebs_memory_usage.png" - plt.savefig(filepath, dpi=300, bbox_inches='tight') - self.logger.info(f"Saved: {filepath}") - plt.close() - - def create_sebs_plots(self, benchmark_name: str = "", language_name: str = "", - title_comment: str = ""): - """ - Generate all SeBS-style plots from CSV data. - This creates plots matching your friends' style. - - Args: - benchmark_name: Name of benchmark (e.g., "110.html") - language_name: Language name (e.g., "Python 3.11") - title_comment: Additional comment (e.g., "codepackage", "-O2") - """ - if not self.is_csv: - self.logger.error("SeBS plots require CSV input. 
Use JSON input for standard plots.") - return - - self.logger.info("Generating SeBS-style plots...") - - df = self.extract_dataframe() - - if df.empty: - self.logger.error("No data to plot!") - return - - # Cold vs Warm comparison - self.plot_sebs_cold_warm_comparison(df, benchmark_name, language_name, title_comment) - - # Overhead breakdowns - self.plot_sebs_overhead_breakdown(df, 'cold', benchmark_name, language_name, title_comment) - self.plot_sebs_overhead_breakdown(df, 'warm', benchmark_name, language_name, title_comment) - - # Memory usage - self.plot_sebs_memory_usage(df, benchmark_name, language_name, title_comment) - - self.logger.info(f"\n✓ All SeBS plots generated in: {self.output_dir}") - - def create_all_plots(self): - """Generate all available plots from the benchmark results.""" - self.logger.info("Generating all plots...") - - df = self.extract_dataframe() - - if df.empty: - self.logger.error("No data to plot!") - return - - plot_count = 0 - error_count = 0 - - # Create summary report - try: - self.create_summary_report(df) - plot_count += 1 - except Exception as e: - self.logger.warning(f"Could not create summary report: {e}") - error_count += 1 - - # Language comparison - try: - self.plot_language_comparison(df) - plot_count += 1 - except Exception as e: - self.logger.warning(f"Could not create language comparison: {e}") - error_count += 1 - - # Memory scaling - if df['memory_mb'].nunique() > 1: - try: - self.plot_memory_scaling(df) - plot_count += 1 - except Exception as e: - self.logger.warning(f"Could not create memory scaling plot: {e}") - error_count += 1 - - # Cold vs warm starts (original stacked bar) - if 'cold_starts' in df.columns and 'warm_starts' in df.columns: - try: - self.plot_cold_vs_warm(df) - plot_count += 1 - except Exception as e: - self.logger.warning(f"Could not create cold vs warm plot: {e}") - error_count += 1 - - # Heatmap - try: - self.plot_heatmap(df) - plot_count += 1 - except Exception as e: - 
self.logger.warning(f"Could not create heatmap: {e}") - error_count += 1 - - # Version comparisons for each language - for language in df['language'].unique(): - if df[df['language'] == language]['version'].nunique() > 1: - try: - self.plot_version_comparison(df, language) - plot_count += 1 - except Exception as e: - self.logger.warning(f"Could not create version comparison for {language}: {e}") - error_count += 1 - - # Per-benchmark plots - for benchmark in df['benchmark'].unique(): - try: - self.plot_language_comparison(df, benchmark=benchmark) - plot_count += 1 - except Exception as e: - self.logger.warning(f"Could not create language comparison for {benchmark}: {e}") - error_count += 1 - - # New enhanced plots per benchmark and language - for language in df[df['benchmark'] == benchmark]['language'].unique(): - lang_data = df[(df['benchmark'] == benchmark) & (df['language'] == language)] - - # Cold/Warm boxplot comparison - for version in lang_data['version'].unique(): - try: - self.plot_cold_warm_comparison_boxplot(df, benchmark, language, version) - plot_count += 1 - except Exception as e: - self.logger.debug(f"Could not create boxplot for {benchmark}/{language}/{version}: {e}") - error_count += 1 - - # Overhead breakdowns - try: - self.plot_overhead_breakdown(df, benchmark, language, start_type='cold') - plot_count += 1 - except Exception as e: - self.logger.debug(f"Could not create cold overhead for {benchmark}/{language}: {e}") - error_count += 1 - - try: - self.plot_overhead_breakdown(df, benchmark, language, start_type='warm') - plot_count += 1 - except Exception as e: - self.logger.debug(f"Could not create warm overhead for {benchmark}/{language}: {e}") - error_count += 1 - - # Memory usage - try: - self.plot_memory_usage_distribution(df, benchmark, language) - plot_count += 1 - except Exception as e: - self.logger.debug(f"Could not create memory usage for {benchmark}/{language}: {e}") - error_count += 1 - - self.logger.info(f"\n✓ Generated {plot_count} 
plots in: {self.output_dir}") - if error_count > 0: - self.logger.info(f" ({error_count} plots skipped due to insufficient data)") - - -def main(): - parser = argparse.ArgumentParser( - description='Visualize cross-platform benchmark comparison results', - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -Examples: - # Generate all plots from JSON (cross_platform_benchmark) - %(prog)s results/comparison_20241212/comparison_results.json - - # Generate SeBS-style plots from CSV (perf-cost result) - %(prog)s measurements/110-html/perf-cost/result.csv --plot-type sebs --benchmark-name "110.html" --language-name "Python 3.11" --title-comment "codepackage" - - # Specify output directory - %(prog)s results/comparison_results.json --output plots/ - - # Generate specific plot types - %(prog)s results.json --plot-type language_comparison platform_comparison - """ - ) - - parser.add_argument('results_file', - help='Path to comparison_results.json or result.csv file') - parser.add_argument('--output', '-o', - help='Output directory for plots (default: results_dir/plots)') - parser.add_argument('--plot-type', nargs='+', - choices=['language_comparison', 'platform_comparison', - 'memory_scaling', 'cold_warm', 'cold_warm_boxplot', - 'overhead', 'memory_usage', 'heatmap', 'versions', 'all', 'sebs'], - default=['all'], - help='Types of plots to generate (default: all, use sebs for CSV input)') - parser.add_argument('--language', '-l', - help='Filter by specific language') - parser.add_argument('--benchmark', '-b', - help='Filter by specific benchmark') - parser.add_argument('--format', choices=['png', 'pdf', 'svg'], default='png', - help='Output format for plots (default: png)') - - # SeBS-specific arguments - parser.add_argument('--benchmark-name', - help='Benchmark name for SeBS plots (e.g., "110.html")') - parser.add_argument('--language-name', - help='Language name for SeBS plots (e.g., "Python 3.11")') - parser.add_argument('--title-comment', - 
help='Additional title comment for SeBS plots (e.g., "codepackage", "-O2")') - - args = parser.parse_args() - - try: - visualizer = BenchmarkVisualizer(args.results_file, args.output) - - # Handle CSV files (SeBS perf-cost format) - if visualizer.is_csv: - if 'sebs' in args.plot_type or 'all' in args.plot_type: - visualizer.create_sebs_plots( - benchmark_name=args.benchmark_name or "", - language_name=args.language_name or "", - title_comment=args.title_comment or "" - ) - else: - print("CSV input detected. Use --plot-type sebs to generate SeBS-style plots.") - print("Example: python plot_comparison.py result.csv --plot-type sebs --benchmark-name '110.html' --language-name 'Python 3.11'") - return 1 - - # Handle JSON files (cross_platform_benchmark format) - elif 'all' in args.plot_type: - visualizer.create_all_plots() - else: - df = visualizer.extract_dataframe() - - if 'language_comparison' in args.plot_type: - visualizer.plot_language_comparison(df, benchmark=args.benchmark) - - if 'platform_comparison' in args.plot_type: - visualizer.plot_platform_comparison(df, language=args.language) - - if 'memory_scaling' in args.plot_type: - visualizer.plot_memory_scaling(df, benchmark=args.benchmark) - - if 'cold_warm' in args.plot_type: - visualizer.plot_cold_vs_warm(df) - - if 'cold_warm_boxplot' in args.plot_type: - visualizer.plot_cold_warm_comparison_boxplot( - df, benchmark=args.benchmark, language=args.language - ) - - if 'overhead' in args.plot_type: - visualizer.plot_overhead_breakdown( - df, benchmark=args.benchmark, language=args.language, start_type='cold' - ) - visualizer.plot_overhead_breakdown( - df, benchmark=args.benchmark, language=args.language, start_type='warm' - ) - - if 'memory_usage' in args.plot_type: - visualizer.plot_memory_usage_distribution( - df, benchmark=args.benchmark, language=args.language - ) - - if 'heatmap' in args.plot_type: - visualizer.plot_heatmap(df) - - if 'versions' in args.plot_type and args.language: - 
visualizer.plot_version_comparison(df, args.language) - - visualizer.create_summary_report(df) - - print(f"\n✓ Visualization complete! Plots saved to: {visualizer.output_dir}") - return 0 - - except FileNotFoundError: - print(f"Error: Results file not found: {args.results_file}") - return 1 - except json.JSONDecodeError: - print(f"Error: Invalid JSON in results file: {args.results_file}") - return 1 - except Exception as e: - print(f"Error: {e}") - import traceback - traceback.print_exc() - return 1 - - -if __name__ == '__main__': - sys.exit(main()) - diff --git a/scripts/run_comparison.sh b/scripts/run_comparison.sh deleted file mode 100755 index 1fde995e0..000000000 --- a/scripts/run_comparison.sh +++ /dev/null @@ -1,253 +0,0 @@ -#!/bin/bash - -# Convenience wrapper for running benchmark comparisons and generating plots - -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" - -# Colors for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -NC='\033[0m' # No Color - -# Default values -BENCHMARKS="010.sleep" -PLATFORMS="aws" -LANGUAGES="python" -CONFIG="${PROJECT_ROOT}/config/example.json" -OUTPUT_DIR="${PROJECT_ROOT}/results/comparison_$(date +%Y%m%d_%H%M%S)" -REPETITIONS=5 -MEMORY="256" -INPUT_SIZE="test" -ARCHITECTURE="x64" -GENERATE_PLOTS=true -CONTAINER_DEPLOYMENT=false -CONTAINER_DEPLOYMENT_FOR="" - -# Print usage -usage() { - cat << EOF -Usage: $(basename "$0") [OPTIONS] - -Run cross-platform benchmark comparisons and generate plots. 
- -Options: - -b, --benchmarks NAMES Benchmark names (space-separated, default: 010.sleep) - -p, --platforms PLATFORMS Platforms to test (space-separated, default: aws) - Available: aws azure gcp local - -l, --languages LANGUAGES Languages to test (space-separated, default: python) - Available: python nodejs rust java pypy - -c, --config FILE Configuration file (default: config/example.json) - -o, --output DIR Output directory (default: results/comparison_TIMESTAMP) - -r, --repetitions NUM Number of repetitions (default: 5) - -m, --memory SIZES Memory sizes in MB (space-separated, default: 256) - -i, --input-size SIZE Input size: test, small, large (default: test) - -a, --architecture ARCH Architecture: x64, arm64 (default: x64) - --container-deployment Run functions as containers (all platforms) - --container-deployment-for Platforms to use container deployment (space-separated) - Example: --container-deployment-for "aws gcp" - --no-plots Skip plot generation - --skip-benchmark Skip benchmark run, only generate plots - -h, --help Show this help message - -Examples: - # Compare Python and Node.js on AWS and Azure - $(basename "$0") -b "010.sleep 110.dynamic-html" -p "aws azure" -l "python nodejs" - - # Compare AWS (container) vs Azure (package deployment) - $(basename "$0") -b "010.sleep" -p "aws azure" -l "python" --container-deployment-for "aws" - - # Test different memory configurations - $(basename "$0") -b "501.graph-pagerank" -m "512 1024 2048" -r 10 - - # Just generate plots from existing results - $(basename "$0") --skip-benchmark -o results/comparison_20241212_120000 - -EOF -} - -# Parse arguments -SKIP_BENCHMARK=false -while [[ $# -gt 0 ]]; do - case $1 in - -b|--benchmarks) - BENCHMARKS="$2" - shift 2 - ;; - -p|--platforms) - PLATFORMS="$2" - shift 2 - ;; - -l|--languages) - LANGUAGES="$2" - shift 2 - ;; - -c|--config) - CONFIG="$2" - shift 2 - ;; - -o|--output) - OUTPUT_DIR="$2" - shift 2 - ;; - -r|--repetitions) - REPETITIONS="$2" - shift 2 - ;; - 
-m|--memory) - MEMORY="$2" - shift 2 - ;; - -i|--input-size) - INPUT_SIZE="$2" - shift 2 - ;; - -a|--architecture) - ARCHITECTURE="$2" - shift 2 - ;; - --container-deployment) - CONTAINER_DEPLOYMENT=true - shift - ;; - --container-deployment-for) - CONTAINER_DEPLOYMENT_FOR="$2" - shift 2 - ;; - --no-plots) - GENERATE_PLOTS=false - shift - ;; - --skip-benchmark) - SKIP_BENCHMARK=true - shift - ;; - -h|--help) - usage - exit 0 - ;; - *) - echo -e "${RED}Error: Unknown option $1${NC}" - usage - exit 1 - ;; - esac -done - -echo "==================================" -echo "Benchmark Comparison Tool" -echo "==================================" -echo "" - -# Check if config exists -if [ ! -f "$CONFIG" ]; then - echo -e "${RED}Error: Configuration file not found: $CONFIG${NC}" - exit 1 -fi - -# Run benchmarks unless skipped -if [ "$SKIP_BENCHMARK" = false ]; then - echo -e "${GREEN}Step 1: Running Benchmarks${NC}" - echo " Benchmarks: $BENCHMARKS" - echo " Platforms: $PLATFORMS" - echo " Languages: $LANGUAGES" - echo " Repetitions: $REPETITIONS" - echo " Memory: $MEMORY MB" - echo " Input Size: $INPUT_SIZE" - echo " Architecture: $ARCHITECTURE" - echo " Container Deployment: $CONTAINER_DEPLOYMENT" - if [ -n "$CONTAINER_DEPLOYMENT_FOR" ]; then - echo " Container Deployment For: $CONTAINER_DEPLOYMENT_FOR" - fi - echo " Output: $OUTPUT_DIR" - echo "" - - # Build command - CMD=( - python3 "${SCRIPT_DIR}/cross_platform_benchmark.py" - --benchmarks $BENCHMARKS - --platforms $PLATFORMS - --languages $LANGUAGES - --config "$CONFIG" - --output "$OUTPUT_DIR" - --repetitions "$REPETITIONS" - --memory $MEMORY - --input-size "$INPUT_SIZE" - --architecture "$ARCHITECTURE" - --verbose - ) - - if [ "$CONTAINER_DEPLOYMENT" = true ]; then - CMD+=(--container-deployment) - fi - - if [ -n "$CONTAINER_DEPLOYMENT_FOR" ]; then - CMD+=(--container-deployment-for $CONTAINER_DEPLOYMENT_FOR) - fi - - # Add --plot flag if plots are enabled (uses integrated plotting) - if [ "$GENERATE_PLOTS" = true ]; 
then - CMD+=(--plot) - fi - - echo "Running: ${CMD[@]}" - echo "" - - if "${CMD[@]}"; then - echo -e "${GREEN}✓ Benchmarks completed successfully!${NC}" - else - echo -e "${RED}✗ Benchmark execution failed!${NC}" - exit 1 - fi -else - echo -e "${YELLOW}Skipping benchmark execution${NC}" - - # Check if results file exists - if [ ! -f "$OUTPUT_DIR/comparison_results.json" ]; then - echo -e "${RED}Error: Results file not found: $OUTPUT_DIR/comparison_results.json${NC}" - exit 1 - fi - - # Generate plots from existing results if requested - if [ "$GENERATE_PLOTS" = true ]; then - echo "" - echo -e "${GREEN}Generating Plots from Existing Results${NC}" - echo "" - - PLOT_CMD=( - python3 "${SCRIPT_DIR}/plot_comparison.py" - "$OUTPUT_DIR/comparison_results.json" - --output "$OUTPUT_DIR/plots" - ) - - if "${PLOT_CMD[@]}"; then - echo -e "${GREEN}✓ Plots generated successfully!${NC}" - else - echo -e "${YELLOW}⚠ Plot generation failed (may need matplotlib/seaborn)${NC}" - fi - fi -fi - -echo "" -echo "==================================" -echo -e "${GREEN}Comparison Complete!${NC}" -echo "==================================" -echo "" -echo "Results Location: $OUTPUT_DIR" -echo " - comparison_results.json (raw results)" -echo " - benchmark_run.log (execution log)" -if [ "$GENERATE_PLOTS" = true ]; then - echo " - plots/ (visualizations)" -fi -echo "" -echo "Useful commands:" -echo " # Regenerate plots" -echo " python3 ${SCRIPT_DIR}/plot_comparison.py $OUTPUT_DIR/comparison_results.json" -echo "" -echo " # Regenerate with specific plot types" -echo " python3 ${SCRIPT_DIR}/plot_comparison.py $OUTPUT_DIR/comparison_results.json --plot-type cold_warm_boxplot memory_scaling" -echo "" - diff --git a/scripts/run_sebs_with_plots.sh b/scripts/run_sebs_with_plots.sh deleted file mode 100755 index c5e04c8d6..000000000 --- a/scripts/run_sebs_with_plots.sh +++ /dev/null @@ -1,218 +0,0 @@ -#!/bin/bash -# Wrapper script to run SeBS perf-cost experiment and generate plots in one command - -set 
-e # Exit on error - -# Color output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -usage() { - cat << EOF -Usage: $0 [OPTIONS] - -Run SeBS perf-cost experiment and automatically generate plots. - -Required Options: - --config CONFIG Path to SeBS config file - --deployment PLATFORM Deployment platform (aws, azure, gcp) - --output-dir DIR Output directory for measurements - -Optional Options: - --benchmark-name NAME Benchmark name for plot titles (e.g., "110.html") - --language-name LANG Language name for plot titles (e.g., "Python 3.11") - --title-comment TEXT Additional comment for titles (e.g., "codepackage", "-O2") - --skip-invoke Skip the invoke step (only process existing data) - --skip-process Skip the process step (only invoke) - --update-code Update function code before running - --help Show this help message - -Examples: - # Run full workflow (invoke + process + plot) - $0 --config config/aws_110_html_python_128_512_2048.json \\ - --deployment aws \\ - --output-dir measurements/110-html-python \\ - --benchmark-name "110.html" \\ - --language-name "Python 3.11" \\ - --title-comment "codepackage" - - # Just process existing data and plot - $0 --config config/aws_110_html_cpp.json \\ - --deployment aws \\ - --output-dir measurements/110-html-cpp \\ - --benchmark-name "110.html" \\ - --language-name "C++ -O2" \\ - --skip-invoke - -EOF - exit 1 -} - -# Default values -SKIP_INVOKE=false -SKIP_PROCESS=false -UPDATE_CODE="" -BENCHMARK_NAME="" -LANGUAGE_NAME="" -TITLE_COMMENT="" - -# Parse arguments -while [[ $# -gt 0 ]]; do - case $1 in - --config) - CONFIG="$2" - shift 2 - ;; - --deployment) - DEPLOYMENT="$2" - shift 2 - ;; - --output-dir) - OUTPUT_DIR="$2" - shift 2 - ;; - --benchmark-name) - BENCHMARK_NAME="$2" - shift 2 - ;; - --language-name) - LANGUAGE_NAME="$2" - shift 2 - ;; - --title-comment) - TITLE_COMMENT="$2" - shift 2 - ;; - --skip-invoke) - SKIP_INVOKE=true - shift - ;; - --skip-process) - 
SKIP_PROCESS=true - shift - ;; - --update-code) - UPDATE_CODE="--update-code" - shift - ;; - --help) - usage - ;; - *) - echo -e "${RED}Error: Unknown option $1${NC}" - usage - ;; - esac -done - -# Validate required arguments -if [[ -z "$CONFIG" ]] || [[ -z "$DEPLOYMENT" ]] || [[ -z "$OUTPUT_DIR" ]]; then - echo -e "${RED}Error: Missing required arguments${NC}" - usage -fi - -# Get script directory -SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -PROJECT_ROOT="$(dirname "$SCRIPT_DIR")" - -echo -e "${BLUE}======================================${NC}" -echo -e "${BLUE}SeBS Perf-Cost Experiment with Plots${NC}" -echo -e "${BLUE}======================================${NC}" -echo "" -echo -e "Config: ${GREEN}$CONFIG${NC}" -echo -e "Deployment: ${GREEN}$DEPLOYMENT${NC}" -echo -e "Output: ${GREEN}$OUTPUT_DIR${NC}" -echo "" - -# Step 1: Invoke experiment -if [[ "$SKIP_INVOKE" == false ]]; then - echo -e "${YELLOW}Step 1: Running experiment (invoke perf-cost)...${NC}" - cd "$PROJECT_ROOT" - ./sebs.py experiment invoke perf-cost \ - --config "$CONFIG" \ - --deployment "$DEPLOYMENT" \ - --output-dir "$OUTPUT_DIR" \ - $UPDATE_CODE - - if [[ $? -eq 0 ]]; then - echo -e "${GREEN}✓ Experiment completed successfully${NC}" - else - echo -e "${RED}✗ Experiment failed${NC}" - exit 1 - fi - echo "" -else - echo -e "${YELLOW}Step 1: Skipping invoke step${NC}" - echo "" -fi - -# Step 2: Process experiment -if [[ "$SKIP_PROCESS" == false ]]; then - echo -e "${YELLOW}Step 2: Processing results...${NC}" - cd "$PROJECT_ROOT" - ./sebs.py experiment process perf-cost \ - --config "$CONFIG" \ - --deployment "$DEPLOYMENT" \ - --output-dir "$OUTPUT_DIR" - - if [[ $? 
-eq 0 ]]; then - echo -e "${GREEN}✓ Processing completed successfully${NC}" - else - echo -e "${RED}✗ Processing failed${NC}" - exit 1 - fi - echo "" -else - echo -e "${YELLOW}Step 2: Skipping process step${NC}" - echo "" -fi - -# Step 3: Generate plots -echo -e "${YELLOW}Step 3: Generating plots...${NC}" - -CSV_FILE="$OUTPUT_DIR/perf-cost/result.csv" - -if [[ ! -f "$CSV_FILE" ]]; then - echo -e "${RED}✗ CSV file not found: $CSV_FILE${NC}" - echo -e "${RED} Make sure the process step completed successfully${NC}" - exit 1 -fi - -# Build plot command -PLOT_CMD="python3 $SCRIPT_DIR/plot_comparison.py $CSV_FILE --plot-type sebs" - -if [[ -n "$BENCHMARK_NAME" ]]; then - PLOT_CMD="$PLOT_CMD --benchmark-name \"$BENCHMARK_NAME\"" -fi - -if [[ -n "$LANGUAGE_NAME" ]]; then - PLOT_CMD="$PLOT_CMD --language-name \"$LANGUAGE_NAME\"" -fi - -if [[ -n "$TITLE_COMMENT" ]]; then - PLOT_CMD="$PLOT_CMD --title-comment \"$TITLE_COMMENT\"" -fi - -eval $PLOT_CMD - -if [[ $? -eq 0 ]]; then - echo -e "${GREEN}✓ Plots generated successfully${NC}" - echo -e "${GREEN} Output: $OUTPUT_DIR/perf-cost/plots/${NC}" -else - echo -e "${RED}✗ Plot generation failed${NC}" - exit 1 -fi - -echo "" -echo -e "${BLUE}======================================${NC}" -echo -e "${GREEN}✓ All steps completed successfully!${NC}" -echo -e "${BLUE}======================================${NC}" -echo "" -echo -e "Results:" -echo -e " CSV: ${GREEN}$CSV_FILE${NC}" -echo -e " Plots: ${GREEN}$OUTPUT_DIR/perf-cost/plots/${NC}" -echo "" - From e99192eecef552c9fdc7470fe066f6e0dd3ecbde Mon Sep 17 00:00:00 2001 From: Alexander Schlieper Date: Thu, 8 Jan 2026 15:59:29 +0100 Subject: [PATCH 29/31] Update build and deployment configurations - Modified `java_installer.sh` to use `mvn clean package` for more reliable builds. - Updated `Dockerfile.function` to unzip the function JAR and remove the original after extraction. 
--- benchmarks/100.webapps/110.dynamic-html/java/pom.xml | 1 + dockerfiles/aws/java/Dockerfile.function | 4 +++- dockerfiles/java_installer.sh | 2 +- sebs/experiments/perf_cost.py | 10 ++++++---- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/benchmarks/100.webapps/110.dynamic-html/java/pom.xml b/benchmarks/100.webapps/110.dynamic-html/java/pom.xml index d46a81c75..42de9497e 100644 --- a/benchmarks/100.webapps/110.dynamic-html/java/pom.xml +++ b/benchmarks/100.webapps/110.dynamic-html/java/pom.xml @@ -38,6 +38,7 @@ + function diff --git a/dockerfiles/aws/java/Dockerfile.function b/dockerfiles/aws/java/Dockerfile.function index bc20eb685..aa94525b1 100644 --- a/dockerfiles/aws/java/Dockerfile.function +++ b/dockerfiles/aws/java/Dockerfile.function @@ -23,6 +23,8 @@ RUN if [ -f "function/function.jar" ]; then \ ls -la function/ 2>/dev/null || true; \ exit 1; \ fi \ - && test -f function.jar + && test -f function.jar \ + && (unzip function.jar || jar xf function.jar) \ + && rm function.jar CMD ["org.serverlessbench.Handler::handleRequest"] diff --git a/dockerfiles/java_installer.sh b/dockerfiles/java_installer.sh index 59abeb5e7..d2ff97a90 100644 --- a/dockerfiles/java_installer.sh +++ b/dockerfiles/java_installer.sh @@ -13,7 +13,7 @@ if [[ -n "${POM_PATH}" ]]; then cd "${POM_DIR}" # Note: -q flag causes issues in Docker, removed for reliable builds - mvn -DskipTests package + mvn -DskipTests clean package if ls target/*.jar >/dev/null 2>&1; then # Prefer the shaded/fat JAR (exclude "original" JARs created by maven-shade-plugin) diff --git a/sebs/experiments/perf_cost.py b/sebs/experiments/perf_cost.py index 7b940f8df..998d4db45 100644 --- a/sebs/experiments/perf_cost.py +++ b/sebs/experiments/perf_cost.py @@ -342,10 +342,12 @@ def process( for func in experiments.functions(): for id, invoc in experiments.invocations(func).items(): # FIXME: compatibility with old results - if "output" in invoc.output["result"]: - del invoc.output["result"]["output"] - 
elif "result" in invoc.output["result"]: - del invoc.output["result"]["result"] + # Only process if result is a dict (some languages return primitives directly) + if isinstance(invoc.output["result"], dict): + if "output" in invoc.output["result"]: + del invoc.output["result"]["output"] + elif "result" in invoc.output["result"]: + del invoc.output["result"]["result"] name, extension = os.path.splitext(f) with open( From c4b559fdddcc4426e991d41263fdf13c7b31bc7b Mon Sep 17 00:00:00 2001 From: Alexander Schlieper Date: Thu, 8 Jan 2026 16:20:24 +0100 Subject: [PATCH 30/31] Refactor build configurations and enhance Java support - Removed unnecessary comment in `pom.xml`. - Updated `entrypoint.sh` to ensure correct ownership of the `/mnt/function` directory. - Modified `benchmark.py` to include Java files in the hash calculation and added logic for handling Java installer script. - Improved error handling and logging for Docker builds, particularly for Rust projects. --- .../100.webapps/110.dynamic-html/java/pom.xml | 1 - dockerfiles/entrypoint.sh | 2 +- sebs/benchmark.py | 63 +++++++++++++++++-- sebs/faas/container.py | 3 +- 4 files changed, 61 insertions(+), 8 deletions(-) diff --git a/benchmarks/100.webapps/110.dynamic-html/java/pom.xml b/benchmarks/100.webapps/110.dynamic-html/java/pom.xml index 42de9497e..d46a81c75 100644 --- a/benchmarks/100.webapps/110.dynamic-html/java/pom.xml +++ b/benchmarks/100.webapps/110.dynamic-html/java/pom.xml @@ -38,7 +38,6 @@ - function diff --git a/dockerfiles/entrypoint.sh b/dockerfiles/entrypoint.sh index c8e24cd4e..91f3e56d9 100755 --- a/dockerfiles/entrypoint.sh +++ b/dockerfiles/entrypoint.sh @@ -6,7 +6,7 @@ USER=${CONTAINER_USER} useradd --non-unique -m -u ${USER_ID} ${USER} groupmod --non-unique -g ${GROUP_ID} ${USER} -mkdir -p /mnt/function && chown -R ${USER}:${USER} /mnt/function +mkdir -p /mnt/function && chown -R ${USER_ID}:${GROUP_ID} /mnt/function 2>/dev/null || true export HOME=/home/${USER} echo "Running as ${USER}, 
with ${USER_ID} and ${GROUP_ID}" diff --git a/sebs/benchmark.py b/sebs/benchmark.py index 3c0deb9dd..d060b1475 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -253,7 +253,7 @@ def hash_directory(directory: str, deployment: str, language: str): "python": ["*.py", "requirements.txt*"], "nodejs": ["*.js", "package.json"], "rust": ["*.rs", "Cargo.toml", "Cargo.lock"], - "java": [], + "java": ["src", "pom.xml"], "pypy": ["*.py", "requirements.txt*"], } WRAPPERS = { @@ -268,8 +268,15 @@ def hash_directory(directory: str, deployment: str, language: str): for file_type in selected_files: for f in glob.glob(os.path.join(directory, file_type)): path = os.path.join(directory, f) - with open(path, "rb") as opened_file: - hash_sum.update(opened_file.read()) + if os.path.isdir(path): + for root, _, files in os.walk(path): + for file in sorted(files): + file_path = os.path.join(root, file) + with open(file_path, "rb") as opened_file: + hash_sum.update(opened_file.read()) + else: + with open(path, "rb") as opened_file: + hash_sum.update(opened_file.read()) # For rust, also hash the src directory recursively if language == "rust": src_dir = os.path.join(directory, "src") @@ -702,6 +709,15 @@ def ensure_image(name: str) -> None: "bind": "/mnt/function/package.sh", "mode": "ro", } + + # Mount updated java_installer.sh if language is java + if self.language_name == "java": + installer_path = os.path.abspath("dockerfiles/java_installer.sh") + if os.path.exists(installer_path): + volumes[installer_path] = { + "bind": "/sebs/installer.sh", + "mode": "ro", + } # run Docker container to install packages PACKAGE_FILES = {"python": "requirements.txt", "nodejs": "package.json", "rust": "Cargo.toml", "java": "pom.xml", "pypy": "requirements.txt"} @@ -796,14 +812,51 @@ def ensure_image(name: str) -> None: # Pass to output information on optimizing builds. # Useful for AWS where packages have to obey size limits. 
- for line in stdout.decode("utf-8").split("\n"): - if "size" in line: + build_output = "" + if isinstance(stdout, bytes): + build_output = stdout.decode("utf-8") + elif isinstance(stdout, tuple): + # exec_run returns (exit_code, output) + exit_code, output = stdout + build_output = output.decode("utf-8") if isinstance(output, bytes) else str(output) + if exit_code != 0: + self.logging.error(f"Docker build exited with code {exit_code}") + else: + build_output = str(stdout) + + for line in build_output.split("\n"): + if "size" in line or "error" in line.lower() or "Error" in line or "failed" in line.lower(): self.logging.info("Docker build: {}".format(line)) + + # For Rust, check if bootstrap binary was created + if self.language_name == "rust": + bootstrap_path = os.path.join(output_dir, "bootstrap") + if not os.path.exists(bootstrap_path): + self.logging.error("Rust build failed: bootstrap binary not found!") + self.logging.error("Build output:\n{}".format(build_output[-2000:])) # Last 2000 chars + raise RuntimeError("Rust build failed: bootstrap binary not created") except docker.errors.ContainerError as e: self.logging.error("Package build failed!") self.logging.error(e) self.logging.error(f"Docker mount volumes: {volumes}") + # For Rust, also check bootstrap even if ContainerError occurred + if self.language_name == "rust": + bootstrap_path = os.path.join(output_dir, "bootstrap") + if not os.path.exists(bootstrap_path): + self.logging.error("Rust bootstrap binary not found after Docker build failure!") raise e + else: + # Package file doesn't exist + error_msg = f"Package file {file} not found in {output_dir}" + self.logging.error(error_msg) + if self.language_name == "rust": + # List files in output_dir for debugging + files_in_dir = os.listdir(output_dir) if os.path.exists(output_dir) else [] + self.logging.error(f"Files in output_dir: {files_in_dir}") + raise RuntimeError( + f"{error_msg}. 
For Rust, Cargo.toml must exist after merging wrapper and benchmark files. " + "Check that Cargo.toml merge completed successfully." + ) def recalculate_code_size(self): self._code_size = Benchmark.directory_size(self._output_dir) diff --git a/sebs/faas/container.py b/sebs/faas/container.py index cc4b136c9..bb87646ad 100644 --- a/sebs/faas/container.py +++ b/sebs/faas/container.py @@ -210,8 +210,9 @@ def build_base_image( "BASE_IMAGE": builder_image, "TARGET_ARCHITECTURE": architecture, } + docker_platform = "linux/arm64" if architecture == "arm64" else "linux/amd64" image, _ = self.docker_client.images.build( - tag=image_uri, path=build_dir, buildargs=buildargs + tag=image_uri, path=build_dir, buildargs=buildargs, platform=docker_platform ) self.logging.info( From e299a326c54da3640afee2ece8e297081819a43c Mon Sep 17 00:00:00 2001 From: Alexander Schlieper Date: Sat, 10 Jan 2026 13:52:16 +0100 Subject: [PATCH 31/31] PR code review changes. Remove ColdStartTracker and refactor cold start detection in AWS Java handler - Deleted ColdStartTracker class from the Java AWS wrapper. - Integrated cold start detection logic directly into the Handler class. - Updated configuration to reflect changes in runtime language and version. - Removed unused cross-platform benchmarking script to streamline the repository. 
--- .../org/serverlessbench/ColdStartTracker.java | 35 -- .../java/org/serverlessbench/Handler.java | 18 +- benchmarks/wrappers/azure/pypy/handler.py | 53 +- config/example.json | 21 +- scripts/cross_platform_benchmark.py | 562 ------------------ sebs/benchmark.py | 5 +- tools/build_docker_images.py | 11 +- 7 files changed, 41 insertions(+), 664 deletions(-) delete mode 100644 benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/ColdStartTracker.java delete mode 100644 scripts/cross_platform_benchmark.py diff --git a/benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/ColdStartTracker.java b/benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/ColdStartTracker.java deleted file mode 100644 index e7cb2e011..000000000 --- a/benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/ColdStartTracker.java +++ /dev/null @@ -1,35 +0,0 @@ -package org.serverlessbench; - -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.UUID; -import java.util.concurrent.atomic.AtomicBoolean; - -final class ColdStartTracker { - - private static final AtomicBoolean COLD = new AtomicBoolean(true); - private static final Path MARKER = Path.of("/tmp/cold_run"); - - private ColdStartTracker() {} - - static boolean isCold() { - if (Files.exists(MARKER)) { - COLD.set(false); - return false; - } - boolean first = COLD.getAndSet(false); - if (first) { - try { - Files.writeString( - MARKER, - UUID.randomUUID().toString().substring(0, 8), - StandardCharsets.UTF_8); - } catch (IOException ignored) { - // best-effort marker write - } - } - return first; - } -} diff --git a/benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/Handler.java b/benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/Handler.java index 5c1781e6b..93eb41ba0 100644 --- a/benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/Handler.java +++ 
b/benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/Handler.java @@ -4,12 +4,16 @@ import com.amazonaws.services.lambda.runtime.RequestHandler; import com.fasterxml.jackson.databind.ObjectMapper; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.HashMap; import java.util.Map; public class Handler implements RequestHandler, Map> { private static final ObjectMapper MAPPER = new ObjectMapper(); + private static final Path MARKER = Path.of("/tmp/cold_run"); @Override public Map handleRequest(Map event, Context context) { @@ -24,12 +28,24 @@ public Map handleRequest(Map event, Context cont body.put("compute_time", (endNs - beginNs) / 1_000.0); body.put("results_time", 0); body.put("result", result); - body.put("is_cold", ColdStartTracker.isCold()); + body.put("is_cold", isCold()); body.put("request_id", context != null ? context.getAwsRequestId() : ""); return body; } + private boolean isCold() { + if (Files.exists(MARKER)) { + return false; + } + try { + Files.createFile(MARKER); + } catch (IOException ignored) { + // best-effort marker write + } + return true; + } + private Map normalize(Map event) { if (event == null) { return new HashMap<>(); diff --git a/benchmarks/wrappers/azure/pypy/handler.py b/benchmarks/wrappers/azure/pypy/handler.py index 4b6e662da..69703f668 100644 --- a/benchmarks/wrappers/azure/pypy/handler.py +++ b/benchmarks/wrappers/azure/pypy/handler.py @@ -202,41 +202,27 @@ def _do_POST(self): # Import user function # In Azure, function.py is in the handler directory - # AWS uses: from function import function (because AWS has function/function.py) - # Azure structure: handler/function.py, so we use: import function - function_module = None try: import function - function_module = function except ImportError as e: - # Try AWS-style import as fallback - try: - from function import function as aws_function_module - function_module = aws_function_module - except ImportError as e2: - 
logging.error(f"Failed to import function: {e}") - logging.error(f"AWS-style import also failed: {e2}") - logging.error(f"sys.path: {sys.path}") - logging.error(f"Current directory: {os.getcwd()}") - logging.error(f"Handler path exists: {os.path.exists(os.path.join(os.getcwd(), 'handler'))}") - # List files in handler directory for debugging - handler_dir = os.path.join(os.getcwd(), 'handler') - if os.path.exists(handler_dir): - try: - files = os.listdir(handler_dir) - logging.error(f"Files in handler directory: {files}") - except Exception as list_err: - logging.error(f"Failed to list handler directory: {list_err}") - self.send_json_response(500, {'error': f'Failed to import function: {str(e)}'}) - return - - if function_module is None: - self.send_json_response(500, {'error': 'Function module is None after import'}) + logging.error(f"Failed to import function: {e}") + logging.error(f"sys.path: {sys.path}") + logging.error(f"Current directory: {os.getcwd()}") + logging.error(f"Handler path exists: {os.path.exists(os.path.join(os.getcwd(), 'handler'))}") + # List files in handler directory for debugging + handler_dir = os.path.join(os.getcwd(), 'handler') + if os.path.exists(handler_dir): + try: + files = os.listdir(handler_dir) + logging.error(f"Files in handler directory: {files}") + except Exception as list_err: + logging.error(f"Failed to list handler directory: {list_err}") + self.send_json_response(500, {'error': f'Failed to import function: {str(e)}'}) return try: - # Call the user function - AWS uses: ret = function.handler(event) - ret = function_module.handler(req_json) + # Call the user function + ret = function.handler(req_json) except Exception as e: logging.error(f"Function handler error: {e}", exc_info=True) self.send_json_response(500, {'error': str(e)}) @@ -305,21 +291,12 @@ def _do_POST(self): logging.warning(f"Failed to read/write cold_run file: {e}") container_id = str(uuid.uuid4())[0:8] - is_cold_worker = False - global cold_marker - try: - _ = 
cold_marker - except NameError: - cold_marker = True - is_cold_worker = True - response_data = { 'begin': begin.strftime('%s.%f'), 'end': end.strftime('%s.%f'), 'results_time': results_time, 'result': log_data, 'is_cold': is_cold, - 'is_cold_worker': is_cold_worker, 'container_id': container_id, 'environ_container_id': os.environ.get('CONTAINER_NAME', ''), 'request_id': invocation_id diff --git a/config/example.json b/config/example.json index 313925cbf..ea62910dc 100644 --- a/config/example.json +++ b/config/example.json @@ -7,12 +7,12 @@ "architecture": "arm64", "container_deployment": true, "runtime": { - "language": "rust", - "version": "1.88" + "language": "python", + "version": "3.8" }, "type": "invocation-overhead", "perf-cost": { - "benchmark": "010.sleep", + "benchmark": "110.dynamic-html", "experiments": ["cold", "warm", "burst", "sequential"], "input-size": "test", "repetitions": 50, @@ -45,20 +45,11 @@ "deployment": { "name": "aws", "aws": { - "region": "eu-north-1", - "lambda-role": "sebs-lambda-role", - "credentials": { - "access_key": "", - "secret_key": "" - } + "region": "us-east-1", + "lambda-role": "" }, "azure": { - "region": "westeurope", - "credentials": { - "appID": "", - "tenant": "", - "password": "" - } + "region": "westeurope" }, "gcp": { "region": "europe-west1", diff --git a/scripts/cross_platform_benchmark.py b/scripts/cross_platform_benchmark.py deleted file mode 100644 index 9ac83f1d0..000000000 --- a/scripts/cross_platform_benchmark.py +++ /dev/null @@ -1,562 +0,0 @@ -#!/usr/bin/env python3 - -""" -Cross-platform benchmark comparison tool for SeBS. -Runs benchmarks across multiple languages and cloud platforms, -aggregates results, and provides comparison analysis. 
-""" - -import argparse -import json -import logging -import os -import sys -import time -from datetime import datetime -from pathlib import Path -from typing import Dict, List, Optional, Tuple -import subprocess -import traceback - -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) -PROJECT_ROOT = os.path.abspath(os.path.join(SCRIPT_DIR, os.pardir)) -sys.path.insert(0, PROJECT_ROOT) - -# Language-version mappings for different platforms -LANGUAGE_CONFIGS = { - 'aws': { - 'python': ['3.11', '3.10', '3.9', '3.8'], - 'nodejs': ['16'], - 'rust': ['1.80', '1.81', '1.82'], - 'java': ['17'], - 'pypy': ['3.11'] - }, - 'azure': { - 'python': ['3.11', '3.10', '3.9', '3.8'], - 'nodejs': ['20', '18', '16'], - 'java': ['17'], - 'pypy': ['3.11'] - }, - 'gcp': { - 'python': ['3.12', '3.11', '3.10', '3.9', '3.8'], - 'nodejs': ['20', '18'] - }, - 'local': { - 'python': ['3.11', '3.10', '3.9'], - 'nodejs': ['20', '18', '16'], - 'pypy': ['3.11'] - } -} - -class BenchmarkRunner: - """Orchestrates benchmark execution across platforms and languages.""" - - def __init__(self, output_dir: str, cache_dir: str = 'cache', verbose: bool = False, container_deployment_for: Optional[List[str]] = None): - self.output_dir = Path(output_dir).resolve() - self.cache_dir = cache_dir - self.verbose = verbose - self.container_deployment_for = set(container_deployment_for or []) - self.results = { - 'metadata': { - 'start_time': datetime.now().isoformat(), - 'end_time': None, - 'version': '1.0.0' - }, - 'benchmarks': {} - } - - # Create output directory - self.output_dir.mkdir(parents=True, exist_ok=True) - - # Setup logging - log_file = self.output_dir / 'benchmark_run.log' - logging.basicConfig( - level=logging.DEBUG if verbose else logging.INFO, - format='%(asctime)s - %(levelname)s - %(message)s', - handlers=[ - logging.FileHandler(log_file), - logging.StreamHandler() - ] - ) - self.logger = logging.getLogger(__name__) - - def run_single_benchmark( - self, - benchmark: str, - platform: str, - 
language: str, - version: str, - config_file: str, - input_size: str = 'test', - repetitions: int = 5, - memory: int = 256, - architecture: str = 'x64', - container_deployment: bool = False - ) -> Tuple[bool, Optional[str], Optional[Dict]]: - """ - Run a single benchmark configuration. - - Returns: - (success, output_file, error_message) - """ - run_id = f"{benchmark}_{platform}_{language}_{version}_{memory}MB" - - # Determine deployment type for logging - should_use_container = ( - container_deployment or - platform in self.container_deployment_for or - ((platform == 'aws' or platform == 'gcp') and language == 'pypy') - ) - deployment_type = "container" if should_use_container else "package" - - self.logger.info(f"Starting: {run_id} (deployment: {deployment_type})") - - # Create experiment output directory (use absolute path) - experiment_dir = (self.output_dir / run_id).resolve() - experiment_dir.mkdir(parents=True, exist_ok=True) - - # Update config for this run - try: - with open(config_file, 'r') as f: - config = json.load(f) - - # Update configuration - config['experiments']['runtime'] = { - 'language': language, - 'version': version - } - config['experiments']['repetitions'] = repetitions - config['experiments']['memory'] = memory - config['experiments']['architecture'] = architecture - config['deployment']['name'] = platform - - # Write updated config - run_config_file = experiment_dir / 'config.json' - with open(run_config_file, 'w') as f: - json.dump(config, f, indent=2) - - # Construct sebs.py command - cmd = [ - sys.executable, - os.path.join(PROJECT_ROOT, 'sebs.py'), - 'benchmark', - 'invoke', - benchmark, - input_size, - '--config', str(run_config_file), - '--deployment', platform, - '--language', language, - '--language-version', version, - '--memory', str(memory), - '--architecture', architecture, - '--output-dir', str(experiment_dir), - '--cache', self.cache_dir - ] - - # Add --container-deployment if requested or required - # Priority: explicit 
flag > per-platform setting > automatic for PyPy on AWS/GCP - should_use_container = ( - container_deployment or - platform in self.container_deployment_for or - ((platform == 'aws' or platform == 'gcp') and language == 'pypy') - ) - if should_use_container: - cmd.append('--container-deployment') - - if self.verbose: - cmd.append('--verbose') - - self.logger.debug(f"Command: {' '.join(cmd)}") - - # Execute benchmark (run from project root for proper path resolution) - start_time = time.time() - result = subprocess.run( - cmd, - capture_output=True, - text=True, - timeout=600, # 10 minute timeout - cwd=PROJECT_ROOT # Run from project root - ) - execution_time = time.time() - start_time - - # Ensure the directory still exists (sebs.py might have cleaned it up on error) - experiment_dir.mkdir(parents=True, exist_ok=True) - - # Save stdout/stderr - with open(experiment_dir / 'stdout.log', 'w') as f: - f.write(result.stdout) - with open(experiment_dir / 'stderr.log', 'w') as f: - f.write(result.stderr) - - if result.returncode == 0: - self.logger.info(f"✓ Completed: {run_id} ({execution_time:.2f}s)") - - # Look for experiments.json in the output - exp_json = experiment_dir / 'experiments.json' - if not exp_json.exists(): - # Try to find it in subdirectories - exp_files = list(experiment_dir.glob('**/experiments.json')) - if exp_files: - exp_json = exp_files[0] - - return True, str(experiment_dir), None - else: - error_msg = f"Failed with return code {result.returncode}" - self.logger.error(f"✗ Failed: {run_id} - {error_msg}") - self.logger.debug(f"Stderr: {result.stderr[:500]}") - return False, str(experiment_dir), error_msg - - except subprocess.TimeoutExpired: - error_msg = "Benchmark execution timed out" - self.logger.error(f"✗ Timeout: {run_id}") - return False, str(experiment_dir), error_msg - except Exception as e: - error_msg = f"Exception: {str(e)}" - self.logger.error(f"✗ Error: {run_id} - {error_msg}") - self.logger.debug(traceback.format_exc()) - return 
False, str(experiment_dir), error_msg - - def run_comparison( - self, - benchmarks: List[str], - platforms: List[str], - languages: List[str], - config_file: str, - input_size: str = 'test', - repetitions: int = 5, - memory_sizes: List[int] = [256], - architecture: str = 'x64', - versions: Optional[Dict[str, List[str]]] = None, - container_deployment: bool = False - ): - """ - Run benchmarks across multiple configurations. - - Args: - benchmarks: List of benchmark names (e.g., ['010.sleep', '110.dynamic-html']) - platforms: List of platforms (e.g., ['aws', 'azure']) - languages: List of languages (e.g., ['python', 'nodejs']) - config_file: Path to base configuration file - input_size: Benchmark input size - repetitions: Number of repetitions per benchmark - memory_sizes: List of memory configurations to test - architecture: Target architecture (x64 or arm64) - versions: Optional dict mapping language to specific versions - """ - total_runs = 0 - successful_runs = 0 - failed_runs = 0 - - for benchmark in benchmarks: - self.results['benchmarks'][benchmark] = {} - - for platform in platforms: - self.results['benchmarks'][benchmark][platform] = {} - - for language in languages: - # Check if language is supported on this platform - if language not in LANGUAGE_CONFIGS.get(platform, {}): - self.logger.warning(f"Skipping {language} on {platform} (not supported)") - continue - - # Get versions to test - if versions and language in versions: - lang_versions = versions[language] - else: - # Use first available version by default - lang_versions = [LANGUAGE_CONFIGS[platform][language][0]] - - self.results['benchmarks'][benchmark][platform][language] = {} - - for version in lang_versions: - # Verify version is supported - if version not in LANGUAGE_CONFIGS[platform][language]: - self.logger.warning( - f"Skipping {language} {version} on {platform} (version not supported)" - ) - continue - - self.results['benchmarks'][benchmark][platform][language][version] = {} - - for memory in 
memory_sizes: - total_runs += 1 - - success, output_dir, error = self.run_single_benchmark( - benchmark=benchmark, - platform=platform, - language=language, - version=version, - config_file=config_file, - input_size=input_size, - repetitions=repetitions, - memory=memory, - architecture=architecture, - container_deployment=container_deployment - ) - - result_entry = { - 'success': success, - 'output_directory': output_dir, - 'memory_mb': memory, - 'architecture': architecture, - 'repetitions': repetitions, - 'input_size': input_size - } - - if success: - successful_runs += 1 - # Try to extract metrics and full experiment data - try: - extracted = self._extract_metrics(output_dir) - - # Store full experiments.json data if available - if 'full_experiment_data' in extracted: - result_entry['experiment_data'] = extracted['full_experiment_data'] - # Also store summary metrics - result_entry['metrics'] = { - k: v for k, v in extracted.items() - if k != 'full_experiment_data' - } - else: - result_entry['metrics'] = extracted - except Exception as e: - self.logger.warning(f"Could not extract metrics: {e}") - else: - failed_runs += 1 - result_entry['error'] = error - - self.results['benchmarks'][benchmark][platform][language][version][f'{memory}MB'] = result_entry - - # Update end time and summary - self.results['metadata']['end_time'] = datetime.now().isoformat() - self.results['metadata']['summary'] = { - 'total_runs': total_runs, - 'successful': successful_runs, - 'failed': failed_runs, - 'success_rate': f"{(successful_runs/total_runs*100):.1f}%" if total_runs > 0 else "N/A" - } - - # Save results - output_file = self.output_dir / 'comparison_results.json' - with open(output_file, 'w') as f: - json.dump(self.results, f, indent=2) - - self.logger.info(f"\n{'='*60}") - self.logger.info(f"Benchmark Comparison Complete!") - self.logger.info(f"{'='*60}") - self.logger.info(f"Total runs: {total_runs}") - self.logger.info(f"Successful: {successful_runs}") - 
self.logger.info(f"Failed: {failed_runs}") - self.logger.info(f"Results saved to: {output_file}") - - return self.results - - def _extract_metrics(self, output_dir: str) -> Dict: - """Extract key metrics from experiment output and preserve full experiments.json data.""" - metrics = {} - - # Look for experiments.json - exp_json_paths = [ - Path(output_dir) / 'experiments.json', - *Path(output_dir).glob('**/experiments.json') - ] - - for exp_json in exp_json_paths: - if exp_json.exists(): - with open(exp_json, 'r') as f: - data = json.load(f) - - # Store the full experiments.json data - metrics['full_experiment_data'] = data - - # Extract timing information from invocations for summary - if '_invocations' in data: - invocations = data['_invocations'] - - for func_name, func_data in invocations.items(): - execution_times = [] - cold_starts = 0 - warm_starts = 0 - - for inv_id, inv_data in func_data.items(): - if 'times' in inv_data: - if 'client' in inv_data['times']: - # Client time is in microseconds, convert to ms - execution_times.append(inv_data['times']['client'] / 1000) - - if 'stats' in inv_data: - if inv_data['stats'].get('cold_start'): - cold_starts += 1 - else: - warm_starts += 1 - - if execution_times: - metrics['execution_times_ms'] = execution_times - metrics['avg_execution_time_ms'] = sum(execution_times) / len(execution_times) - metrics['min_execution_time_ms'] = min(execution_times) - metrics['max_execution_time_ms'] = max(execution_times) - metrics['cold_starts'] = cold_starts - metrics['warm_starts'] = warm_starts - - break - - return metrics - - -def main(): - parser = argparse.ArgumentParser( - description='Run cross-platform benchmark comparisons', - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -Examples: - # Compare Python and Node.js on AWS and Azure (with auto-plotting) - %(prog)s --benchmarks 010.sleep 110.dynamic-html \\ - --platforms aws azure \\ - --languages python nodejs \\ - --config config/example.json \\ - 
--output results/comparison_$(date +%%Y%%m%%d) \\ - --plot - - # Compare AWS (container) vs Azure (package deployment) - %(prog)s --benchmarks 010.sleep \\ - --platforms aws azure \\ - --languages python \\ - --container-deployment-for aws \\ - --config config/example.json \\ - --output results/aws_container_vs_azure_package \\ - --plot - - # Compare specific Python versions on AWS - %(prog)s --benchmarks 501.graph-pagerank \\ - --platforms aws \\ - --languages python \\ - --python-versions 3.11 3.10 3.9 \\ - --memory 512 1024 \\ - --config config/example.json \\ - --plot - """ - ) - - parser.add_argument('--benchmarks', nargs='+', required=True, - help='Benchmark names to run (e.g., 010.sleep 110.dynamic-html)') - parser.add_argument('--platforms', nargs='+', required=True, - choices=['aws', 'azure', 'gcp', 'local'], - help='Platforms to test on') - parser.add_argument('--languages', nargs='+', required=True, - help='Languages to test (e.g., python nodejs rust java)') - parser.add_argument('--config', required=True, - help='Base configuration file') - parser.add_argument('--output', required=True, - help='Output directory for results') - - # Optional parameters - parser.add_argument('--input-size', default='test', - choices=['test', 'small', 'large'], - help='Benchmark input size (default: test)') - parser.add_argument('--repetitions', type=int, default=5, - help='Number of repetitions per benchmark (default: 5)') - parser.add_argument('--memory', nargs='+', type=int, default=[256], - help='Memory sizes in MB to test (default: 256)') - parser.add_argument('--architecture', default='x64', - choices=['x64', 'arm64'], - help='Target architecture (default: x64)') - parser.add_argument('--cache', default='cache', - help='Cache directory (default: cache)') - - # Language-specific version overrides - parser.add_argument('--python-versions', nargs='+', - help='Specific Python versions to test') - parser.add_argument('--nodejs-versions', nargs='+', - help='Specific Node.js 
versions to test') - parser.add_argument('--rust-versions', nargs='+', - help='Specific Rust versions to test') - parser.add_argument('--java-versions', nargs='+', - help='Specific Java versions to test') - - parser.add_argument('--verbose', action='store_true', - help='Enable verbose output') - - parser.add_argument('--container-deployment', action='store_true', - help='Run functions as containers (all platforms)') - parser.add_argument('--container-deployment-for', nargs='+', - help='Specific platforms to use container deployment (e.g., aws gcp)') - - parser.add_argument('--plot', action='store_true', - help='Automatically generate plots after benchmarking') - - args = parser.parse_args() - - # Build version overrides - versions = {} - if args.python_versions: - versions['python'] = args.python_versions - if args.nodejs_versions: - versions['nodejs'] = args.nodejs_versions - if args.rust_versions: - versions['rust'] = args.rust_versions - if args.java_versions: - versions['java'] = args.java_versions - - # Create runner - runner = BenchmarkRunner( - output_dir=args.output, - cache_dir=args.cache, - verbose=args.verbose, - container_deployment_for=args.container_deployment_for - ) - - # Run comparison - try: - results = runner.run_comparison( - benchmarks=args.benchmarks, - platforms=args.platforms, - languages=args.languages, - config_file=args.config, - input_size=args.input_size, - repetitions=args.repetitions, - memory_sizes=args.memory, - architecture=args.architecture, - versions=versions if versions else None, - container_deployment=args.container_deployment - ) - - print("\n" + "="*60) - print("✓ Benchmark comparison completed successfully!") - print("="*60) - print(f"Results: {args.output}/comparison_results.json") - print(f"Logs: {args.output}/benchmark_run.log") - - # Auto-generate plots if requested - if args.plot: - print("\n" + "="*60) - print("Generating plots...") - print("="*60) - try: - # Suppress matplotlib debug output - 
logging.getLogger('matplotlib').setLevel(logging.WARNING) - logging.getLogger('PIL').setLevel(logging.WARNING) - - from plot_comparison import BenchmarkVisualizer - results_file = f"{args.output}/comparison_results.json" - visualizer = BenchmarkVisualizer(results_file) - visualizer.create_all_plots() - print(f"\n✓ Plots saved to: {visualizer.output_dir}") - except Exception as e: - print(f"Warning: Failed to generate plots: {e}") - print("You can generate plots manually with:") - print(f" python scripts/plot_comparison.py {args.output}/comparison_results.json") - - return 0 - - except KeyboardInterrupt: - print("\n\nBenchmark interrupted by user") - return 130 - except Exception as e: - print(f"\n\nError during benchmark execution: {e}") - traceback.print_exc() - return 1 - - -if __name__ == '__main__': - sys.exit(main()) - diff --git a/sebs/benchmark.py b/sebs/benchmark.py index d060b1475..7d82f34c9 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -364,9 +364,8 @@ def copy_code(self, output_dir): matches = glob.glob(os.path.join(path, file_type)) self.logging.info(f"copy_code: Pattern {file_type} matched {len(matches)} files: {matches}") for f in matches: - dest = os.path.join(output_dir, os.path.basename(f)) - self.logging.info(f"copy_code: Copying {f} to {dest}") - shutil.copy2(f, dest) + self.logging.info(f"copy_code: Copying {f} to {output_dir}") + shutil.copy2(f, output_dir) # For Rust, copy the entire src directory if self.language_name == "rust": diff --git a/tools/build_docker_images.py b/tools/build_docker_images.py index c80ecfae4..21a130498 100755 --- a/tools/build_docker_images.py +++ b/tools/build_docker_images.py @@ -69,21 +69,12 @@ def build(image_type, system, language=None, version=None, version_name=None): "tag": target, } - # Platform selection priority: CLI arg > env var > system config + # Platform selection priority: CLI arg > env var platform_arg = args.platform or os.environ.get("DOCKER_DEFAULT_PLATFORM") if platform_arg: 
build_kwargs["platform"] = platform_arg elif PLATFORM: build_kwargs["platform"] = PLATFORM - elif system in config and "architecture" in config[system]: - archs = config[system]["architecture"] - if len(archs) == 1: - if archs[0] == "x64": - build_kwargs["platform"] = "linux/amd64" - print(f"Automatically using platform linux/amd64 for {system}") - elif archs[0] == "arm64": - build_kwargs["platform"] = "linux/arm64" - print(f"Automatically using platform linux/arm64 for {system}") try: client.images.build(**build_kwargs)