diff --git a/.gitignore b/.gitignore index 274165ed8..ae5108f68 100644 --- a/.gitignore +++ b/.gitignore @@ -189,4 +189,5 @@ cache *.iml # MacOS Finder -**/.DS_Store \ No newline at end of file +**/.DS_Store +results/* \ No newline at end of file diff --git a/benchmarks/000.microbenchmarks/010.sleep/config.json b/benchmarks/000.microbenchmarks/010.sleep/config.json index 93ce2f561..e195d3f0c 100644 --- a/benchmarks/000.microbenchmarks/010.sleep/config.json +++ b/benchmarks/000.microbenchmarks/010.sleep/config.json @@ -1,6 +1,6 @@ { "timeout": 120, "memory": 128, - "languages": ["python", "nodejs"], + "languages": ["python", "nodejs", "java", "rust", "pypy"], "modules": [] } diff --git a/benchmarks/000.microbenchmarks/010.sleep/java/src/main/java/function/Function.java b/benchmarks/000.microbenchmarks/010.sleep/java/src/main/java/function/Function.java new file mode 100644 index 000000000..acd2b8f32 --- /dev/null +++ b/benchmarks/000.microbenchmarks/010.sleep/java/src/main/java/function/Function.java @@ -0,0 +1,33 @@ +package function; + +import java.util.HashMap; +import java.util.Map; + +public class Function { + + public Map handler(Map event) { + double sleepSeconds = parseSeconds(event.get("sleep")); + try { + Thread.sleep((long) (sleepSeconds * 1000)); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + Map result = new HashMap<>(); + result.put("result", sleepSeconds); + return result; + } + + private double parseSeconds(Object value) { + if (value instanceof Number) { + return ((Number) value).doubleValue(); + } + if (value instanceof String) { + try { + return Double.parseDouble((String) value); + } catch (NumberFormatException ignored) { + return 0; + } + } + return 0; + } +} diff --git a/benchmarks/000.microbenchmarks/010.sleep/pypy/function.py b/benchmarks/000.microbenchmarks/010.sleep/pypy/function.py new file mode 100644 index 000000000..7dda59a57 --- /dev/null +++ b/benchmarks/000.microbenchmarks/010.sleep/pypy/function.py 
@@ -0,0 +1,9 @@ + +from time import sleep + +def handler(event): + + # start timing + sleep_time = event.get('sleep') + sleep(sleep_time) + return { 'result': sleep_time } diff --git a/benchmarks/000.microbenchmarks/010.sleep/rust/.gitignore b/benchmarks/000.microbenchmarks/010.sleep/rust/.gitignore new file mode 100644 index 000000000..34b463310 --- /dev/null +++ b/benchmarks/000.microbenchmarks/010.sleep/rust/.gitignore @@ -0,0 +1,2 @@ +target/ +bootstrap diff --git a/benchmarks/000.microbenchmarks/010.sleep/rust/Cargo.toml b/benchmarks/000.microbenchmarks/010.sleep/rust/Cargo.toml new file mode 100644 index 000000000..67b85cb29 --- /dev/null +++ b/benchmarks/000.microbenchmarks/010.sleep/rust/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "sleep-benchmark" +version = "0.1.0" +edition = "2021" +rust-version = "1.88" + +# Note: This Cargo.toml only contains benchmark-specific dependencies. +# Wrapper dependencies (lambda_http, aws-sdk-*, etc.) are provided by the wrapper Cargo.toml +# and will be merged during the build process. 
+ +[dependencies] +# Benchmark-specific dependencies only +# serde is already in wrapper, but we can override features if needed +serde = { version = "1.0", features = ["derive"] } diff --git a/benchmarks/000.microbenchmarks/010.sleep/rust/src/function.rs b/benchmarks/000.microbenchmarks/010.sleep/rust/src/function.rs new file mode 100644 index 000000000..64a34cd3c --- /dev/null +++ b/benchmarks/000.microbenchmarks/010.sleep/rust/src/function.rs @@ -0,0 +1,24 @@ +use serde::{Deserialize, Serialize}; +use std::thread; +use std::time::Duration; + +#[derive(Deserialize)] +pub struct RequestPayload { + pub sleep: Option, +} + +#[derive(Serialize)] +pub struct FunctionResponse { + pub result: f64, +} + +pub fn handler(event: RequestPayload) -> FunctionResponse { + let sleep_time = event.sleep.unwrap_or(0.0); + if sleep_time > 0.0 { + thread::sleep(Duration::from_secs_f64(sleep_time)); + } + + FunctionResponse { + result: sleep_time, + } +} diff --git a/benchmarks/000.microbenchmarks/020.network-benchmark/config.json b/benchmarks/000.microbenchmarks/020.network-benchmark/config.json index c3c2c73b1..4011ea075 100644 --- a/benchmarks/000.microbenchmarks/020.network-benchmark/config.json +++ b/benchmarks/000.microbenchmarks/020.network-benchmark/config.json @@ -1,6 +1,6 @@ { "timeout": 30, "memory": 128, - "languages": ["python"], - "modules": [] + "languages": ["python", "pypy"], + "modules": ["storage"] } diff --git a/benchmarks/000.microbenchmarks/020.network-benchmark/pypy/function.py b/benchmarks/000.microbenchmarks/020.network-benchmark/pypy/function.py new file mode 100644 index 000000000..44398b7bb --- /dev/null +++ b/benchmarks/000.microbenchmarks/020.network-benchmark/pypy/function.py @@ -0,0 +1,58 @@ +import csv +import json +import os.path +import socket +from datetime import datetime +from time import sleep + +import storage + +def handler(event): + + request_id = event['request-id'] + address = event['server-address'] + port = event['server-port'] + 
repetitions = event['repetitions'] + output_bucket = event.get('bucket').get('bucket') + output_prefix = event.get('bucket').get('output') + times = [] + i = 0 + socket.setdefaulttimeout(3) + server_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server_socket.bind(('', 0)) + message = request_id.encode('utf-8') + adr = (address, port) + consecutive_failures = 0 + while i < repetitions + 1: + try: + send_begin = datetime.now().timestamp() + server_socket.sendto(message, adr) + msg, addr = server_socket.recvfrom(1024) + recv_end = datetime.now().timestamp() + except socket.timeout: + i += 1 + consecutive_failures += 1 + if consecutive_failures == 5: + print("Can't setup the connection") + break + continue + if i > 0: + times.append([i, send_begin, recv_end]) + i += 1 + consecutive_failures = 0 + server_socket.settimeout(2) + server_socket.close() + + if consecutive_failures != 5: + with open('/tmp/data.csv', 'w', newline='') as csvfile: + writer = csv.writer(csvfile, delimiter=',') + writer.writerow(["id", "client_send", "client_rcv"]) + for row in times: + writer.writerow(row) + + client = storage.storage.get_instance() + filename = 'results-{}.csv'.format(request_id) + key = client.upload(output_bucket, os.path.join(output_prefix, filename), '/tmp/data.csv') + + return { 'result': key } diff --git a/benchmarks/000.microbenchmarks/030.clock-synchronization/config.json b/benchmarks/000.microbenchmarks/030.clock-synchronization/config.json index c3c2c73b1..4011ea075 100644 --- a/benchmarks/000.microbenchmarks/030.clock-synchronization/config.json +++ b/benchmarks/000.microbenchmarks/030.clock-synchronization/config.json @@ -1,6 +1,6 @@ { "timeout": 30, "memory": 128, - "languages": ["python"], - "modules": [] + "languages": ["python", "pypy"], + "modules": ["storage"] } diff --git a/benchmarks/000.microbenchmarks/030.clock-synchronization/pypy/function.py 
b/benchmarks/000.microbenchmarks/030.clock-synchronization/pypy/function.py new file mode 100644 index 000000000..c3f3f3934 --- /dev/null +++ b/benchmarks/000.microbenchmarks/030.clock-synchronization/pypy/function.py @@ -0,0 +1,74 @@ +import csv +import json +import os +import socket +from datetime import datetime +from time import sleep + +import storage + +def handler(event): + + request_id = event['request-id'] + address = event['server-address'] + port = event['server-port'] + repetitions = event['repetitions'] + output_bucket = event.get('bucket').get('bucket') + output_prefix = event.get('bucket').get('output') + times = [] + print("Starting communication with {}:{}".format(address, port)) + i = 0 + socket.setdefaulttimeout(4) + server_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server_socket.bind(('', 0)) + message = request_id.encode('utf-8') + adr = (address, port) + consecutive_failures = 0 + measurements_not_smaller = 0 + cur_min = 0 + while i < 1000: + try: + send_begin = datetime.now().timestamp() + server_socket.sendto(message, adr) + msg, addr = server_socket.recvfrom(1024) + recv_end = datetime.now().timestamp() + except socket.timeout: + i += 1 + consecutive_failures += 1 + if consecutive_failures == 7: + print("Can't setup the connection") + break + continue + if i > 0: + times.append([i, send_begin, recv_end]) + cur_time = recv_end - send_begin + print("Time {} Min Time {} NotSmaller {}".format(cur_time, cur_min, measurements_not_smaller)) + if cur_time > cur_min and cur_min > 0: + measurements_not_smaller += 1 + if measurements_not_smaller == repetitions: + message = "stop".encode('utf-8') + server_socket.sendto(message, adr) + break + else: + cur_min = cur_time + measurements_not_smaller = 0 + i += 1 + consecutive_failures = 0 + server_socket.settimeout(4) + server_socket.close() + + if consecutive_failures != 5: + with open('/tmp/data.csv', 'w', newline='') 
as csvfile: + writer = csv.writer(csvfile, delimiter=',') + writer.writerow(["id", "client_send", "client_rcv"]) + for row in times: + writer.writerow(row) + + client = storage.storage.get_instance() + filename = 'results-{}.csv'.format(request_id) + key = client.upload(output_bucket, os.path.join(output_prefix, filename), '/tmp/data.csv') + else: + key = None + + return { 'result': {'bucket-key': key, 'timestamp': event['income-timestamp']} } diff --git a/benchmarks/000.microbenchmarks/030.clock-synchronization/pypy/init.sh b/benchmarks/000.microbenchmarks/030.clock-synchronization/pypy/init.sh new file mode 100755 index 000000000..ecf0aff65 --- /dev/null +++ b/benchmarks/000.microbenchmarks/030.clock-synchronization/pypy/init.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +DIR=$1 +VERBOSE=$2 +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +cp ${SCRIPT_DIR}/file ${DIR} diff --git a/benchmarks/000.microbenchmarks/040.server-reply/config.json b/benchmarks/000.microbenchmarks/040.server-reply/config.json index 93ce2f561..53f6349d6 100644 --- a/benchmarks/000.microbenchmarks/040.server-reply/config.json +++ b/benchmarks/000.microbenchmarks/040.server-reply/config.json @@ -1,6 +1,6 @@ { "timeout": 120, "memory": 128, - "languages": ["python", "nodejs"], + "languages": ["python", "nodejs", "pypy"], "modules": [] } diff --git a/benchmarks/000.microbenchmarks/040.server-reply/pypy/function.py b/benchmarks/000.microbenchmarks/040.server-reply/pypy/function.py new file mode 100644 index 000000000..fb5b57aa3 --- /dev/null +++ b/benchmarks/000.microbenchmarks/040.server-reply/pypy/function.py @@ -0,0 +1,13 @@ + +import socket +from time import sleep + +def handler(event): + + # start timing + addr = (event.get('ip-address'), event.get('port')) + socket.setdefaulttimeout(20) + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.connect(addr) + msg = s.recv(1024).decode() + return {"result": msg} diff --git 
a/benchmarks/100.webapps/110.dynamic-html/config.json b/benchmarks/100.webapps/110.dynamic-html/config.json index 25254c247..cdeb1aa30 100644 --- a/benchmarks/100.webapps/110.dynamic-html/config.json +++ b/benchmarks/100.webapps/110.dynamic-html/config.json @@ -1,6 +1,6 @@ { "timeout": 10, "memory": 128, - "languages": ["python", "nodejs"], + "languages": ["python", "nodejs", "java", "rust", "pypy"], "modules": [] } diff --git a/benchmarks/100.webapps/110.dynamic-html/java/init.sh b/benchmarks/100.webapps/110.dynamic-html/java/init.sh new file mode 100755 index 000000000..b26574290 --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/java/init.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +OUTPUT_DIR=$1 + +# Copy templates directory to the output directory +cp -r templates "$OUTPUT_DIR/" diff --git a/benchmarks/100.webapps/110.dynamic-html/java/pom.xml b/benchmarks/100.webapps/110.dynamic-html/java/pom.xml new file mode 100644 index 000000000..d46a81c75 --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/java/pom.xml @@ -0,0 +1,83 @@ + + + 4.0.0 + function + dynamic-html + 1.0 + + 17 + 17 + UTF-8 + + + + + com.github.spullara.mustache.java + compiler + 0.9.10 + + + + com.amazonaws + aws-lambda-java-core + 1.2.3 + + + + com.microsoft.azure.functions + azure-functions-java-library + 3.0.0 + + + + com.fasterxml.jackson.core + jackson-databind + 2.17.1 + + + + function + + + ${project.basedir}/templates + templates + + **/*.html + + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.5.1 + + + package + + shade + + + false + + + *:* + + META-INF/*.SF + META-INF/*.RSA + META-INF/*.DSA + module-info.class + META-INF/versions/*/module-info.class + META-INF/versions/**/module-info.class + + + + + + + + + + diff --git a/benchmarks/100.webapps/110.dynamic-html/java/src/main/java/function/Function.java b/benchmarks/100.webapps/110.dynamic-html/java/src/main/java/function/Function.java new file mode 100644 index 000000000..20b38f474 --- /dev/null +++ 
b/benchmarks/100.webapps/110.dynamic-html/java/src/main/java/function/Function.java @@ -0,0 +1,100 @@ +package function; + +import com.github.mustachejava.DefaultMustacheFactory; +import com.github.mustachejava.Mustache; +import com.github.mustachejava.MustacheFactory; + +import java.io.*; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.*; + +public class Function { + + private static final DateTimeFormatter DATE_FORMATTER = + DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); + + public Map handler(Map event) { + try { + // Get input parameters + String username = (String) event.getOrDefault("username", "Guest"); + int randomLen = parseRandomLen(event.get("random_len")); + + // Generate random numbers + List randomNumbers = generateRandomNumbers(randomLen); + + // Get current time + String currentTime = LocalDateTime.now().format(DATE_FORMATTER); + + // Prepare template data + Map templateData = new HashMap<>(); + templateData.put("username", username); + templateData.put("cur_time", currentTime); + templateData.put("random_numbers", randomNumbers); + + // Render HTML + String html = renderTemplate(templateData); + + // Return result + Map result = new HashMap<>(); + result.put("result", html); + return result; + + } catch (Exception e) { + // Return error as result to avoid crashing + Map result = new HashMap<>(); + StringWriter sw = new StringWriter(); + PrintWriter pw = new PrintWriter(sw); + e.printStackTrace(pw); + result.put("result", "

Error

" + 
+                      sw.toString() + "
"); + return result; + } + } + + private int parseRandomLen(Object value) { + if (value instanceof Number) { + return ((Number) value).intValue(); + } + if (value instanceof String) { + try { + return Integer.parseInt((String) value); + } catch (NumberFormatException e) { + return 10; // default + } + } + return 10; // default + } + + private List generateRandomNumbers(int count) { + Random random = new Random(); + List numbers = new ArrayList<>(count); + for (int i = 0; i < count; i++) { + numbers.add(random.nextInt(1000000)); + } + return numbers; + } + + private String renderTemplate(Map data) throws Exception { + // Try to load template from classpath + InputStream templateStream = getClass().getClassLoader() + .getResourceAsStream("templates/template.html"); + + if (templateStream == null) { + throw new IOException("Template not found in classpath"); + } + + // Create Mustache factory and compile template + MustacheFactory mf = new DefaultMustacheFactory(); + Mustache mustache; + + try (InputStreamReader reader = new InputStreamReader(templateStream)) { + mustache = mf.compile(reader, "template"); + } + + // Render template + StringWriter writer = new StringWriter(); + mustache.execute(writer, data).flush(); + return writer.toString(); + } +} diff --git a/benchmarks/100.webapps/110.dynamic-html/java/templates/template.html b/benchmarks/100.webapps/110.dynamic-html/java/templates/template.html new file mode 100644 index 000000000..46199563c --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/java/templates/template.html @@ -0,0 +1,26 @@ + + + + Randomly generated data. + + + + + +
+

Welcome {{username}}!

+

Data generated at: {{cur_time}}!

+

Requested random numbers:

+
    + {{#random_numbers}} +
  • {{.}}
  • + {{/random_numbers}} +
+
+ + diff --git a/benchmarks/100.webapps/110.dynamic-html/pypy/function.py b/benchmarks/100.webapps/110.dynamic-html/pypy/function.py new file mode 100644 index 000000000..7c990f4eb --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/pypy/function.py @@ -0,0 +1,22 @@ +from datetime import datetime +from random import sample +from os import path +from time import time +import os + +from jinja2 import Template + +SCRIPT_DIR = path.abspath(path.join(path.dirname(__file__))) + +def handler(event): + + # start timing + name = event.get('username') + size = event.get('random_len') + cur_time = datetime.now() + random_numbers = sample(range(0, 1000000), size) + template = Template( open(path.join(SCRIPT_DIR, 'templates', 'template.html'), 'r').read()) + html = template.render(username = name, cur_time = cur_time, random_numbers = random_numbers) + # end timing + # dump stats + return {'result': html} diff --git a/benchmarks/100.webapps/110.dynamic-html/pypy/init.sh b/benchmarks/100.webapps/110.dynamic-html/pypy/init.sh new file mode 100755 index 000000000..7b047bff1 --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/pypy/init.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +DIR=$1 +VERBOSE=$2 +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +path="${SCRIPT_DIR}/templates/" +if [ "$VERBOSE" = true ]; then + echo "Update ${DIR} with static templates ${path}" +fi +cp -r ${SCRIPT_DIR}/templates ${DIR} diff --git a/benchmarks/100.webapps/110.dynamic-html/pypy/requirements.txt b/benchmarks/100.webapps/110.dynamic-html/pypy/requirements.txt new file mode 100644 index 000000000..5ca569440 --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/pypy/requirements.txt @@ -0,0 +1 @@ +jinja2>=2.10.3 diff --git a/benchmarks/100.webapps/110.dynamic-html/pypy/templates/template.html b/benchmarks/100.webapps/110.dynamic-html/pypy/templates/template.html new file mode 100644 index 000000000..d4a11f019 --- /dev/null +++ 
b/benchmarks/100.webapps/110.dynamic-html/pypy/templates/template.html @@ -0,0 +1,26 @@ + + + + Randomly generated data. + + + + + +
+

Welcome {{username}}!

+

Data generated at: {{cur_time}}!

+

Requested random numbers:

+
    + {% for n in random_numbers %} +
  • {{n}}
  • + {% endfor %} +
+
+ + diff --git a/benchmarks/100.webapps/110.dynamic-html/rust/Cargo.toml b/benchmarks/100.webapps/110.dynamic-html/rust/Cargo.toml new file mode 100644 index 000000000..02fb138eb --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/rust/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "dynamic-html-benchmark" +version = "0.1.0" +edition = "2021" +rust-version = "1.88" + +# Note: This Cargo.toml only contains benchmark-specific dependencies. +# Wrapper dependencies (lambda_http, aws-sdk-*, etc.) are provided by the wrapper Cargo.toml +# and will be merged during the build process. + +[dependencies] +# Benchmark-specific dependencies only +serde = { version = "1.0", features = ["derive"] } +rand = "0.8" +chrono = "0.4" + diff --git a/benchmarks/100.webapps/110.dynamic-html/rust/src/function.rs b/benchmarks/100.webapps/110.dynamic-html/rust/src/function.rs new file mode 100644 index 000000000..e644f12ce --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/rust/src/function.rs @@ -0,0 +1,52 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Deserialize)] +pub struct RequestPayload { + pub username: String, + pub random_len: usize, +} + +#[derive(Serialize)] +pub struct FunctionResponse { + pub result: String, +} + +pub fn handler(event: RequestPayload) -> FunctionResponse { + // Generate random numbers + use rand::Rng; + let mut rng = rand::thread_rng(); + let random_numbers: Vec = (0..event.random_len) + .map(|_| rng.gen_range(0..1_000_000)) + .collect(); + + // Get current time formatted as locale string + use chrono::Local; + let cur_time = Local::now().format("%Y-%m-%d %H:%M:%S").to_string(); + + // Use embedded template (compiled into binary) + // This is more reliable than reading from filesystem in Lambda + let template_content = include_str!("templates/template.html"); + + // Simple template rendering (replace placeholders) + // Generate list items for random numbers + let list_items: String = random_numbers + .iter() + .map(|n| format!("
  • {}
  • ", n)) + .collect::>() + .join("\n"); + + // Replace template variables + let html = template_content + .replace("{{username}}", &event.username) + .replace("{{cur_time}}", &cur_time) + // Replace the entire loop block with generated list items + .replace( + " {% for n in random_numbers %}\n
  • {{n}}
  • \n {% endfor %}", + &list_items, + ); + + FunctionResponse { + result: html, + } +} + diff --git a/benchmarks/100.webapps/110.dynamic-html/rust/src/templates/template.html b/benchmarks/100.webapps/110.dynamic-html/rust/src/templates/template.html new file mode 100644 index 000000000..284499ded --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/rust/src/templates/template.html @@ -0,0 +1,27 @@ + + + + Randomly generated data. + + + + + +
    +

    Welcome {{username}}!

    +

    Data generated at: {{cur_time}}!

    +

    Requested random numbers:

    +
      + {% for n in random_numbers %} +
    • {{n}}
    • + {% endfor %} +
    +
    + + + diff --git a/benchmarks/100.webapps/110.dynamic-html/rust/templates/template.html b/benchmarks/100.webapps/110.dynamic-html/rust/templates/template.html new file mode 100644 index 000000000..284499ded --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/rust/templates/template.html @@ -0,0 +1,27 @@ + + + + Randomly generated data. + + + + + +
    +

    Welcome {{username}}!

    +

    Data generated at: {{cur_time}}!

    +

    Requested random numbers:

    +
      + {% for n in random_numbers %} +
    • {{n}}
    • + {% endfor %} +
    +
    + + + diff --git a/benchmarks/100.webapps/120.uploader/config.json b/benchmarks/100.webapps/120.uploader/config.json index cbc635670..90bf42e3b 100644 --- a/benchmarks/100.webapps/120.uploader/config.json +++ b/benchmarks/100.webapps/120.uploader/config.json @@ -1,6 +1,6 @@ { "timeout": 30, "memory": 128, - "languages": ["python", "nodejs"], + "languages": ["python", "nodejs", "pypy"], "modules": ["storage"] } diff --git a/benchmarks/100.webapps/120.uploader/pypy/function.py b/benchmarks/100.webapps/120.uploader/pypy/function.py new file mode 100644 index 000000000..d032bbdb6 --- /dev/null +++ b/benchmarks/100.webapps/120.uploader/pypy/function.py @@ -0,0 +1,48 @@ + +import datetime +import os + +import urllib.request + +from . import storage +client = storage.storage.get_instance() + +SEBS_USER_AGENT = "SeBS/1.2 (https://github.com/spcl/serverless-benchmarks) SeBS Benchmark Suite/1.2" + +def handler(event): + + bucket = event.get('bucket').get('bucket') + output_prefix = event.get('bucket').get('output') + url = event.get('object').get('url') + name = os.path.basename(url) + download_path = '/tmp/{}'.format(name) + + process_begin = datetime.datetime.now() + req = urllib.request.Request(url) + req.add_header('User-Agent', SEBS_USER_AGENT) + with open(download_path, 'wb') as f: + with urllib.request.urlopen(req) as response: + f.write(response.read()) + size = os.path.getsize(download_path) + process_end = datetime.datetime.now() + + upload_begin = datetime.datetime.now() + key_name = client.upload(bucket, os.path.join(output_prefix, name), download_path) + upload_end = datetime.datetime.now() + + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1) + return { + 'result': { + 'bucket': bucket, + 'url': url, + 'key': key_name + }, + 'measurement': { + 'download_time': 0, + 'download_size': 0, + 'upload_time': upload_time, + 'upload_size': size, + 
'compute_time': process_time + } + } diff --git a/benchmarks/100.webapps/120.uploader/pypy/requirements.txt b/benchmarks/100.webapps/120.uploader/pypy/requirements.txt new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/100.webapps/130.crud-api/config.json b/benchmarks/100.webapps/130.crud-api/config.json index 25c6cb05e..ba846d706 100644 --- a/benchmarks/100.webapps/130.crud-api/config.json +++ b/benchmarks/100.webapps/130.crud-api/config.json @@ -3,7 +3,8 @@ "memory": 128, "languages": [ "python", - "nodejs" + "nodejs", + "pypy" ], "modules": [ "nosql" diff --git a/benchmarks/100.webapps/130.crud-api/pypy/function.py b/benchmarks/100.webapps/130.crud-api/pypy/function.py new file mode 100644 index 000000000..0b5e0e8c0 --- /dev/null +++ b/benchmarks/100.webapps/130.crud-api/pypy/function.py @@ -0,0 +1,67 @@ +from . import nosql + +nosql_client = nosql.nosql.get_instance() + +nosql_table_name = "shopping_cart" + + +def add_product(cart_id: str, product_id: str, product_name: str, price: float, quantity: int): + + nosql_client.insert( + nosql_table_name, + ("cart_id", cart_id), + ("product_id", product_id), + {"price": price, "quantity": quantity, "name": product_name}, + ) + + +def get_products(cart_id: str, product_id: str): + return nosql_client.get(nosql_table_name, ("cart_id", cart_id), ("product_id", product_id)) + + +def query_products(cart_id: str): + + res = nosql_client.query( + nosql_table_name, + ("cart_id", cart_id), + "product_id", + ) + + products = [] + price_sum = 0 + quantity_sum = 0 + for product in res: + + products.append(product["name"]) + price_sum += product["price"] + quantity_sum += product["quantity"] + + avg_price = price_sum / quantity_sum if quantity_sum > 0 else 0.0 + + return {"products": products, "total_cost": price_sum, "avg_price": avg_price} + + +def handler(event): + + results = [] + + for request in event["requests"]: + + route = request["route"] + body = request["body"] + + if route == "PUT /cart": + 
add_product( + body["cart"], body["product_id"], body["name"], body["price"], body["quantity"] + ) + res = {} + elif route == "GET /cart/{id}": + res = get_products(body["cart"], request["path"]["id"]) + elif route == "GET /cart": + res = query_products(body["cart"]) + else: + raise RuntimeError(f"Unknown request route: {route}") + + results.append(res) + + return {"result": results} diff --git a/benchmarks/200.multimedia/210.thumbnailer/config.json b/benchmarks/200.multimedia/210.thumbnailer/config.json index 8edb99e52..c250fe15b 100644 --- a/benchmarks/200.multimedia/210.thumbnailer/config.json +++ b/benchmarks/200.multimedia/210.thumbnailer/config.json @@ -1,6 +1,6 @@ { "timeout": 60, "memory": 256, - "languages": ["python", "nodejs"], + "languages": ["python", "nodejs", "pypy"], "modules": ["storage"] } diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/README.md b/benchmarks/200.multimedia/210.thumbnailer/pypy/README.md new file mode 100755 index 000000000..fc6a75265 --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/README.md @@ -0,0 +1,12 @@ +# Image Processing + +A simple pipeline performing basic image operations with Pillow. + +[Inspired by AWS Lambda tutorial code.](https://docs.aws.amazon.com/lambda/latest/dg/with-s3-example-deployment-pkg.htm) + +### Instructions + +1. Deploy Docker container with function code and input data. + +2. Example of JSON payload: `{ "dir": "input_data", "id": "1" }'`. + diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/function.py b/benchmarks/200.multimedia/210.thumbnailer/pypy/function.py new file mode 100644 index 000000000..20527067b --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/function.py @@ -0,0 +1,70 @@ +import datetime +import io +import os +import sys +import uuid +from urllib.parse import unquote_plus +from PIL import Image + +from . 
import storage +client = storage.storage.get_instance() + +# Disk-based solution +#def resize_image(image_path, resized_path, w, h): +# with Image.open(image_path) as image: +# image.thumbnail((w,h)) +# image.save(resized_path) + +# Memory-based solution +def resize_image(image_bytes, w, h): + with Image.open(io.BytesIO(image_bytes)) as image: + image.thumbnail((w,h)) + out = io.BytesIO() + image.save(out, format='jpeg') + # necessary to rewind to the beginning of the buffer + out.seek(0) + return out + +def handler(event): + + bucket = event.get('bucket').get('bucket') + input_prefix = event.get('bucket').get('input') + output_prefix = event.get('bucket').get('output') + key = unquote_plus(event.get('object').get('key')) + width = event.get('object').get('width') + height = event.get('object').get('height') + # UUID to handle multiple calls + #download_path = '/tmp/{}-{}'.format(uuid.uuid4(), key) + #upload_path = '/tmp/resized-{}'.format(key) + #client.download(input_bucket, key, download_path) + #resize_image(download_path, upload_path, width, height) + #client.upload(output_bucket, key, upload_path) + download_begin = datetime.datetime.now() + img = client.download_stream(bucket, os.path.join(input_prefix, key)) + download_end = datetime.datetime.now() + + process_begin = datetime.datetime.now() + resized = resize_image(img, width, height) + resized_size = resized.getbuffer().nbytes + process_end = datetime.datetime.now() + + upload_begin = datetime.datetime.now() + key_name = client.upload_stream(bucket, os.path.join(output_prefix, key), resized) + upload_end = datetime.datetime.now() + + download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1) + upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + return { + 'result': { + 'bucket': bucket, + 'key': key_name + }, + 'measurement': { + 'download_time': download_time, + 
'download_size': len(img), + 'upload_time': upload_time, + 'upload_size': resized_size, + 'compute_time': process_time + } + } diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt new file mode 100644 index 000000000..f29e80646 --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt @@ -0,0 +1 @@ +Pillow>=8.0.0 diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.10 b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.10 new file mode 100644 index 000000000..9caa46c8d --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.10 @@ -0,0 +1 @@ +pillow==10.3.0 diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.11 b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.11 new file mode 100644 index 000000000..9caa46c8d --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.11 @@ -0,0 +1 @@ +pillow==10.3.0 diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.12 b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.12 new file mode 100644 index 000000000..9caa46c8d --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.12 @@ -0,0 +1 @@ +pillow==10.3.0 diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.6 b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.6 new file mode 100755 index 000000000..118ca689e --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.6 @@ -0,0 +1 @@ +Pillow==7.0.0 diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.7 b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.7 new file mode 100755 index 000000000..91d1b3192 --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.7 @@ 
-0,0 +1 @@ +Pillow==8.0.0 diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.8 b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.8 new file mode 100755 index 000000000..8da721c23 --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.8 @@ -0,0 +1 @@ +Pillow==9.0.0 diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.9 b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.9 new file mode 100755 index 000000000..8da721c23 --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.9 @@ -0,0 +1 @@ +Pillow==9.0.0 diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.arm.3.8 b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.arm.3.8 new file mode 100644 index 000000000..68ac1eb37 --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.arm.3.8 @@ -0,0 +1 @@ +Pillow==10.0.0 diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.arm.3.9 b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.arm.3.9 new file mode 100644 index 000000000..68ac1eb37 --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.arm.3.9 @@ -0,0 +1 @@ +Pillow==10.0.0 diff --git a/benchmarks/200.multimedia/220.video-processing/config.json b/benchmarks/200.multimedia/220.video-processing/config.json index 94ede7925..8e00a88e2 100644 --- a/benchmarks/200.multimedia/220.video-processing/config.json +++ b/benchmarks/200.multimedia/220.video-processing/config.json @@ -1,6 +1,6 @@ { "timeout": 60, "memory": 512, - "languages": ["python"], + "languages": ["python", "pypy"], "modules": ["storage"] } diff --git a/benchmarks/200.multimedia/220.video-processing/pypy/function.py b/benchmarks/200.multimedia/220.video-processing/pypy/function.py new file mode 100644 index 000000000..af5c09a4d --- /dev/null +++ 
b/benchmarks/200.multimedia/220.video-processing/pypy/function.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python + +import datetime +import os +import stat +import subprocess + + +import storage +client = storage.storage.get_instance() + +SCRIPT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__))) + +def call_ffmpeg(args): + ret = subprocess.run([os.path.join(SCRIPT_DIR, 'ffmpeg', 'ffmpeg'), '-y'] + args, + #subprocess might inherit Lambda's input for some reason + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) + if ret.returncode != 0: + print('Invocation of ffmpeg failed!') + print('Out: ', ret.stdout.decode('utf-8')) + raise RuntimeError() + +# https://superuser.com/questions/556029/how-do-i-convert-a-video-to-gif-using-ffmpeg-with-reasonable-quality +def to_gif(video, duration, event): + output = '/tmp/processed-{}.gif'.format(os.path.basename(video)) + call_ffmpeg(["-i", video, + "-t", + "{0}".format(duration), + "-vf", + "fps=10,scale=320:-1:flags=lanczos,split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse", + "-loop", "0", + output]) + return output + +# https://devopstar.com/2019/01/28/serverless-watermark-using-aws-lambda-layers-ffmpeg/ +def watermark(video, duration, event): + output = '/tmp/processed-{}'.format(os.path.basename(video)) + watermark_file = os.path.dirname(os.path.realpath(__file__)) + call_ffmpeg([ + "-i", video, + "-i", os.path.join(watermark_file, os.path.join('resources', 'watermark.png')), + "-t", "{0}".format(duration), + "-filter_complex", "overlay=main_w/2-overlay_w/2:main_h/2-overlay_h/2", + output]) + return output + +def transcode_mp3(video, duration, event): + pass + +operations = { 'transcode' : transcode_mp3, 'extract-gif' : to_gif, 'watermark' : watermark } + +def handler(event): + + bucket = event.get('bucket').get('bucket') + input_prefix = event.get('bucket').get('input') + output_prefix = event.get('bucket').get('output') + key = event.get('object').get('key') + duration = 
event.get('object').get('duration') + op = event.get('object').get('op') + download_path = '/tmp/{}'.format(key) + + # Restore executable permission + ffmpeg_binary = os.path.join(SCRIPT_DIR, 'ffmpeg', 'ffmpeg') + # needed on Azure but read-only filesystem on AWS + try: + st = os.stat(ffmpeg_binary) + os.chmod(ffmpeg_binary, st.st_mode | stat.S_IEXEC) + except OSError: + pass + + download_begin = datetime.datetime.now() + client.download(bucket, os.path.join(input_prefix, key), download_path) + download_size = os.path.getsize(download_path) + download_stop = datetime.datetime.now() + + process_begin = datetime.datetime.now() + upload_path = operations[op](download_path, duration, event) + process_end = datetime.datetime.now() + + upload_begin = datetime.datetime.now() + filename = os.path.basename(upload_path) + upload_size = os.path.getsize(upload_path) + upload_key = client.upload(bucket, os.path.join(output_prefix, filename), upload_path) + upload_stop = datetime.datetime.now() + + download_time = (download_stop - download_begin) / datetime.timedelta(microseconds=1) + upload_time = (upload_stop - upload_begin) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + return { + 'result': { + 'bucket': bucket, + 'key': upload_key + }, + 'measurement': { + 'download_time': download_time, + 'download_size': download_size, + 'upload_time': upload_time, + 'upload_size': upload_size, + 'compute_time': process_time + } + } diff --git a/benchmarks/200.multimedia/220.video-processing/pypy/init.sh b/benchmarks/200.multimedia/220.video-processing/pypy/init.sh new file mode 100755 index 000000000..de6048a36 --- /dev/null +++ b/benchmarks/200.multimedia/220.video-processing/pypy/init.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +DIR=$1 +VERBOSE=$2 +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +path="${SCRIPT_DIR}/../init.sh" +if [ "$VERBOSE" = true ]; then + echo "Update ${DIR} with init 
script ${path}" +fi +bash ${path} ${DIR} ${VERBOSE} diff --git a/benchmarks/200.multimedia/220.video-processing/pypy/requirements.txt b/benchmarks/200.multimedia/220.video-processing/pypy/requirements.txt new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/300.utilities/311.compression/config.json b/benchmarks/300.utilities/311.compression/config.json index 8edb99e52..c250fe15b 100644 --- a/benchmarks/300.utilities/311.compression/config.json +++ b/benchmarks/300.utilities/311.compression/config.json @@ -1,6 +1,6 @@ { "timeout": 60, "memory": 256, - "languages": ["python", "nodejs"], + "languages": ["python", "nodejs", "pypy"], "modules": ["storage"] } diff --git a/benchmarks/300.utilities/311.compression/pypy/README.md b/benchmarks/300.utilities/311.compression/pypy/README.md new file mode 100755 index 000000000..fc6a75265 --- /dev/null +++ b/benchmarks/300.utilities/311.compression/pypy/README.md @@ -0,0 +1,12 @@ +# Image Processing + +A simple pipeline performing basic image operations with Pillow. + +[Inspired by AWS Lambda tutorial code.](https://docs.aws.amazon.com/lambda/latest/dg/with-s3-example-deployment-pkg.htm) + +### Instructions + +1. Deploy Docker container with function code and input data. + +2. Example of JSON payload: `{ "dir": "input_data", "id": "1" }'`. + diff --git a/benchmarks/300.utilities/311.compression/pypy/function.py b/benchmarks/300.utilities/311.compression/pypy/function.py new file mode 100644 index 000000000..67aee8ca4 --- /dev/null +++ b/benchmarks/300.utilities/311.compression/pypy/function.py @@ -0,0 +1,58 @@ +import datetime +import io +import os +import shutil +import uuid +import zlib + +from . 
import storage +client = storage.storage.get_instance() + +def parse_directory(directory): + + size = 0 + for root, dirs, files in os.walk(directory): + for file in files: + size += os.path.getsize(os.path.join(root, file)) + return size + +def handler(event): + + bucket = event.get('bucket').get('bucket') + input_prefix = event.get('bucket').get('input') + output_prefix = event.get('bucket').get('output') + key = event.get('object').get('key') + download_path = '/tmp/{}-{}'.format(key, uuid.uuid4()) + os.makedirs(download_path) + + s3_download_begin = datetime.datetime.now() + client.download_directory(bucket, os.path.join(input_prefix, key), download_path) + s3_download_stop = datetime.datetime.now() + size = parse_directory(download_path) + + compress_begin = datetime.datetime.now() + shutil.make_archive(os.path.join(download_path, key), 'zip', root_dir=download_path) + compress_end = datetime.datetime.now() + + s3_upload_begin = datetime.datetime.now() + archive_name = '{}.zip'.format(key) + archive_size = os.path.getsize(os.path.join(download_path, archive_name)) + key_name = client.upload(bucket, os.path.join(output_prefix, archive_name), os.path.join(download_path, archive_name)) + s3_upload_stop = datetime.datetime.now() + + download_time = (s3_download_stop - s3_download_begin) / datetime.timedelta(microseconds=1) + upload_time = (s3_upload_stop - s3_upload_begin) / datetime.timedelta(microseconds=1) + process_time = (compress_end - compress_begin) / datetime.timedelta(microseconds=1) + return { + 'result': { + 'bucket': bucket, + 'key': key_name + }, + 'measurement': { + 'download_time': download_time, + 'download_size': size, + 'upload_time': upload_time, + 'upload_size': archive_size, + 'compute_time': process_time + } + } diff --git a/benchmarks/300.utilities/311.compression/pypy/requirements.txt b/benchmarks/300.utilities/311.compression/pypy/requirements.txt new file mode 100644 index 000000000..e69de29bb diff --git 
a/benchmarks/500.scientific/501.graph-pagerank/config.json b/benchmarks/500.scientific/501.graph-pagerank/config.json index e80fb4351..eaf5a60b4 100644 --- a/benchmarks/500.scientific/501.graph-pagerank/config.json +++ b/benchmarks/500.scientific/501.graph-pagerank/config.json @@ -1,6 +1,6 @@ { "timeout": 120, "memory": 512, - "languages": ["python"], + "languages": ["python", "pypy"], "modules": [] -} +} \ No newline at end of file diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/function.py b/benchmarks/500.scientific/501.graph-pagerank/pypy/function.py new file mode 100644 index 000000000..0e462e9b4 --- /dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/function.py @@ -0,0 +1,29 @@ +import datetime +import igraph + +def handler(event): + + size = event.get('size') + if "seed" in event: + import random + + random.seed(event["seed"]) + + graph_generating_begin = datetime.datetime.now() + graph = igraph.Graph.Barabasi(size, 10) + graph_generating_end = datetime.datetime.now() + + process_begin = datetime.datetime.now() + result = graph.pagerank() + process_end = datetime.datetime.now() + + graph_generating_time = (graph_generating_end - graph_generating_begin) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + + return { + 'result': result[0], + 'measurement': { + 'graph_generating_time': graph_generating_time, + 'compute_time': process_time + } + } diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt new file mode 100644 index 000000000..b6287cf18 --- /dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt @@ -0,0 +1 @@ +python-igraph>=0.9.0 diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.10 b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.10 new file mode 100644 index 000000000..e291b7b39 
--- /dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.10 @@ -0,0 +1 @@ +igraph==0.11.4 diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.11 b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.11 new file mode 100644 index 000000000..e291b7b39 --- /dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.11 @@ -0,0 +1 @@ +igraph==0.11.4 diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.12 b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.12 new file mode 100644 index 000000000..e291b7b39 --- /dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.12 @@ -0,0 +1 @@ +igraph==0.11.4 diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.7 b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.7 new file mode 100755 index 000000000..4e4d562fd --- /dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.7 @@ -0,0 +1 @@ +python-igraph==0.8.0 diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.8 b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.8 new file mode 100755 index 000000000..4e4d562fd --- /dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.8 @@ -0,0 +1 @@ +python-igraph==0.8.0 diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.9 b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.9 new file mode 100755 index 000000000..0918761fe --- /dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.9 @@ -0,0 +1 @@ +python-igraph==0.9.0 diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.arm.3.8 b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.arm.3.8 new file mode 100644 index 000000000..398b70edc --- 
/dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.arm.3.8 @@ -0,0 +1 @@ +python-igraph==0.11.4 diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.arm.3.9 b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.arm.3.9 new file mode 100644 index 000000000..398b70edc --- /dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.arm.3.9 @@ -0,0 +1 @@ +python-igraph==0.11.4 diff --git a/benchmarks/500.scientific/502.graph-mst/config.json b/benchmarks/500.scientific/502.graph-mst/config.json index e80fb4351..a97e13cf5 100644 --- a/benchmarks/500.scientific/502.graph-mst/config.json +++ b/benchmarks/500.scientific/502.graph-mst/config.json @@ -1,6 +1,6 @@ { "timeout": 120, "memory": 512, - "languages": ["python"], + "languages": ["python", "pypy"], "modules": [] } diff --git a/benchmarks/500.scientific/502.graph-mst/pypy/function.py b/benchmarks/500.scientific/502.graph-mst/pypy/function.py new file mode 100644 index 000000000..b63fbdce2 --- /dev/null +++ b/benchmarks/500.scientific/502.graph-mst/pypy/function.py @@ -0,0 +1,29 @@ +import datetime +import igraph + +def handler(event): + + size = event.get('size') + if "seed" in event: + import random + + random.seed(event["seed"]) + + graph_generating_begin = datetime.datetime.now() + graph = igraph.Graph.Barabasi(size, 10) + graph_generating_end = datetime.datetime.now() + + process_begin = datetime.datetime.now() + result = graph.spanning_tree(None, False) + process_end = datetime.datetime.now() + + graph_generating_time = (graph_generating_end - graph_generating_begin) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + + return { + 'result': result[0], + 'measurement': { + 'graph_generating_time': graph_generating_time, + 'compute_time': process_time + } + } diff --git a/benchmarks/500.scientific/502.graph-mst/pypy/requirements.txt 
b/benchmarks/500.scientific/502.graph-mst/pypy/requirements.txt new file mode 100644 index 000000000..b6287cf18 --- /dev/null +++ b/benchmarks/500.scientific/502.graph-mst/pypy/requirements.txt @@ -0,0 +1 @@ +python-igraph>=0.9.0 diff --git a/benchmarks/500.scientific/503.graph-bfs/config.json b/benchmarks/500.scientific/503.graph-bfs/config.json index e80fb4351..a97e13cf5 100644 --- a/benchmarks/500.scientific/503.graph-bfs/config.json +++ b/benchmarks/500.scientific/503.graph-bfs/config.json @@ -1,6 +1,6 @@ { "timeout": 120, "memory": 512, - "languages": ["python"], + "languages": ["python", "pypy"], "modules": [] } diff --git a/benchmarks/500.scientific/503.graph-bfs/pypy/function.py b/benchmarks/500.scientific/503.graph-bfs/pypy/function.py new file mode 100644 index 000000000..18423ae1a --- /dev/null +++ b/benchmarks/500.scientific/503.graph-bfs/pypy/function.py @@ -0,0 +1,29 @@ +import datetime +import igraph + +def handler(event): + + size = event.get('size') + if "seed" in event: + import random + + random.seed(event["seed"]) + + graph_generating_begin = datetime.datetime.now() + graph = igraph.Graph.Barabasi(size, 10) + graph_generating_end = datetime.datetime.now() + + process_begin = datetime.datetime.now() + result = graph.bfs(0) + process_end = datetime.datetime.now() + + graph_generating_time = (graph_generating_end - graph_generating_begin) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + + return { + 'result': result, + 'measurement': { + 'graph_generating_time': graph_generating_time, + 'compute_time': process_time + } + } diff --git a/benchmarks/500.scientific/503.graph-bfs/pypy/requirements.txt b/benchmarks/500.scientific/503.graph-bfs/pypy/requirements.txt new file mode 100644 index 000000000..b6287cf18 --- /dev/null +++ b/benchmarks/500.scientific/503.graph-bfs/pypy/requirements.txt @@ -0,0 +1 @@ +python-igraph>=0.9.0 diff --git a/benchmarks/wrappers/aws/java/pom.xml 
b/benchmarks/wrappers/aws/java/pom.xml new file mode 100644 index 000000000..f4d083216 --- /dev/null +++ b/benchmarks/wrappers/aws/java/pom.xml @@ -0,0 +1,55 @@ + + 4.0.0 + org.serverlessbench + function + 1.0.0 + + 17 + 17 + + + + com.amazonaws + aws-lambda-java-core + 1.2.3 + + + com.fasterxml.jackson.core + jackson-databind + 2.17.1 + + + + function + + + org.apache.maven.plugins + maven-shade-plugin + 3.5.1 + + + package + + shade + + + false + + + *:* + + META-INF/*.SF + META-INF/*.RSA + META-INF/*.DSA + + + + + + + + + + diff --git a/benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/FunctionInvoker.java b/benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/FunctionInvoker.java new file mode 100644 index 000000000..7d9c83570 --- /dev/null +++ b/benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/FunctionInvoker.java @@ -0,0 +1,41 @@ +package org.serverlessbench; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.HashMap; +import java.util.Map; + +final class FunctionInvoker { + + private static final String DEFAULT_CLASS = "function.Function"; + private static final String DEFAULT_METHOD = "handler"; + + private FunctionInvoker() {} + + static Map invoke(Map input) { + try { + Class fnClass = Class.forName(DEFAULT_CLASS); + Object instance = fnClass.getDeclaredConstructor().newInstance(); + Method method = fnClass.getMethod(DEFAULT_METHOD, Map.class); + Object result = method.invoke(instance, input); + if (result instanceof Map) { + @SuppressWarnings("unchecked") + Map casted = (Map) result; + return casted; + } + } catch (ClassNotFoundException e) { + return defaultResponse("Function implementation not found"); + } catch (NoSuchMethodException e) { + return defaultResponse("Function.handler(Map) missing"); + } catch (InvocationTargetException | InstantiationException | IllegalAccessException e) { + return defaultResponse("Failed to invoke function: " + e.getMessage()); 
+ } + return defaultResponse("Function returned unsupported type"); + } + + private static Map defaultResponse(String message) { + Map out = new HashMap<>(); + out.put("output", message); + return out; + } +} diff --git a/benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/Handler.java b/benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/Handler.java new file mode 100644 index 000000000..93eb41ba0 --- /dev/null +++ b/benchmarks/wrappers/aws/java/src/main/java/org/serverlessbench/Handler.java @@ -0,0 +1,65 @@ +package org.serverlessbench; + +import com.amazonaws.services.lambda.runtime.Context; +import com.amazonaws.services.lambda.runtime.RequestHandler; +import com.fasterxml.jackson.databind.ObjectMapper; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; + +public class Handler implements RequestHandler, Map> { + + private static final ObjectMapper MAPPER = new ObjectMapper(); + private static final Path MARKER = Path.of("/tmp/cold_run"); + + @Override + public Map handleRequest(Map event, Context context) { + long beginNs = System.nanoTime(); + Map normalized = normalize(event); + Map result = FunctionInvoker.invoke(normalized); + long endNs = System.nanoTime(); + + Map body = new HashMap<>(); + body.put("begin", beginNs / 1_000_000_000.0); + body.put("end", endNs / 1_000_000_000.0); + body.put("compute_time", (endNs - beginNs) / 1_000.0); + body.put("results_time", 0); + body.put("result", result); + body.put("is_cold", isCold()); + body.put("request_id", context != null ? 
context.getAwsRequestId() : ""); + + return body; + } + + private boolean isCold() { + if (Files.exists(MARKER)) { + return false; + } + try { + Files.createFile(MARKER); + } catch (IOException ignored) { + // best-effort marker write + } + return true; + } + + private Map normalize(Map event) { + if (event == null) { + return new HashMap<>(); + } + Object body = event.get("body"); + if (body instanceof String) { + try { + @SuppressWarnings("unchecked") + Map parsed = MAPPER.readValue((String) body, Map.class); + return parsed; + } catch (Exception ignored) { + // fall back to original event + } + } + return new HashMap<>(event); + } +} diff --git a/benchmarks/wrappers/aws/pypy/handler.py b/benchmarks/wrappers/aws/pypy/handler.py new file mode 100644 index 000000000..907b2c612 --- /dev/null +++ b/benchmarks/wrappers/aws/pypy/handler.py @@ -0,0 +1,71 @@ + +import datetime, io, json, os, sys, uuid + +# Add current directory to allow location of packages +sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) + +# TODO: usual trigger +# implement support for S3 and others +def handler(event, context): + + income_timestamp = datetime.datetime.now().timestamp() + + # HTTP trigger with API Gateaway + if 'body' in event: + event = json.loads(event['body']) + req_id = context.aws_request_id + event['request-id'] = req_id + event['income-timestamp'] = income_timestamp + begin = datetime.datetime.now() + from function import function + ret = function.handler(event) + end = datetime.datetime.now() + + log_data = { + 'output': ret['result'] + } + if 'measurement' in ret: + log_data['measurement'] = ret['measurement'] + if 'logs' in event: + log_data['time'] = (end - begin) / datetime.timedelta(microseconds=1) + results_begin = datetime.datetime.now() + from function import storage + storage_inst = storage.storage.get_instance() + b = event.get('logs').get('bucket') + storage_inst.upload_stream(b, '{}.json'.format(req_id), + 
io.BytesIO(json.dumps(log_data).encode('utf-8'))) + results_end = datetime.datetime.now() + results_time = (results_end - results_begin) / datetime.timedelta(microseconds=1) + else: + results_time = 0 + + # cold test + is_cold = False + fname = os.path.join('/tmp', 'cold_run') + if not os.path.exists(fname): + is_cold = True + container_id = str(uuid.uuid4())[0:8] + with open(fname, 'a') as f: + f.write(container_id) + else: + with open(fname, 'r') as f: + container_id = f.read() + + cold_start_var = "" + if "cold_start" in os.environ: + cold_start_var = os.environ["cold_start"] + + return { + 'statusCode': 200, + 'body': json.dumps({ + 'begin': begin.strftime('%s.%f'), + 'end': end.strftime('%s.%f'), + 'results_time': results_time, + 'is_cold': is_cold, + 'result': log_data, + 'request_id': context.aws_request_id, + 'cold_start_var': cold_start_var, + 'container_id': container_id, + }) + } + diff --git a/benchmarks/wrappers/aws/pypy/nosql.py b/benchmarks/wrappers/aws/pypy/nosql.py new file mode 100644 index 000000000..72bc2d9da --- /dev/null +++ b/benchmarks/wrappers/aws/pypy/nosql.py @@ -0,0 +1,121 @@ +from decimal import Decimal +from os import environ +from typing import List, Optional, Union, Tuple + +import boto3 + + +class nosql: + + instance: Optional["nosql"] = None + + def __init__(self): + self.client = boto3.resource("dynamodb") + self._tables = {} + + # Based on: https://github.com/boto/boto3/issues/369#issuecomment-157205696 + def _remove_decimals(self, data: dict) -> Union[dict, list, int, float]: + + if isinstance(data, list): + return [self._remove_decimals(x) for x in data] + elif isinstance(data, dict): + return {k: self._remove_decimals(v) for k, v in data.items()} + elif isinstance(data, Decimal): + if data.as_integer_ratio()[1] == 1: + return int(data) + else: + return float(data) + else: + return data + + def _get_table(self, table_name: str): + + if table_name not in self._tables: + + env_name = f"NOSQL_STORAGE_TABLE_{table_name}" + + if 
env_name in environ: + aws_name = environ[env_name] + self._tables[table_name] = self.client.Table(aws_name) + else: + raise RuntimeError( + f"Couldn't find an environment variable {env_name} for table {table_name}" + ) + + return self._tables[table_name] + + def insert( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + for key in (primary_key, secondary_key): + data[key[0]] = key[1] + + self._get_table(table_name).put_item(Item=data) + + def get( + self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str] + ) -> dict: + + data = {} + for key in (primary_key, secondary_key): + data[key[0]] = key[1] + + res = self._get_table(table_name).get_item(Key=data) + return self._remove_decimals(res["Item"]) + + def update( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + updates: dict, + ): + + key_data = {} + for key in (primary_key, secondary_key): + key_data[key[0]] = key[1] + + update_expression = "SET " + update_values = {} + update_names = {} + + # We use attribute names because DynamoDB reserves some keywords, like 'status' + for key, value in updates.items(): + + update_expression += f" #{key}_name = :{key}_value, " + update_values[f":{key}_value"] = value + update_names[f"#{key}_name"] = key + + update_expression = update_expression[:-2] + + self._get_table(table_name).update_item( + Key=key_data, + UpdateExpression=update_expression, + ExpressionAttributeValues=update_values, + ExpressionAttributeNames=update_names, + ) + + def query(self, table_name: str, primary_key: Tuple[str, str], _: str) -> List[dict]: + + res = self._get_table(table_name).query( + KeyConditionExpression=f"{primary_key[0]} = :keyvalue", + ExpressionAttributeValues={":keyvalue": primary_key[1]}, + )["Items"] + return self._remove_decimals(res) + + def delete(self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str]): + data = {} + for 
import io
import os
import uuid


class storage:
    """Thin S3 wrapper used by SeBS benchmark functions.

    A process-wide singleton (see get_instance) holding one boto3 S3 client.
    """

    instance = None
    client = None

    def __init__(self):
        # Local import keeps module import cheap and lets the class be
        # defined in environments without boto3 (e.g. unit tests).
        import boto3
        self.client = boto3.client('s3')

    @staticmethod
    def unique_name(name):
        # Insert a short random infix before the extension so repeated
        # uploads of the same logical file never collide in the bucket.
        name, extension = os.path.splitext(name)
        return '{name}.{random}{extension}'.format(
            name=name,
            extension=extension,
            random=str(uuid.uuid4()).split('-')[0]
        )

    def upload(self, bucket, file, filepath):
        """Upload local *filepath* under a randomized key; return the key."""
        key_name = storage.unique_name(file)
        self.client.upload_file(filepath, bucket, key_name)
        return key_name

    def download(self, bucket, file, filepath):
        """Download object *file* into local *filepath*."""
        self.client.download_file(bucket, file, filepath)

    def download_directory(self, bucket, prefix, path):
        """Download every object under *prefix* into *path*, preserving layout.

        Uses a paginator: a single list_objects_v2 call returns at most 1000
        keys and would silently truncate larger prefixes. 'Contents' is absent
        entirely when the prefix matches no keys, hence the .get() default.
        """
        paginator = self.client.get_paginator('list_objects_v2')
        for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
            for obj in page.get('Contents', []):
                file_name = obj['Key']
                path_to_file = os.path.dirname(file_name)
                os.makedirs(os.path.join(path, path_to_file), exist_ok=True)
                self.download(bucket, file_name, os.path.join(path, file_name))

    def upload_stream(self, bucket, file, data):
        """Upload a file-like object under a randomized key; return the key."""
        key_name = storage.unique_name(file)
        self.client.upload_fileobj(data, bucket, key_name)
        return key_name

    def download_stream(self, bucket, file):
        """Return the object's bytes as a memoryview-backed buffer."""
        data = io.BytesIO()
        self.client.download_fileobj(bucket, file, data)
        return data.getbuffer()

    @staticmethod
    def get_instance():
        # @staticmethod was missing in the original: an *instance* call
        # (inst.get_instance()) would have raised TypeError. This also
        # matches the AWS nosql wrapper's declaration.
        if storage.instance is None:
            storage.instance = storage()
        return storage.instance
+async fn handler(event: Request) -> Result, Error> { + let begin = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs_f64(); + + let is_cold = unsafe { + let cold = IS_COLD; + IS_COLD = false; + cold + }; + + let request_id = event + .lambda_context_ref() + .map(|ctx| ctx.request_id.clone()) + .unwrap_or_else(|| "unknown".to_string()); + + // Parse Body + let payload: RequestPayload = match event.payload() { + Ok(Some(p)) => p, + Ok(None) => { + return Ok(Response::builder() + .status(400) + .body(Body::from("Missing request body")) + .unwrap()); + } + Err(e) => { + return Ok(Response::builder() + .status(400) + .body(Body::from(format!("Invalid JSON: {}", e))) + .unwrap()); + } + }; + + // Call the benchmark function (sync) + let function_result: FunctionResponse = function::handler(payload); + + let end = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs_f64(); + + // Convert result to Value + let result_value = serde_json::to_value(&function_result.result)?; + + let response_payload = ResponsePayload { + result: result_value, + begin, + end, + is_cold, + request_id, + }; + + let response_json = serde_json::to_string(&response_payload)?; + + Ok(Response::builder() + .status(200) + .header("Content-Type", "application/json") + .body(Body::from(response_json)) + .unwrap()) +} + +#[tokio::main] +async fn main() -> Result<(), Error> { + run(service_fn(handler)).await +} diff --git a/benchmarks/wrappers/aws/rust/src/nosql.rs b/benchmarks/wrappers/aws/rust/src/nosql.rs new file mode 100644 index 000000000..9cc3a64a8 --- /dev/null +++ b/benchmarks/wrappers/aws/rust/src/nosql.rs @@ -0,0 +1,88 @@ +use aws_sdk_dynamodb::{Client, types::AttributeValue}; +use std::collections::HashMap; +use std::env; + +pub struct NoSQL { + client: Client, + tables: std::sync::Mutex>, +} + +impl NoSQL { + pub async fn get_instance() -> Self { + let config = aws_config::load_defaults(aws_config::BehaviorVersion::latest()).await; + let client = 
Client::new(&config); + NoSQL { + client, + tables: std::sync::Mutex::new(HashMap::new()), + } + } + + fn get_table_name(&self, table_name: &str) -> Result { + let mut tables = self.tables.lock().unwrap(); + if let Some(name) = tables.get(table_name) { + return Ok(name.clone()); + } + + let env_name = format!("NOSQL_STORAGE_TABLE_{}", table_name); + match env::var(&env_name) { + Ok(aws_name) => { + tables.insert(table_name.to_string(), aws_name.clone()); + Ok(aws_name) + } + Err(_) => Err(format!("Couldn't find environment variable {} for table {}", env_name, table_name)), + } + } + + // Helper to convert HashMap to generic JSON/Map + // For simplicity in this wrapper, we accept/return HashMap + // or we could use serde_dynamo if added as dependency. + // Following the python "dict" approach, we'll try to use AttributeValue directly or simple conversion. + // For now, let's expose the raw AttributeValue or simple helpers. + + pub async fn insert( + &self, + table_name: &str, + primary_key: (&str, &str), + secondary_key: (&str, &str), + mut data: HashMap, + ) -> Result<(), Box> { + let aws_table_name = self.get_table_name(table_name)?; + + data.insert(primary_key.0.to_string(), AttributeValue::S(primary_key.1.to_string())); + data.insert(secondary_key.0.to_string(), AttributeValue::S(secondary_key.1.to_string())); + + self.client + .put_item() + .table_name(aws_table_name) + .set_item(Some(data)) + .send() + .await?; + + Ok(()) + } + + pub async fn get( + &self, + table_name: &str, + primary_key: (&str, &str), + secondary_key: (&str, &str), + ) -> Result, Box> { + let aws_table_name = self.get_table_name(table_name)?; + + let mut key = HashMap::new(); + key.insert(primary_key.0.to_string(), AttributeValue::S(primary_key.1.to_string())); + key.insert(secondary_key.0.to_string(), AttributeValue::S(secondary_key.1.to_string())); + + let resp = self.client + .get_item() + .table_name(aws_table_name) + .set_key(Some(key)) + .send() + .await?; + + 
Ok(resp.item.unwrap_or_default()) + } + + // Minimal implementation matching the python basics. + // update/query/delete can be added similarly. +} diff --git a/benchmarks/wrappers/aws/rust/src/storage.rs b/benchmarks/wrappers/aws/rust/src/storage.rs new file mode 100644 index 000000000..904bb531e --- /dev/null +++ b/benchmarks/wrappers/aws/rust/src/storage.rs @@ -0,0 +1,84 @@ +use aws_sdk_s3::Client; +use aws_sdk_s3::primitives::ByteStream; +use std::path::Path; +use uuid::Uuid; +use std::fs; +use std::io::Write; + +pub struct Storage { + client: Client, +} + +impl Storage { + pub async fn get_instance() -> Self { + let config = aws_config::load_defaults(aws_config::BehaviorVersion::latest()).await; + let client = Client::new(&config); + Storage { client } + } + + fn unique_name(name: &str) -> String { + let path = Path::new(name); + let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or(name); + let ext = path.extension().and_then(|s| s.to_str()).map(|e| format!(".{}", e)).unwrap_or_default(); + let uuid = Uuid::new_v4().to_string(); + let uuid_short = uuid.split('-').next().unwrap_or(&uuid); + format!("{}.{}{}", stem, uuid_short, ext) + } + + pub async fn upload(&self, bucket: &str, file: &str, filepath: &str) -> Result> { + let key_name = Self::unique_name(file); + let body = ByteStream::from_path(Path::new(filepath)).await?; + + self.client + .put_object() + .bucket(bucket) + .key(&key_name) + .body(body) + .send() + .await?; + + Ok(key_name) + } + + pub async fn download(&self, bucket: &str, file: &str, filepath: &str) -> Result<(), Box> { + let resp = self.client + .get_object() + .bucket(bucket) + .key(file) + .send() + .await?; + + let data = resp.body.collect().await?; + let mut file = fs::File::create(filepath)?; + file.write_all(&data.into_bytes())?; + + Ok(()) + } + + pub async fn upload_stream(&self, bucket: &str, file: &str, data: Vec) -> Result> { + let key_name = Self::unique_name(file); + let body = ByteStream::from(data); + + self.client 
+ .put_object() + .bucket(bucket) + .key(&key_name) + .body(body) + .send() + .await?; + + Ok(key_name) + } + + pub async fn download_stream(&self, bucket: &str, file: &str) -> Result, Box> { + let resp = self.client + .get_object() + .bucket(bucket) + .key(file) + .send() + .await?; + + let data = resp.body.collect().await?; + Ok(data.into_bytes().to_vec()) + } +} diff --git a/benchmarks/wrappers/azure/java/pom.xml b/benchmarks/wrappers/azure/java/pom.xml new file mode 100644 index 000000000..195df6160 --- /dev/null +++ b/benchmarks/wrappers/azure/java/pom.xml @@ -0,0 +1,71 @@ + + 4.0.0 + org.serverlessbench + function + 1.0.0 + + 17 + 17 + + + + com.microsoft.azure.functions + azure-functions-java-library + 3.0.0 + + + com.fasterxml.jackson.core + jackson-databind + 2.17.1 + + + + function + + + com.microsoft.azure + azure-functions-maven-plugin + 1.31.0 + + unused + unused + westeurope + + linux + 17 + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.5.1 + + + package + + shade + + + false + + + *:* + + META-INF/*.SF + META-INF/*.RSA + META-INF/*.DSA + + + + + + + + + + diff --git a/benchmarks/wrappers/azure/java/src/main/java/org/serverlessbench/ColdStartTracker.java b/benchmarks/wrappers/azure/java/src/main/java/org/serverlessbench/ColdStartTracker.java new file mode 100644 index 000000000..fbedaa208 --- /dev/null +++ b/benchmarks/wrappers/azure/java/src/main/java/org/serverlessbench/ColdStartTracker.java @@ -0,0 +1,33 @@ +package org.serverlessbench; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.UUID; +import java.util.concurrent.atomic.AtomicBoolean; + +final class ColdStartTracker { + + private static final AtomicBoolean WORKER_COLD = new AtomicBoolean(true); + private static final Path MARKER = Path.of("/tmp/cold_run"); + + private ColdStartTracker() {} + + static boolean isCold() { + if (Files.exists(MARKER)) { + return false; + } + try { + 
Files.writeString( + MARKER, UUID.randomUUID().toString().substring(0, 8), StandardCharsets.UTF_8); + } catch (IOException ignored) { + // best-effort marker write + } + return true; + } + + static boolean isWorkerCold() { + return WORKER_COLD.getAndSet(false); + } +} diff --git a/benchmarks/wrappers/azure/java/src/main/java/org/serverlessbench/FunctionInvoker.java b/benchmarks/wrappers/azure/java/src/main/java/org/serverlessbench/FunctionInvoker.java new file mode 100644 index 000000000..7d9c83570 --- /dev/null +++ b/benchmarks/wrappers/azure/java/src/main/java/org/serverlessbench/FunctionInvoker.java @@ -0,0 +1,41 @@ +package org.serverlessbench; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.HashMap; +import java.util.Map; + +final class FunctionInvoker { + + private static final String DEFAULT_CLASS = "function.Function"; + private static final String DEFAULT_METHOD = "handler"; + + private FunctionInvoker() {} + + static Map invoke(Map input) { + try { + Class fnClass = Class.forName(DEFAULT_CLASS); + Object instance = fnClass.getDeclaredConstructor().newInstance(); + Method method = fnClass.getMethod(DEFAULT_METHOD, Map.class); + Object result = method.invoke(instance, input); + if (result instanceof Map) { + @SuppressWarnings("unchecked") + Map casted = (Map) result; + return casted; + } + } catch (ClassNotFoundException e) { + return defaultResponse("Function implementation not found"); + } catch (NoSuchMethodException e) { + return defaultResponse("Function.handler(Map) missing"); + } catch (InvocationTargetException | InstantiationException | IllegalAccessException e) { + return defaultResponse("Failed to invoke function: " + e.getMessage()); + } + return defaultResponse("Function returned unsupported type"); + } + + private static Map defaultResponse(String message) { + Map out = new HashMap<>(); + out.put("output", message); + return out; + } +} diff --git 
a/benchmarks/wrappers/azure/java/src/main/java/org/serverlessbench/Handler.java b/benchmarks/wrappers/azure/java/src/main/java/org/serverlessbench/Handler.java new file mode 100644 index 000000000..8f9b3e1cf --- /dev/null +++ b/benchmarks/wrappers/azure/java/src/main/java/org/serverlessbench/Handler.java @@ -0,0 +1,79 @@ +package org.serverlessbench; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.microsoft.azure.functions.*; +import com.microsoft.azure.functions.annotation.AuthorizationLevel; +import com.microsoft.azure.functions.annotation.FunctionName; +import com.microsoft.azure.functions.annotation.HttpTrigger; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + +public class Handler { + + private static final ObjectMapper MAPPER = new ObjectMapper(); + + @FunctionName("handler") + public HttpResponseMessage handleRequest( + @HttpTrigger( + name = "req", + methods = {HttpMethod.GET, HttpMethod.POST}, + authLevel = AuthorizationLevel.ANONYMOUS) + final HttpRequestMessage> request, + final ExecutionContext context) { + + long beginNs = System.nanoTime(); + Map normalized = normalizeRequest(request); + Map result = FunctionInvoker.invoke(normalized); + long endNs = System.nanoTime(); + + Map body = new HashMap<>(); + body.put("begin", beginNs / 1_000_000_000.0); + body.put("end", endNs / 1_000_000_000.0); + body.put("compute_time", (endNs - beginNs) / 1_000.0); + body.put("results_time", 0); + body.put("result", result); + body.put("is_cold", ColdStartTracker.isCold()); + body.put("is_cold_worker", ColdStartTracker.isWorkerCold()); + body.put("request_id", context != null ? 
context.getInvocationId() : ""); + + String coldStartVar = System.getenv("cold_start"); + if (coldStartVar != null) { + body.put("cold_start_var", coldStartVar); + } + + String json = toJson(body); + return request + .createResponseBuilder(HttpStatus.OK) + .header("Content-Type", "application/json") + .body(json) + .build(); + } + + private Map normalizeRequest(HttpRequestMessage> request) { + if (request == null) { + return new HashMap<>(); + } + Optional body = request.getBody(); + if (body.isPresent()) { + try { + @SuppressWarnings("unchecked") + Map parsed = MAPPER.readValue(body.get(), Map.class); + return parsed; + } catch (IOException ignored) { + // ignore and continue + } + } + return new HashMap<>(request.getQueryParameters()); + } + + private String toJson(Map payload) { + try { + return MAPPER.writeValueAsString(payload); + } catch (IOException e) { + return "{}"; + } + } +} diff --git a/benchmarks/wrappers/azure/pypy/handler.py b/benchmarks/wrappers/azure/pypy/handler.py new file mode 100644 index 000000000..69703f668 --- /dev/null +++ b/benchmarks/wrappers/azure/pypy/handler.py @@ -0,0 +1,355 @@ +import sys +import os +import json +import logging +from http.server import BaseHTTPRequestHandler, HTTPServer +import datetime +import uuid +import io + +# Add current directory and handler directory to path to find function modules +# Similar to AWS handler which uses: sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) +current_dir = os.getcwd() +sys.path.append(current_dir) +handler_path = os.path.join(current_dir, 'handler') +if os.path.exists(handler_path): + sys.path.insert(0, handler_path) +# Also add .python_packages like AWS does +python_packages_path = os.path.join(current_dir, '.python_packages', 'lib', 'site-packages') +if os.path.exists(python_packages_path): + sys.path.append(python_packages_path) + +# Initialize logging +logging.basicConfig(level=logging.INFO) + +# Initialize Storage/NoSQL if needed 
# Eagerly initialize the NoSQL / Storage singletons from environment
# variables. Wrapped in try/except so a misconfiguration degrades to a
# warning instead of crashing the whole custom handler at startup.
try:
    if 'NOSQL_STORAGE_DATABASE' in os.environ:
        import nosql
        nosql.nosql.get_instance(
            os.environ['NOSQL_STORAGE_DATABASE'],
            os.environ['NOSQL_STORAGE_URL'],
            os.environ['NOSQL_STORAGE_CREDS']
        )
except Exception as e:
    logging.warning(f"Failed to initialize NoSQL: {e}")

try:
    if 'STORAGE_CONNECTION_STRING' in os.environ:
        import storage
        storage.storage.get_instance(os.environ['STORAGE_CONNECTION_STRING'])
except Exception as e:
    logging.warning(f"Failed to initialize storage: {e}")


class Handler(BaseHTTPRequestHandler):
    """Azure Functions custom handler.

    Forwards POSTed invocations to function.handler() and wraps the result
    with the SeBS timing / cold-start metadata. Heavily defensive: every
    path tries to guarantee the platform receives *some* HTTP response.
    """

    def log_message(self, format, *args):
        # Suppress default per-request logging; we log explicitly instead.
        pass

    def handle_one_request(self):
        """Override to ensure we always send a response."""
        self._response_sent = False
        try:
            super().handle_one_request()
        except Exception as e:
            logging.error(f"Unhandled exception in handle_one_request: {e}", exc_info=True)
            if not self._response_sent:
                try:
                    error_body = json.dumps({'error': f'Unhandled exception: {str(e)}'}).encode('utf-8')
                    self.send_response(500)
                    self.send_header('Content-type', 'application/json')
                    self.send_header('Content-Length', str(len(error_body)))
                    self.end_headers()
                    self.wfile.write(error_body)
                    self.wfile.flush()
                    self._response_sent = True
                except Exception as e2:
                    logging.error(f"Failed to send error response in handle_one_request: {e2}", exc_info=True)
                    # Last resort: try to write directly.
                    try:
                        if not self._response_sent:
                            self.wfile.write(b'{"error":"Internal server error"}')
                            self.wfile.flush()
                            self._response_sent = True
                    except Exception:
                        # was a bare `except:` — would also swallow SystemExit
                        pass

    def send_json_response(self, status_code, data):
        """Send a JSON response exactly once per request."""
        if not hasattr(self, '_response_sent'):
            self._response_sent = False

        if self._response_sent:
            logging.warning("Attempted to send response when already sent")
            return

        try:
            response_body = json.dumps(data).encode('utf-8')
            self.send_response(status_code)
            self.send_header('Content-type', 'application/json')
            self.send_header('Content-Length', str(len(response_body)))
            self.end_headers()
            self.wfile.write(response_body)
            self.wfile.flush()
            self._response_sent = True
        except (BrokenPipeError, ConnectionResetError) as e:
            # Client disconnected; mark sent so no second attempt is made.
            logging.warning(f"Client disconnected during response: {e}")
            self._response_sent = True
        except Exception as e:
            logging.error(f"Error in send_json_response: {e}", exc_info=True)
            try:
                if hasattr(self, '_headers_buffer') and self._headers_buffer:
                    # Headers already buffered: only the body can be written.
                    error_msg = json.dumps({'error': f'Error sending response: {str(e)}'}).encode('utf-8')
                    self.wfile.write(error_msg)
                    self.wfile.flush()
                else:
                    error_msg = json.dumps({'error': f'Error sending response: {str(e)}'}).encode('utf-8')
                    self.send_response(500)
                    self.send_header('Content-type', 'application/json')
                    self.send_header('Content-Length', str(len(error_msg)))
                    self.end_headers()
                    self.wfile.write(error_msg)
                    self.wfile.flush()
                self._response_sent = True
            except Exception as e2:
                logging.error(f"Failed to send error response: {e2}", exc_info=True)
                try:
                    if not self._response_sent:
                        self.wfile.write(b'{"error":"Internal server error"}')
                        self.wfile.flush()
                        self._response_sent = True
                except Exception:
                    # was a bare `except:`
                    self._response_sent = True

    def do_GET(self):
        # Health checks and trivial GET probes.
        self.send_json_response(200, {'status': 'ok'})

    def do_POST(self):
        if not hasattr(self, '_response_sent'):
            self._response_sent = False
        try:
            self._do_POST()
        except Exception as e:
            logging.error(f"Critical error in do_POST: {e}", exc_info=True)
            if not self._response_sent:
                try:
                    error_body = json.dumps({'error': f'Critical error: {str(e)}'}).encode('utf-8')
                    self.send_response(500)
                    self.send_header('Content-type', 'application/json')
                    self.send_header('Content-Length', str(len(error_body)))
                    self.end_headers()
                    self.wfile.write(error_body)
                    self.wfile.flush()
                    self._response_sent = True
                except Exception as e2:
                    logging.error(f"Failed to send critical error response: {e2}", exc_info=True)
                    try:
                        if not self._response_sent:
                            self.wfile.write(b'{"error":"Internal server error"}')
                            self.wfile.flush()
                            self._response_sent = True
                    except Exception:
                        # was a bare `except:`
                        pass

    def _do_POST(self):
        self._response_sent = False
        req_json = None
        invocation_id = None
        begin = None

        try:
            logging.info(f"Received POST request to {self.path}")
            content_length = int(self.headers.get('Content-Length', 0))
            if content_length > 0:
                post_data = self.rfile.read(content_length)
            else:
                post_data = b'{}'

            try:
                req_json = json.loads(post_data.decode('utf-8'))
            except json.JSONDecodeError as e:
                logging.error(f"JSON decode error: {e}, data: {post_data}")
                self.send_json_response(400, {'error': f'Invalid JSON: {str(e)}'})
                return

            invocation_id = self.headers.get('X-Azure-Functions-InvocationId', str(uuid.uuid4()))

            # Tag the request like the other Python wrappers do.
            if isinstance(req_json, dict):
                req_json['request-id'] = invocation_id
                req_json['income-timestamp'] = datetime.datetime.now().timestamp()

            begin = datetime.datetime.now()

            # Import the user function lazily; in Azure it lives in handler/.
            try:
                import function
            except ImportError as e:
                logging.error(f"Failed to import function: {e}")
                logging.error(f"sys.path: {sys.path}")
                logging.error(f"Current directory: {os.getcwd()}")
                logging.error(f"Handler path exists: {os.path.exists(os.path.join(os.getcwd(), 'handler'))}")
                handler_dir = os.path.join(os.getcwd(), 'handler')
                if os.path.exists(handler_dir):
                    try:
                        files = os.listdir(handler_dir)
                        logging.error(f"Files in handler directory: {files}")
                    except Exception as list_err:
                        logging.error(f"Failed to list handler directory: {list_err}")
                self.send_json_response(500, {'error': f'Failed to import function: {str(e)}'})
                return

            try:
                ret = function.handler(req_json)
            except Exception as e:
                logging.error(f"Function handler error: {e}", exc_info=True)
                self.send_json_response(500, {'error': str(e)})
                return
        except Exception as e:
            logging.error(f"Unexpected error in _do_POST: {e}", exc_info=True)
            if not self._response_sent:
                self.send_json_response(500, {'error': str(e)})
            return

        # Post-process and send; wrapped so a response always goes out.
        try:
            end = datetime.datetime.now()

            if ret is None:
                logging.error("Function handler returned None")
                self.send_json_response(500, {'error': 'Function handler returned None'})
                return

            if not isinstance(ret, dict):
                logging.warning(f"Function handler returned non-dict: {type(ret)}, value: {ret}")
                ret = {'result': ret}

            log_data = {
                'output': ret.get('result', ret) if isinstance(ret, dict) else ret
            }
            if isinstance(ret, dict) and 'measurement' in ret:
                log_data['measurement'] = ret['measurement']

            if req_json is not None and isinstance(req_json, dict) and 'logs' in req_json:
                log_data['time'] = (end - begin) / datetime.timedelta(microseconds=1)
                results_begin = datetime.datetime.now()
                try:
                    import storage
                    storage_inst = storage.storage.get_instance()
                    b = req_json.get('logs').get('bucket')
                    req_id = invocation_id

                    storage_inst.upload_stream(b, '{}.json'.format(req_id),
                        io.BytesIO(json.dumps(log_data).encode('utf-8')))

                    results_end = datetime.datetime.now()
                    results_time = (results_end - results_begin) / datetime.timedelta(microseconds=1)
                except Exception as e:
                    logging.warning(f"Failed to upload logs to storage: {e}")
                    results_time = 0
            else:
                results_time = 0

            # Cold-start detection via a /tmp marker shared by the container.
            is_cold = False
            container_id = ''
            try:
                fname = os.path.join('/tmp', 'cold_run')
                if not os.path.exists(fname):
                    is_cold = True
                    container_id = str(uuid.uuid4())[0:8]
                    with open(fname, 'a') as f:
                        f.write(container_id)
                else:
                    with open(fname, 'r') as f:
                        container_id = f.read()
            except Exception as e:
                logging.warning(f"Failed to read/write cold_run file: {e}")
                container_id = str(uuid.uuid4())[0:8]

            response_data = {
                # strftime('%s') is a glibc extension (fails on e.g. Windows);
                # timestamp() is portable and gives the same epoch seconds.
                'begin': '{:f}'.format(begin.timestamp()),
                'end': '{:f}'.format(end.timestamp()),
                'results_time': results_time,
                'result': log_data,
                'is_cold': is_cold,
                'container_id': container_id,
                'environ_container_id': os.environ.get('CONTAINER_NAME', ''),
                'request_id': invocation_id
            }

            self.send_json_response(200, response_data)
        except Exception as e:
            logging.error(f"Error processing response: {e}", exc_info=True)
            try:
                self.send_json_response(500, {'error': f'Error processing response: {str(e)}'})
            except Exception as send_error:
                logging.error(f"Failed to send error response: {send_error}", exc_info=True)
                try:
                    self.send_response(500)
                    self.end_headers()
                    self.wfile.write(json.dumps({'error': 'Internal server error'}).encode('utf-8'))
                    self.wfile.flush()
                except Exception:
                    # was a bare `except:`
                    pass


def run(server_class=HTTPServer, handler_class=Handler):
    """Start the custom-handler HTTP server on the Azure-assigned port."""
    try:
        # Azure sets FUNCTIONS_CUSTOMHANDLER_PORT.
        port = int(os.environ.get('FUNCTIONS_CUSTOMHANDLER_PORT', 8080))
        server_address = ('', port)
        httpd = server_class(server_address, handler_class)
        logging.info(f"Starting httpd on port {port}...")
        logging.info(f"Current directory: {os.getcwd()}")
        logging.info(f"Handler path: {os.path.join(os.getcwd(), 'handler')}")
        logging.info(f"Handler path exists: {os.path.exists(os.path.join(os.getcwd(), 'handler'))}")
        logging.info(f"sys.path: {sys.path}")
        try:
            files = os.listdir(os.getcwd())
            logging.info(f"Files in current directory: {files}")
        except Exception as e:
            logging.warning(f"Failed to list current directory: {e}")
        httpd.serve_forever()
    except Exception as e:
        logging.error(f"Failed to start server: {e}", exc_info=True)
        sys.exit(1)


if __name__ == "__main__":
    try:
        run()
    except KeyboardInterrupt:
        logging.info("Server interrupted by user")
        sys.exit(0)
    except Exception as e:
        logging.error(f"Fatal error: {e}", exc_info=True)
        sys.exit(1)
def insert( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + + data[primary_key[0]] = primary_key[1] + # secondary key must have that name in CosmosDB + data["id"] = secondary_key[1] + + self._get_table(table_name).upsert_item(data) + + def get( + self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str] + ) -> dict: + res = self._get_table(table_name).read_item( + item=secondary_key[1], partition_key=primary_key[1] + ) + res[secondary_key[0]] = secondary_key[1] + + return res + + def update( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + updates: dict, + ): + + ops = [] + for key, value in updates.items(): + ops.append({"op": "add", "path": f"/{key}", "value": value}) + + self._get_table(table_name).patch_item( + item=secondary_key[1], partition_key=primary_key[1], patch_operations=ops + ) + + """ + This query must involve partition key - it does not scan across partitions. 
+ """ + + def query( + self, table_name: str, primary_key: Tuple[str, str], secondary_key_name: str + ) -> List[dict]: + + res = list( + self._get_table(table_name).query_items( + f"SELECT * FROM c WHERE c.{primary_key[0]} = '{primary_key[1]}'", + enable_cross_partition_query=False, + ) + ) + + # Emulate the kind key + for item in res: + item[secondary_key_name] = item["id"] + + return res + + def delete(self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str]): + + self._get_table(table_name).delete_item(item=secondary_key[1], partition_key=primary_key[1]) + + @staticmethod + def get_instance( + database: Optional[str] = None, url: Optional[str] = None, credential: Optional[str] = None + ): + if nosql.instance is None: + assert database is not None and url is not None and credential is not None + nosql.instance = nosql(url, credential, database) + return nosql.instance diff --git a/benchmarks/wrappers/azure/pypy/storage.py b/benchmarks/wrappers/azure/pypy/storage.py new file mode 100644 index 000000000..42b129c89 --- /dev/null +++ b/benchmarks/wrappers/azure/pypy/storage.py @@ -0,0 +1,59 @@ + +import os +import uuid +from typing import Optional + +from azure.storage.blob import BlobServiceClient + +class storage: + instance = None + client = None + + def __init__(self, connection_string: str): + self.client = BlobServiceClient.from_connection_string(connection_string) + + @staticmethod + def unique_name(name): + name, extension = os.path.splitext(name) + return '{name}.{random}{extension}'.format( + name=name, + extension=extension, + random=str(uuid.uuid4()).split('-')[0] + ) + + def upload(self, container, file, filepath): + with open(filepath, 'rb') as data: + return self.upload_stream(container, file, data) + + def download(self, container, file, filepath): + with open(filepath, 'wb') as download_file: + download_file.write( self.download_stream(container, file) ) + + def download_directory(self, container, prefix, path): + client = 
self.client.get_container_client(container=container) + objects = client.list_blobs(name_starts_with=prefix) + for obj in objects: + file_name = obj.name + path_to_file = os.path.dirname(file_name) + os.makedirs(os.path.join(path, path_to_file), exist_ok=True) + self.download(container, file_name, os.path.join(path, file_name)) + + def upload_stream(self, container, file, data): + key_name = storage.unique_name(file) + client = self.client.get_blob_client( + container=container, + blob=key_name + ) + client.upload_blob(data) + return key_name + + def download_stream(self, container, file): + client = self.client.get_blob_client(container=container, blob=file) + return client.download_blob().readall() + + @staticmethod + def get_instance(connection_string: Optional[str] = None): + if storage.instance is None: + assert connection_string is not None + storage.instance = storage(connection_string) + return storage.instance diff --git a/benchmarks/wrappers/gcp/pypy/handler.py b/benchmarks/wrappers/gcp/pypy/handler.py new file mode 100644 index 000000000..9b6989611 --- /dev/null +++ b/benchmarks/wrappers/gcp/pypy/handler.py @@ -0,0 +1,72 @@ +import datetime, io, json, os, uuid, sys + +sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) + +# This variable is defined by SeBS during function creation. 
+if 'NOSQL_STORAGE_DATABASE' in os.environ: + from function import nosql + + nosql.nosql.get_instance( + os.environ['NOSQL_STORAGE_DATABASE'] + ) + + +def handler(req): + income_timestamp = datetime.datetime.now().timestamp() + req_id = req.headers.get('Function-Execution-Id') + + + req_json = req.get_json() + req_json['request-id'] = req_id + req_json['income-timestamp'] = income_timestamp + begin = datetime.datetime.now() + # We are deployed in the same directory + from function import function + ret = function.handler(req_json) + end = datetime.datetime.now() + + + log_data = { + 'output': ret['result'] + } + if 'measurement' in ret: + log_data['measurement'] = ret['measurement'] + if 'logs' in req_json: + log_data['time'] = (end - begin) / datetime.timedelta(microseconds=1) + results_begin = datetime.datetime.now() + from function import storage + storage_inst = storage.storage.get_instance() + b = req_json.get('logs').get('bucket') + storage_inst.upload_stream(b, '{}.json'.format(req_id), + io.BytesIO(json.dumps(log_data).encode('utf-8'))) + results_end = datetime.datetime.now() + results_time = (results_end - results_begin) / datetime.timedelta(microseconds=1) + else: + results_time = 0 + + # cold test + is_cold = False + fname = os.path.join('/tmp', 'cold_run') + if not os.path.exists(fname): + is_cold = True + container_id = str(uuid.uuid4())[0:8] + with open(fname, 'a') as f: + f.write(container_id) + else: + with open(fname, 'r') as f: + container_id = f.read() + + cold_start_var = "" + if "cold_start" in os.environ: + cold_start_var = os.environ["cold_start"] + + return json.dumps({ + 'begin': begin.strftime('%s.%f'), + 'end': end.strftime('%s.%f'), + 'results_time': results_time, + 'is_cold': is_cold, + 'result': log_data, + 'request_id': req_id, + 'cold_start_var': cold_start_var, + 'container_id': container_id, + }), 200, {'ContentType': 'application/json'} diff --git a/benchmarks/wrappers/gcp/pypy/nosql.py 
b/benchmarks/wrappers/gcp/pypy/nosql.py new file mode 100644 index 000000000..408712857 --- /dev/null +++ b/benchmarks/wrappers/gcp/pypy/nosql.py @@ -0,0 +1,131 @@ +from typing import List, Optional, Tuple + +from google.cloud import datastore + + +class nosql: + + instance: Optional["nosql"] = None + + """ + Each benchmark supports up to two keys - one for grouping items, + and for unique identification of each item. + + In Google Cloud Datastore, we determine different tables by using + its value for `kind` name. + + The primary key is assigned to the `kind` value. + + To implement sorting semantics, we use the ancestor relation: + the sorting key is used as the parent. + It is the assumption that all related items will have the same parent. + """ + + def __init__(self, database: str): + self._client = datastore.Client(database=database) + + def insert( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + + parent_key = self._client.key(primary_key[0], primary_key[1]) + key = self._client.key( + # kind determines the table + table_name, + # main ID key + secondary_key[1], + # organization key + parent=parent_key, + ) + + val = datastore.Entity(key=key) + val.update(data) + self._client.put(val) + + def update( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + # There is no direct update - we have to fetch the entire entity and manually change fields. 
+ parent_key = self._client.key(primary_key[0], primary_key[1]) + key = self._client.key( + # kind determines the table + table_name, + # main ID key + secondary_key[1], + # organization key + parent=parent_key, + ) + res = self._client.get(key) + if res is None: + res = datastore.Entity(key=key) + res.update(data) + self._client.put(res) + + def get( + self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str] + ) -> Optional[dict]: + + parent_key = self._client.key(primary_key[0], primary_key[1]) + key = self._client.key( + # kind determines the table + table_name, + # main ID key + secondary_key[1], + # organization key + parent=parent_key, + ) + + res = self._client.get(key) + if res is None: + return None + + # Emulate the kind key + res[secondary_key[0]] = secondary_key[1] + + return res + + """ + This query must involve partition key - it does not scan across partitions. + """ + + def query( + self, table_name: str, primary_key: Tuple[str, str], secondary_key_name: str + ) -> List[dict]: + + ancestor = self._client.key(primary_key[0], primary_key[1]) + query = self._client.query(kind=table_name, ancestor=ancestor) + res = list(query.fetch()) + + # Emulate the kind key + for item in res: + item[secondary_key_name] = item.key.name + + return res + + def delete(self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str]): + parent_key = self._client.key(primary_key[0], primary_key[1]) + key = self._client.key( + # kind determines the table + table_name, + # main ID key + secondary_key[1], + # organization key + parent=parent_key, + ) + + return self._client.delete(key) + + @staticmethod + def get_instance(database: Optional[str] = None): + if nosql.instance is None: + assert database is not None + nosql.instance = nosql(database) + return nosql.instance diff --git a/benchmarks/wrappers/gcp/pypy/setup.py b/benchmarks/wrappers/gcp/pypy/setup.py new file mode 100644 index 000000000..b3d878351 --- /dev/null +++ 
b/benchmarks/wrappers/gcp/pypy/setup.py @@ -0,0 +1,15 @@ +from distutils.core import setup +from glob import glob +from pkg_resources import parse_requirements + +with open('requirements.txt') as f: + requirements = [str(r) for r in parse_requirements(f)] + +setup( + name='function', + install_requires=requirements, + packages=['function'], + package_dir={'function': '.'}, + package_data={'function': glob('**', recursive=True)}, +) + diff --git a/benchmarks/wrappers/gcp/pypy/storage.py b/benchmarks/wrappers/gcp/pypy/storage.py new file mode 100644 index 000000000..81163cb34 --- /dev/null +++ b/benchmarks/wrappers/gcp/pypy/storage.py @@ -0,0 +1,61 @@ +import io +import os +import uuid + +from google.cloud import storage as gcp_storage + + +class storage: + instance = None + client = None + + def __init__(self): + self.client = gcp_storage.Client() + + @staticmethod + def unique_name(name): + name, extension = os.path.splitext(name) + return '{name}.{random}{extension}'.format( + name=name, + extension=extension, + random=str(uuid.uuid4()).split('-')[0] + ) + + def upload(self, bucket, file, filepath): + key_name = storage.unique_name(file) + bucket_instance = self.client.bucket(bucket) + blob = bucket_instance.blob(key_name) + blob.upload_from_filename(filepath) + return key_name + + def download(self, bucket, file, filepath): + bucket_instance = self.client.bucket(bucket) + blob = bucket_instance.blob(file) + blob.download_to_filename(filepath) + + def download_directory(self, bucket, prefix, path): + objects = self.client.bucket(bucket).list_blobs(prefix=prefix) + for obj in objects: + file_name = obj.name + path_to_file = os.path.dirname(file_name) + os.makedirs(os.path.join(path, path_to_file), exist_ok=True) + self.download(bucket, file_name, os.path.join(path, file_name)) + + def upload_stream(self, bucket, file, data): + key_name = storage.unique_name(file) + bucket_instance = self.client.bucket(bucket) + blob = bucket_instance.blob(key_name) + 
blob.upload_from_file(data) + return key_name + + def download_stream(self, bucket, file): + data = io.BytesIO() + bucket_instance = self.client.bucket(bucket) + blob = bucket_instance.blob(file) + blob.download_to_file(data) + return data.getbuffer() + + def get_instance(): + if storage.instance is None: + storage.instance = storage() + return storage.instance diff --git a/benchmarks/wrappers/gcp/python/setup.py b/benchmarks/wrappers/gcp/python/setup.py new file mode 100644 index 000000000..b3d878351 --- /dev/null +++ b/benchmarks/wrappers/gcp/python/setup.py @@ -0,0 +1,15 @@ +from distutils.core import setup +from glob import glob +from pkg_resources import parse_requirements + +with open('requirements.txt') as f: + requirements = [str(r) for r in parse_requirements(f)] + +setup( + name='function', + install_requires=requirements, + packages=['function'], + package_dir={'function': '.'}, + package_data={'function': glob('**', recursive=True)}, +) + diff --git a/benchmarks/wrappers/local/pypy/nosql.py b/benchmarks/wrappers/local/pypy/nosql.py new file mode 100644 index 000000000..0e816954c --- /dev/null +++ b/benchmarks/wrappers/local/pypy/nosql.py @@ -0,0 +1,131 @@ +from decimal import Decimal +from os import environ +from typing import List, Optional, Union, Tuple + +import boto3 + + +class nosql: + + instance: Optional["nosql"] = None + + def __init__(self): + + if environ["NOSQL_STORAGE_TYPE"] != "scylladb": + raise RuntimeError(f"Unsupported NoSQL storage type: {environ['NOSQL_STORAGE_TYPE']}!") + + self.client = boto3.resource( + "dynamodb", + region_name="None", + aws_access_key_id="None", + aws_secret_access_key="None", + endpoint_url=f"http://{environ['NOSQL_STORAGE_ENDPOINT']}", + ) + self._tables = {} + + # Based on: https://github.com/boto/boto3/issues/369#issuecomment-157205696 + def _remove_decimals(self, data: dict) -> Union[dict, list, int, float]: + + if isinstance(data, list): + return [self._remove_decimals(x) for x in data] + elif 
isinstance(data, dict): + return {k: self._remove_decimals(v) for k, v in data.items()} + elif isinstance(data, Decimal): + if data.as_integer_ratio()[1] == 1: + return int(data) + else: + return float(data) + else: + return data + + def _get_table(self, table_name: str): + + if table_name not in self._tables: + + env_name = f"NOSQL_STORAGE_TABLE_{table_name}" + + if env_name in environ: + aws_name = environ[env_name] + self._tables[table_name] = self.client.Table(aws_name) + else: + raise RuntimeError( + f"Couldn't find an environment variable {env_name} for table {table_name}" + ) + + return self._tables[table_name] + + def insert( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + for key in (primary_key, secondary_key): + data[key[0]] = key[1] + + self._get_table(table_name).put_item(Item=data) + + def get( + self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str] + ) -> dict: + + data = {} + for key in (primary_key, secondary_key): + data[key[0]] = key[1] + + res = self._get_table(table_name).get_item(Key=data) + return self._remove_decimals(res["Item"]) + + def update( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + updates: dict, + ): + + key_data = {} + for key in (primary_key, secondary_key): + key_data[key[0]] = key[1] + + update_expression = "SET " + update_values = {} + update_names = {} + + # We use attribute names because DynamoDB reserves some keywords, like 'status' + for key, value in updates.items(): + + update_expression += f" #{key}_name = :{key}_value, " + update_values[f":{key}_value"] = value + update_names[f"#{key}_name"] = key + + update_expression = update_expression[:-2] + + self._get_table(table_name).update_item( + Key=key_data, + UpdateExpression=update_expression, + ExpressionAttributeValues=update_values, + ExpressionAttributeNames=update_names, + ) + + def query(self, table_name: str, primary_key: 
Tuple[str, str], _: str) -> List[dict]: + + res = self._get_table(table_name).query( + KeyConditionExpression=f"{primary_key[0]} = :keyvalue", + ExpressionAttributeValues={":keyvalue": primary_key[1]}, + )["Items"] + return self._remove_decimals(res) + + def delete(self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str]): + data = {} + for key in (primary_key, secondary_key): + data[key[0]] = key[1] + + self._get_table(table_name).delete_item(Key=data) + + @staticmethod + def get_instance(): + if nosql.instance is None: + nosql.instance = nosql() + return nosql.instance diff --git a/benchmarks/wrappers/local/pypy/storage.py b/benchmarks/wrappers/local/pypy/storage.py new file mode 100644 index 000000000..b44968408 --- /dev/null +++ b/benchmarks/wrappers/local/pypy/storage.py @@ -0,0 +1,58 @@ +import io +import os +import uuid + +import minio + +class storage: + instance = None + client = None + + def __init__(self): + if 'MINIO_ADDRESS' in os.environ: + address = os.environ['MINIO_ADDRESS'] + access_key = os.environ['MINIO_ACCESS_KEY'] + secret_key = os.environ['MINIO_SECRET_KEY'] + self.client = minio.Minio( + address, + access_key=access_key, + secret_key=secret_key, + secure=False) + + @staticmethod + def unique_name(name): + name, extension = os.path.splitext(name) + return '{name}.{random}{extension}'.format( + name=name, + extension=extension, + random=str(uuid.uuid4()).split('-')[0] + ) + + def upload(self, bucket, file, filepath): + key_name = storage.unique_name(file) + self.client.fput_object(bucket, key_name, filepath) + return key_name + + def download(self, bucket, file, filepath): + self.client.fget_object(bucket, file, filepath) + + def download_directory(self, bucket, prefix, path): + objects = self.client.list_objects_v2(bucket, prefix, recursive=True) + for obj in objects: + file_name = obj.object_name + self.download(bucket, file_name, os.path.join(path, file_name)) + + def upload_stream(self, bucket, file, 
bytes_data): + key_name = storage.unique_name(file) + self.client.put_object(bucket, key_name, bytes_data, bytes_data.getbuffer().nbytes) + return key_name + + def download_stream(self, bucket, file): + data = self.client.get_object(bucket, file) + return data.read() + + def get_instance(): + if storage.instance is None: + storage.instance = storage() + return storage.instance + diff --git a/config/comparison_example.json b/config/comparison_example.json new file mode 100644 index 000000000..f9daf292a --- /dev/null +++ b/config/comparison_example.json @@ -0,0 +1,41 @@ +{ + "deployment": { + "name": "aws", + "aws": { + "region": "us-east-1", + "access_key_id": "", + "secret_access_key": "" + }, + "azure": { + "subscription_id": "", + "tenant_id": "", + "client_id": "", + "client_secret": "" + }, + "gcp": { + "project_id": "", + "credentials": "" + }, + "local": { + "runtime": { + "python": "3.11", + "nodejs": "20", + "pypy": "3.11" + } + } + }, + "experiments": { + "update_code": false, + "update_storage": false, + "download_results": true, + "architecture": "x64", + "container_deployment": false, + "runtime": { + "language": "python", + "version": "3.11" + }, + "repetitions": 5, + "memory": 256 + } +} + diff --git a/config/systems.json b/config/systems.json index 5a38b4965..770773fed 100644 --- a/config/systems.json +++ b/config/systems.json @@ -64,10 +64,41 @@ ], "packages": [] } + }, + "pypy": { + "base_images": { + "x64": { + "3.11": "pypy:3.11" + }, + "arm64": { + "3.11": "pypy:3.11" + } + }, + "images": [ + "run", + "build" + ], + "username": "docker_user", + "deployment": { + "files": [ + "storage.py", + "nosql.py" + ], + "packages": [], + "module_packages": { + "nosql": [ + "boto3==1.28.3" + ] + } + } } }, - "architecture": ["x64"], - "deployments": ["package"] + "architecture": [ + "x64" + ], + "deployments": [ + "package" + ] }, "aws": { "languages": { @@ -121,10 +152,80 @@ "uuid": "3.4.0" } } + }, + "rust": { + "base_images": { + "x64": { + "1.88": 
"amazonlinux:2023" + }, + "arm64": { + "1.88": "amazonlinux:2023" + } + }, + "images": [ + "build" + ], + "deployment": { + "files": [ + "src", + "Cargo.toml" + ], + "packages": {} + } + }, + "java": { + "base_images": { + "x64": { + "17": "public.ecr.aws/lambda/java:17" + }, + "arm64": { + "17": "public.ecr.aws/lambda/java:17" + } + }, + "images": [ + "build" + ], + "deployment": { + "files": [ + "pom.xml", + "src" + ], + "packages": {}, + "module_packages": {} + } + }, + "pypy": { + "base_images": { + "x64": { + "3.11": "pypy:3.11-slim" + }, + "arm64": { + "3.11": "pypy:3.11-slim" + } + }, + "images": [ + "build" + ], + "deployment": { + "files": [ + "handler.py", + "storage.py", + "nosql.py", + "setup.py" + ], + "packages": [], + "module_packages": {} + } } }, - "architecture": ["x64", "arm64"], - "deployments": ["package", "container"] + "architecture": [ + "x64", + "arm64" + ], + "deployments": [ + "package", + "container" + ] }, "azure": { "languages": { @@ -181,6 +282,52 @@ "uuid": "3.4.0" } } + }, + "java": { + "base_images": { + "x64": { + "17": "mcr.microsoft.com/azure-functions/java:4-java17" + } + }, + "images": [ + "build" + ], + "username": "docker_user", + "deployment": { + "files": [ + "pom.xml", + "src" + ], + "packages": {}, + "module_packages": {} + } + }, + "pypy": { + "base_images": { + "x64": { + "3.11": "ubuntu:20.04" + } + }, + "images": [ + "build" + ], + "username": "docker_user", + "deployment": { + "files": [ + "handler.py", + "storage.py", + "nosql.py" + ], + "packages": [], + "module_packages": { + "storage": [ + "azure-storage-blob" + ], + "nosql": [ + "azure-cosmos" + ] + } + } } }, "images": { @@ -188,19 +335,26 @@ "username": "docker_user" } }, - "architecture": ["x64"], - "deployments": ["package"] + "architecture": [ + "x64" + ], + "deployments": [ + "package" + ] }, "gcp": { "languages": { "python": { "base_images": { "x64": { - "3.8": "ubuntu:22.04", - "3.9": "ubuntu:22.04", - "3.10": "ubuntu:22.04", - "3.11": "ubuntu:22.04", - 
"3.12": "ubuntu:22.04" + "3.10": "python:3.10-slim", + "3.11": "python:3.11-slim", + "3.12": "python:3.12-slim" + }, + "arm64": { + "3.10": "python:3.10-slim", + "3.11": "python:3.11-slim", + "3.12": "python:3.12-slim" } }, "images": [ @@ -211,7 +365,39 @@ "files": [ "handler.py", "storage.py", - "nosql.py" + "nosql.py", + "setup.py" + ], + "packages": [], + "module_packages": { + "storage": [ + "google-cloud-storage" + ], + "nosql": [ + "google-cloud-datastore" + ] + } + } + }, + "pypy": { + "base_images": { + "x64": { + "3.11": "pypy:3.11-slim" + }, + "arm64": { + "3.11": "pypy:3.11-slim" + } + }, + "images": [ + "build" + ], + "username": "docker_user", + "deployment": { + "files": [ + "handler.py", + "storage.py", + "nosql.py", + "setup.py" ], "packages": [], "module_packages": { @@ -227,8 +413,8 @@ "nodejs": { "base_images": { "x64": { - "18": "ubuntu:22.04", - "20": "ubuntu:22.04" + "20": "node:20-slim", + "22": "node:22-slim" } }, "images": [ @@ -252,8 +438,14 @@ "username": "docker_user" } }, - "architecture": ["x64"], - "deployments": ["package"] + "architecture": [ + "x64", + "arm64" + ], + "deployments": [ + "package", + "container" + ] }, "openwhisk": { "languages": { @@ -313,7 +505,11 @@ } } }, - "architecture": ["x64"], - "deployments": ["container"] + "architecture": [ + "x64" + ], + "deployments": [ + "container" + ] } -} +} \ No newline at end of file diff --git a/dockerfiles/aws/java/Dockerfile.build b/dockerfiles/aws/java/Dockerfile.build new file mode 100644 index 000000000..bd977d694 --- /dev/null +++ b/dockerfiles/aws/java/Dockerfile.build @@ -0,0 +1,27 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV JAVA_VERSION=${VERSION} + +# useradd, groupmod, build tooling +RUN yum install -y shadow-utils unzip tar gzip zip +# Install Maven 3.x (maven package may be old, install from Apache directly) +RUN curl -fsSL https://archive.apache.org/dist/maven/maven-3/3.9.6/binaries/apache-maven-3.9.6-bin.tar.gz | tar -xz -C /opt && \ + ln -s 
/opt/apache-maven-3.9.6 /opt/maven && \ + ln -s /opt/maven/bin/mvn /usr/local/bin/mvn +ENV PATH=/opt/maven/bin:$PATH +ENV GOSU_VERSION 1.14 +# https://github.com/tianon/gosu/releases/tag/1.14 +# key https://keys.openpgp.org/search?q=tianon%40debian.org +RUN curl -o /usr/local/bin/gosu -SL "https://github.com/tianon/gosu/releases/download/${GOSU_VERSION}/gosu-amd64" \ + && chmod +x /usr/local/bin/gosu +RUN mkdir -p /sebs/ +COPY dockerfiles/java_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh /sebs/installer.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV PATH=/usr/sbin:$PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/aws/java/Dockerfile.function b/dockerfiles/aws/java/Dockerfile.function new file mode 100644 index 000000000..aa94525b1 --- /dev/null +++ b/dockerfiles/aws/java/Dockerfile.function @@ -0,0 +1,30 @@ +ARG BASE_IMAGE +FROM $BASE_IMAGE +ARG VERSION +ENV JAVA_VERSION=${VERSION} +ARG TARGET_ARCHITECTURE + +COPY . 
function/ + +# Ensure packaged jar is present for the Lambda base image +# function.jar should exist (created by java_installer.sh), but if not, copy from target/ +# Prefer the shaded/fat JAR (exclude "original" JARs created by maven-shade-plugin) +RUN if [ -f "function/function.jar" ]; then \ + cp function/function.jar function.jar; \ + elif [ -d "function/target" ] && ls function/target/*.jar >/dev/null 2>&1; then \ + JAR_FILE=$(ls function/target/*.jar 2>/dev/null | grep -v "original-" | head -n1); \ + if [ -z "$JAR_FILE" ]; then \ + JAR_FILE=$(ls function/target/*.jar | head -n1); \ + fi; \ + cp "$JAR_FILE" function.jar; \ + else \ + echo "Error: function.jar not found"; \ + echo "Contents of function/:"; \ + ls -la function/ 2>/dev/null || true; \ + exit 1; \ + fi \ + && test -f function.jar \ + && (unzip function.jar || jar xf function.jar) \ + && rm function.jar + +CMD ["org.serverlessbench.Handler::handleRequest"] diff --git a/dockerfiles/aws/pypy/Dockerfile.build b/dockerfiles/aws/pypy/Dockerfile.build new file mode 100644 index 000000000..20d50e6ba --- /dev/null +++ b/dockerfiles/aws/pypy/Dockerfile.build @@ -0,0 +1,17 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV PYTHON_VERSION=${VERSION} + +COPY --from=tianon/gosu:1.19-debian /usr/local/bin/gosu /usr/local/bin/gosu + +RUN mkdir -p /sebs/ +COPY dockerfiles/pypy_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV PATH=/usr/sbin:$PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/aws/pypy/Dockerfile.function b/dockerfiles/aws/pypy/Dockerfile.function new file mode 100644 index 000000000..a130164b0 --- /dev/null +++ b/dockerfiles/aws/pypy/Dockerfile.function @@ -0,0 +1,58 @@ +ARG BASE_IMAGE +ARG TARGET_ARCHITECTURE + +FROM --platform=linux/amd64 ${BASE_IMAGE} AS 
base-x64 +FROM --platform=linux/arm64/v8 ${BASE_IMAGE} AS base-arm64 + +FROM base-${TARGET_ARCHITECTURE} +ARG TARGET_ARCHITECTURE +ARG VERSION +ENV PYTHON_VERSION=${VERSION} + +COPY . function/ +ENV PLATFORM_ARG="" + +# Non-wheel images +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + libc-dev \ + libjpeg-dev \ + zlib1g-dev \ + libtiff-dev \ + libfreetype6-dev \ + liblcms2-dev \ + libwebp-dev \ + g++ \ + cmake \ + make \ + && rm -rf /var/lib/apt/lists/* + +RUN touch function/__init__.py + # Probably remove this conditional, might break pypy builds, might lead to installation of CPython libraries +RUN if [ "${TARGET_ARCHITECTURE}" = "arm64" ]; then \ + export PLATFORM_ARG="--platform manylinux_2_17_aarch64 --only-binary=:all:"; \ + fi + +RUN ls -la function/ +RUN if [ "${TARGET_ARCHITECTURE}" = "arm64" ] && test -f "function/requirements.txt.arm.${PYTHON_VERSION}"; then \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . \ + -r function/requirements.txt \ + -r function/requirements.txt.arm.${PYTHON_VERSION} \ + function/ && \ + pip cache purge; \ + elif test -f "function/requirements.txt.${PYTHON_VERSION}"; then \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . \ + -r function/requirements.txt \ + -r function/requirements.txt.${PYTHON_VERSION} \ + function/ && \ + pip cache purge; \ + else \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . 
\ + -r function/requirements.txt \ + function/ && \ + pip cache purge; \ + fi + +RUN pip install awslambdaric +ENTRYPOINT ["/opt/pypy/bin/python", "-m", "awslambdaric"] +CMD ["function/handler.handler"] diff --git a/dockerfiles/aws/rust/Dockerfile.build b/dockerfiles/aws/rust/Dockerfile.build new file mode 100644 index 000000000..020cc9c7c --- /dev/null +++ b/dockerfiles/aws/rust/Dockerfile.build @@ -0,0 +1,32 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV RUST_VERSION=${VERSION} + +# Install required build tools +RUN yum install -y gcc gcc-c++ make openssl-devel shadow-utils zip + +# Install Rust +ENV RUSTUP_HOME=/usr/local/rustup \ + CARGO_HOME=/usr/local/cargo \ + PATH=/usr/local/cargo/bin:$PATH + +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain ${RUST_VERSION} \ + && chmod -R a+w $RUSTUP_HOME $CARGO_HOME + +# Install gosu for user management +ENV GOSU_VERSION 1.14 +RUN curl -o /usr/local/bin/gosu -SL "https://github.com/tianon/gosu/releases/download/${GOSU_VERSION}/gosu-amd64" \ + && chmod +x /usr/local/bin/gosu + +# Setup SEBS scripts +RUN mkdir -p /sebs/ +COPY dockerfiles/rust_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh /sebs/installer.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV PATH=/usr/sbin:$PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/aws/rust/Dockerfile.function b/dockerfiles/aws/rust/Dockerfile.function new file mode 100644 index 000000000..c7523e48d --- /dev/null +++ b/dockerfiles/aws/rust/Dockerfile.function @@ -0,0 +1,12 @@ +ARG BASE_IMAGE +FROM $BASE_IMAGE + +# Copy the pre-built bootstrap binary +# The binary is already built by the build container +COPY bootstrap /var/runtime/bootstrap + +# Make sure it's executable +RUN chmod +x /var/runtime/bootstrap + +# Set the CMD to the handler 
(AWS Lambda will execute this) +CMD ["/var/runtime/bootstrap"] diff --git a/dockerfiles/azure/java/Dockerfile.build b/dockerfiles/azure/java/Dockerfile.build new file mode 100644 index 000000000..6d7a36972 --- /dev/null +++ b/dockerfiles/azure/java/Dockerfile.build @@ -0,0 +1,18 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV JAVA_VERSION=${VERSION} + +RUN apt-get update && apt-get install -y gosu maven unzip zip \ + && apt-get clean + +RUN mkdir -p /sebs/ +COPY dockerfiles/java_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh /sebs/installer.sh + +# useradd and groupmod are in /usr/sbin which is not in PATH +ENV PATH=/usr/sbin:$PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/azure/pypy/Dockerfile.build b/dockerfiles/azure/pypy/Dockerfile.build new file mode 100644 index 000000000..ea0ed7e78 --- /dev/null +++ b/dockerfiles/azure/pypy/Dockerfile.build @@ -0,0 +1,19 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV PYTHON_VERSION=${VERSION} + +RUN apt-get update && apt-get install -y gosu curl tar bzip2 gzip libc6 libgcc-s1 && \ + mkdir -p /lib64 && \ + (ln -sf /lib/x86_64-linux-gnu/ld-linux-x86-64.so.2 /lib64/ld-linux-x86-64.so.2 || true) + +RUN mkdir -p /sebs/ +COPY dockerfiles/pypy_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV PATH=/usr/sbin:$PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/entrypoint.sh b/dockerfiles/entrypoint.sh index c8e24cd4e..91f3e56d9 100755 --- a/dockerfiles/entrypoint.sh +++ b/dockerfiles/entrypoint.sh @@ -6,7 +6,7 @@ USER=${CONTAINER_USER} useradd --non-unique -m -u ${USER_ID} ${USER} groupmod --non-unique -g ${GROUP_ID} 
${USER} -mkdir -p /mnt/function && chown -R ${USER}:${USER} /mnt/function +mkdir -p /mnt/function && chown -R ${USER_ID}:${GROUP_ID} /mnt/function 2>/dev/null || true export HOME=/home/${USER} echo "Running as ${USER}, with ${USER_ID} and ${GROUP_ID}" diff --git a/dockerfiles/gcp/nodejs/Dockerfile.build b/dockerfiles/gcp/nodejs/Dockerfile.build index 477f236bc..d98780c50 100755 --- a/dockerfiles/gcp/nodejs/Dockerfile.build +++ b/dockerfiles/gcp/nodejs/Dockerfile.build @@ -1,12 +1,8 @@ ARG BASE_IMAGE FROM ${BASE_IMAGE} ARG VERSION -ENV NVM_DIR=/nvm -#RUN install_node --ignore-verification-failure v${VERSION} -RUN apt-get update && apt-get install -y gosu wget -RUN mkdir -p ${NVM_DIR} && wget -qO- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash -RUN . ${NVM_DIR}/nvm.sh && nvm install ${VERSION} && nvm alias default ${VERSION} && nvm use default +COPY --from=tianon/gosu:1.19-debian /usr/local/bin/gosu /usr/local/bin/gosu RUN mkdir -p /sebs/ COPY dockerfiles/nodejs_installer.sh /sebs/installer.sh diff --git a/dockerfiles/gcp/pypy/Dockerfile.build b/dockerfiles/gcp/pypy/Dockerfile.build new file mode 100755 index 000000000..c2c918dcf --- /dev/null +++ b/dockerfiles/gcp/pypy/Dockerfile.build @@ -0,0 +1,26 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV PYTHON_VERSION=${VERSION} +ENV DEBIAN_FRONTEND="noninteractive" +ENV TZ="Europe/Zurich" + +RUN apt-get update\ + && apt-get install -y --no-install-recommends gosu gcc build-essential libxml2 libxml2-dev zlib1g-dev gpg-agent zip\ + && apt-get update\ + && apt-get purge -y --auto-remove + +#RUN export PATH=/opt/python3.7/bin:/opt/python3.6/bin:/opt/python3.5/bin:/opt/python3.4/bin:$PATH +RUN python -mvenv /sebs/env +ENV PATH /sebs/env/bin:${PATH} +ENV VIRTUAL_ENV /sebs/env + +RUN mkdir -p /sebs/ +COPY dockerfiles/pypy_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which 
is not in PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/gcp/pypy/Dockerfile.function b/dockerfiles/gcp/pypy/Dockerfile.function new file mode 100644 index 000000000..18b53467c --- /dev/null +++ b/dockerfiles/gcp/pypy/Dockerfile.function @@ -0,0 +1,59 @@ +ARG BASE_IMAGE +ARG TARGET_ARCHITECTURE + +FROM --platform=linux/amd64 ${BASE_IMAGE} AS base-x64 +FROM --platform=linux/arm64/v8 ${BASE_IMAGE} AS base-arm64 + +FROM base-${TARGET_ARCHITECTURE} +ARG TARGET_ARCHITECTURE +ARG VERSION +ENV PYTHON_VERSION=${VERSION} + +COPY . function/ + +ENV PLATFORM_ARG="" + +RUN pip install functions-framework + +# Non-wheel images +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + libc-dev \ + libjpeg-dev \ + zlib1g-dev \ + libtiff-dev \ + libfreetype6-dev \ + liblcms2-dev \ + libwebp-dev \ + g++ \ + cmake \ + make \ + && rm -rf /var/lib/apt/lists/* + +RUN touch function/__init__.py \ + && if [[ "${TARGET_ARCHITECTURE}" == "arm64" ]]; then \ + export PLATFORM_ARG="--platform manylinux_2_17_aarch64 --only-binary=:all:"; \ + fi \ + && if [[ "${TARGET_ARCHITECTURE}" == "arm64" ]] && test -f "function/requirements.txt.arm.${PYTHON_VERSION}"; then \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . \ + -r function/requirements.txt \ + -r function/requirements.txt.arm.${PYTHON_VERSION} \ + function/ && \ + pip cache purge; \ + elif test -f "function/requirements.txt.${PYTHON_VERSION}"; then \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . \ + -r function/requirements.txt \ + -r function/requirements.txt.${PYTHON_VERSION} \ + function/ && \ + pip cache purge; \ + else \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . 
\ + -r function/requirements.txt \ + function/ && \ + pip cache purge; \ + fi + + +ENV PYTHONPATH="/" + +CMD ["functions-framework", "--target=handler", "--source=function/handler.py"] \ No newline at end of file diff --git a/dockerfiles/gcp/python/Dockerfile.build b/dockerfiles/gcp/python/Dockerfile.build index 88554d230..ae52345b1 100755 --- a/dockerfiles/gcp/python/Dockerfile.build +++ b/dockerfiles/gcp/python/Dockerfile.build @@ -5,11 +5,10 @@ ENV PYTHON_VERSION=${VERSION} ENV DEBIAN_FRONTEND="noninteractive" ENV TZ="Europe/Zurich" +COPY --from=tianon/gosu:1.19-debian /usr/local/bin/gosu /usr/local/bin/gosu + RUN apt-get update\ - && apt-get install -y --no-install-recommends gosu gcc build-essential libxml2 libxml2-dev zlib1g-dev software-properties-common gpg-agent zip\ - && add-apt-repository -y ppa:deadsnakes/ppa\ - && apt-get update\ - && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-venv python${PYTHON_VERSION}-dev\ + && apt-get install -y --no-install-recommends gcc build-essential libxml2 libxml2-dev zlib1g-dev gpg-agent zip\ && apt-get purge -y --auto-remove #RUN export PATH=/opt/python3.7/bin:/opt/python3.6/bin:/opt/python3.5/bin:/opt/python3.4/bin:$PATH diff --git a/dockerfiles/gcp/python/Dockerfile.function b/dockerfiles/gcp/python/Dockerfile.function new file mode 100644 index 000000000..f165437e6 --- /dev/null +++ b/dockerfiles/gcp/python/Dockerfile.function @@ -0,0 +1,44 @@ +ARG BASE_IMAGE +ARG TARGET_ARCHITECTURE + +FROM --platform=linux/amd64 ${BASE_IMAGE} AS base-x64 +FROM --platform=linux/arm64/v8 ${BASE_IMAGE} AS base-arm64 + +FROM base-${TARGET_ARCHITECTURE} +ARG TARGET_ARCHITECTURE +ARG VERSION +ENV PYTHON_VERSION=${VERSION} + +COPY . 
function/ + +ENV PLATFORM_ARG="" + +RUN pip install functions-framework + +RUN touch function/__init__.py \ + && if [[ "${TARGET_ARCHITECTURE}" == "arm64" ]]; then \ + export PLATFORM_ARG="--platform manylinux_2_17_aarch64 --only-binary=:all:"; \ + fi \ + && if [[ "${TARGET_ARCHITECTURE}" == "arm64" ]] && test -f "function/requirements.txt.arm.${PYTHON_VERSION}"; then \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . \ + -r function/requirements.txt \ + -r function/requirements.txt.arm.${PYTHON_VERSION} \ + function/ && \ + pip cache purge; \ + elif test -f "function/requirements.txt.${PYTHON_VERSION}"; then \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . \ + -r function/requirements.txt \ + -r function/requirements.txt.${PYTHON_VERSION} \ + function/ && \ + pip cache purge; \ + else \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . \ + -r function/requirements.txt \ + function/ && \ + pip cache purge; \ + fi + + +ENV PYTHONPATH="/" + +CMD ["functions-framework", "--target=handler", "--source=function/handler.py"] \ No newline at end of file diff --git a/dockerfiles/java_installer.sh b/dockerfiles/java_installer.sh new file mode 100644 index 000000000..d2ff97a90 --- /dev/null +++ b/dockerfiles/java_installer.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +set -euo pipefail + +cd /mnt/function + +# Find pom.xml recursively +POM_PATH=$(find . 
-maxdepth 3 -name "pom.xml" | head -n1) + +if [[ -n "${POM_PATH}" ]]; then + echo "Found pom.xml at ${POM_PATH}" + POM_DIR=$(dirname "${POM_PATH}") + cd "${POM_DIR}" + + # Note: -q flag causes issues in Docker, removed for reliable builds + mvn -DskipTests clean package + + if ls target/*.jar >/dev/null 2>&1; then + # Prefer the shaded/fat JAR (exclude "original" JARs created by maven-shade-plugin) + # The shaded JAR contains all dependencies and is the one we want to use + JAR_PATH=$(ls target/*.jar 2>/dev/null | grep -v "original-" | head -n1) + if [[ -z "${JAR_PATH}" ]]; then + # Fallback to any JAR if no non-original JAR found + JAR_PATH=$(ls target/*.jar | head -n1) + fi + echo "Found built jar at ${JAR_PATH}" + cp "${JAR_PATH}" /mnt/function/function.jar + fi + + cd /mnt/function +else + echo "No pom.xml found!" +fi + +if [[ -f "${SCRIPT_FILE:-}" ]]; then + /bin/bash "${SCRIPT_FILE}" . +fi diff --git a/dockerfiles/local/pypy/Dockerfile.build b/dockerfiles/local/pypy/Dockerfile.build new file mode 100755 index 000000000..6edb0bbac --- /dev/null +++ b/dockerfiles/local/pypy/Dockerfile.build @@ -0,0 +1,18 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV PYTHON_VERSION=${VERSION} + +RUN apt-get update\ + && apt-get install -y --no-install-recommends gcc build-essential python3-dev libxml2 libxml2-dev zlib1g-dev gosu\ + && apt-get purge -y --auto-remove + +RUN mkdir -p /sebs/ +COPY dockerfiles/pypy_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/local/pypy/Dockerfile.run b/dockerfiles/local/pypy/Dockerfile.run new file mode 100755 index 000000000..25fa2ebdb --- /dev/null +++ b/dockerfiles/local/pypy/Dockerfile.run @@ -0,0 +1,25 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} + +RUN deps=''\ 
+ && apt-get update\ + # for route and sudo + && apt-get install --no-install-recommends -y curl gosu net-tools sudo ${deps}\ + && apt-get purge -y --auto-remove ${deps}\ + && pip3 install cffi minio bottle + +RUN mkdir -p /sebs +COPY dockerfiles/local/run.sh /sebs/ +COPY dockerfiles/local/*.py /sebs/ +COPY dockerfiles/local/pypy/*.py /sebs/ +COPY dockerfiles/local/pypy/run_server.sh /sebs/ +COPY dockerfiles/local/pypy/timeit.sh /sebs/ +COPY dockerfiles/local/pypy/runners.json /sebs/ +ADD third-party/pypapi/pypapi /sebs/pypapi +ENV PYTHONPATH=/sebs/.python_packages/lib/site-packages:$PYTHONPATH + +COPY dockerfiles/local/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh +RUN chmod +x /sebs/run.sh + +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/local/pypy/analyzer-runner.py b/dockerfiles/local/pypy/analyzer-runner.py new file mode 100644 index 000000000..624459795 --- /dev/null +++ b/dockerfiles/local/pypy/analyzer-runner.py @@ -0,0 +1,64 @@ + +import datetime, json, sys, subprocess, os +ip_address = os.environ['DOCKER_HOST_IP'] +cfg = json.load(open(sys.argv[1], 'r')) +ret = subprocess.run(['curl', '-X', 'POST', + '{}:{}/start'.format(ip_address, cfg['benchmark']['analyzer']['analyzer_port']), + '-d', + '{{"uuid": "{}" }}'.format(sys.argv[2])], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) +if ret.returncode != 0: + import sys + print('Analyzer initialization failed!') + print(ret.stderr.decode('utf-8')) + sys.exit(100) + + +from utils import * +from tools import * +# imported function +from function import function + +repetitions = cfg['benchmark']['repetitions'] +disable_gc = cfg['benchmark']['disable_gc'] +input_data = cfg['input'] + +timedata = [0] * repetitions +try: + start = start_benchmarking(disable_gc) + for i in range(0, repetitions): + begin = datetime.datetime.now() + res = function.handler(input_data) + stop = datetime.datetime.now() + print(res, file = open( + get_result_prefix(LOGS_DIR, 'output', 'txt'), + 'w' + 
)) + timedata[i] = [begin, stop] + end = stop_benchmarking() + + ret = subprocess.run( + [ + 'curl', '-X', 'POST', + '{}:{}/stop'.format(ip_address, cfg['benchmark']['analyzer']['analyzer_port']), + '-d', + '{{"uuid": "{}" }}'.format(sys.argv[2]) + ], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if ret.returncode != 0: + import sys + print('Analyzer deinitialization failed!') + print(ret.stderr.decode('utf-8')) + sys.exit(101) + experiment_data = {} + experiment_data['repetitions'] = repetitions + experiment_data['timestamps'] = process_timestamps(timedata) + experiment_data['start'] = str(start) + experiment_data['end'] = str(end) + print(json.dumps({'experiment': experiment_data, 'runtime': get_config()}, indent=2)) +except Exception as e: + print('Exception caught!') + print(e) + sys.exit(102) +sys.exit(0) diff --git a/dockerfiles/local/pypy/config.py b/dockerfiles/local/pypy/config.py new file mode 100644 index 000000000..e7115cc73 --- /dev/null +++ b/dockerfiles/local/pypy/config.py @@ -0,0 +1,5 @@ +import json + +from tools import * + +print(json.dumps(get_config())) diff --git a/dockerfiles/local/pypy/papi-runner.py b/dockerfiles/local/pypy/papi-runner.py new file mode 100644 index 000000000..0c82d476d --- /dev/null +++ b/dockerfiles/local/pypy/papi-runner.py @@ -0,0 +1,104 @@ + +import datetime, json, sys, traceback, csv + +from utils import * +from tools import * + +# imported function +from function import function + +import pypapi.exceptions + +class papi_benchmarker: + from pypapi import papi_low as papi + from pypapi import events as papi_events + + def __init__(self, papi_cfg): + self.events = [] + self.events_names = [] + self.count = 0 + + self.papi.library_init() + self.events = self.papi.create_eventset() + for event in papi_cfg['events']: + try: + self.papi.add_event(self.events, getattr(self.papi_events, event)) + except pypapi.exceptions.PapiInvalidValueError as err: + print('Adding event {event} failed!'.format(event=event)) + 
sys.exit(100) + + self.events_names = papi_cfg['events'] + self.count = len(papi_cfg['events']) + self.results = [] + + self.ins_granularity = papi_cfg['overflow_instruction_granularity'] + self.buffer_size = papi_cfg['overflow_buffer_size'] + self.start_time = datetime.datetime.now() + + self.papi.overflow_sampling(self.events, self.papi_events.PAPI_TOT_INS, + int(self.ins_granularity), int(self.buffer_size)) + + def start_overflow(self): + self.papi.start(self.events) + + def stop_overflow(self): + self.papi.stop(self.events) + + def get_results(self): + data = self.papi.overflow_sampling_results(self.events) + for vals in data: + for i in range(0, len(vals), self.count + 1): + chunks = vals[i:i+self.count+1] + measurement_time = datetime.datetime.fromtimestamp(chunks[0]/1e6) + time = (measurement_time - self.start_time) / datetime.timedelta(microseconds = 1) + self.results.append([measurement_time.strftime("%s.%f"), time] + list(chunks[1:])) + + def finish(self): + self.papi.cleanup_eventset(self.events) + self.papi.destroy_eventset(self.events) + + +cfg = json.load(open(sys.argv[1], 'r')) +repetitions = cfg['benchmark']['repetitions'] +disable_gc = cfg['benchmark']['disable_gc'] +input_data = cfg['input'] +papi_experiments = papi_benchmarker(cfg['benchmark']['papi']) + +timedata = [0] * repetitions +try: + start = start_benchmarking(disable_gc) + for i in range(0, repetitions): + begin = datetime.datetime.now() + papi_experiments.start_overflow() + res = function.handler(input_data) + papi_experiments.stop_overflow() + stop = datetime.datetime.now() + print(res, file = open( + get_result_prefix(LOGS_DIR, 'output', 'txt'), + 'w' + )) + timedata[i] = [begin, stop] + end = stop_benchmarking() +except Exception as e: + print('Exception caught!') + print(e) + traceback.print_exc() + + +papi_experiments.get_results() +papi_experiments.finish() +result = get_result_prefix(RESULTS_DIR, cfg['benchmark']['name'], 'csv') +with open(result, 'w') as f: + csv_writer = 
csv.writer(f) + csv_writer.writerow( + ['Time','RelativeTime'] + papi_experiments.events_names + ) + for val in papi_experiments.results: + csv_writer.writerow(val) + +experiment_data = {} +experiment_data['repetitions'] = repetitions +experiment_data['timestamps'] = process_timestamps(timedata) +experiment_data['start'] = str(start) +experiment_data['end'] = str(end) +print(json.dumps({'experiment': experiment_data, 'runtime': get_config()}, indent=2)) diff --git a/dockerfiles/local/pypy/run_server.sh b/dockerfiles/local/pypy/run_server.sh new file mode 100755 index 000000000..ab09238e3 --- /dev/null +++ b/dockerfiles/local/pypy/run_server.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +pypy /sebs/server.py "$@" diff --git a/dockerfiles/local/pypy/runners.json b/dockerfiles/local/pypy/runners.json new file mode 100644 index 000000000..3ef17d1f4 --- /dev/null +++ b/dockerfiles/local/pypy/runners.json @@ -0,0 +1,7 @@ +{ + "papi": "papi-runner.py", + "time" : {"warm" : "time-in-proc.py", "cold" : "time-out-proc.py"}, + "memory": "analyzer-runner.py", + "disk-io": "analyzer-runner.py", + "config": ["pypy", "config.py"] +} diff --git a/dockerfiles/local/pypy/server.py b/dockerfiles/local/pypy/server.py new file mode 100644 index 000000000..4ed1314f2 --- /dev/null +++ b/dockerfiles/local/pypy/server.py @@ -0,0 +1,38 @@ +import datetime +import os +import sys +import uuid + +import bottle +from bottle import route, run, template, request + +CODE_LOCATION='/function' + +@route('/alive', method='GET') +def alive(): + return { + "result": "ok" + } + +@route('/', method='POST') +def process_request(): + begin = datetime.datetime.now() + from function import function + end = datetime.datetime.now() + # FIXME: measurements?
+ ret = function.handler(request.json) + + return { + 'begin': begin.strftime('%s.%f'), + 'end': end.strftime('%s.%f'), + "request_id": str(uuid.uuid4()), + "is_cold": False, + "result": { + "output": ret + } + } + +sys.path.append(os.path.join(CODE_LOCATION)) +sys.path.append(os.path.join(CODE_LOCATION, '.python_packages/lib/site-packages/')) +run(host='0.0.0.0', port=int(sys.argv[1]), debug=True) + diff --git a/dockerfiles/local/pypy/time-in-proc.py b/dockerfiles/local/pypy/time-in-proc.py new file mode 100644 index 000000000..962da527a --- /dev/null +++ b/dockerfiles/local/pypy/time-in-proc.py @@ -0,0 +1,59 @@ + +import datetime, json, sys, traceback, csv, resource + +from utils import * +from tools import * + +# imported function +from function import function + + +cfg = json.load(open(sys.argv[1], 'r')) +repetitions = cfg['benchmark']['repetitions'] +disable_gc = cfg['benchmark']['disable_gc'] +input_data = cfg['input'] + +timedata = [0] * repetitions +os_times = [0] * repetitions +try: + start = start_benchmarking(disable_gc) + for i in range(0, repetitions): + begin = datetime.datetime.now() + begin_times = resource.getrusage(resource.RUSAGE_SELF) + res = function.handler(input_data) + end_times = resource.getrusage(resource.RUSAGE_SELF) + stop = datetime.datetime.now() + print(res, file = open( + get_result_prefix(LOGS_DIR, 'output', 'txt'), + 'w' + )) + timedata[i] = [begin, stop] + os_times[i] = [begin_times, end_times] + end = stop_benchmarking() +except Exception as e: + print('Exception caught!') + print(e) + traceback.print_exc() + + +result = get_result_prefix(RESULTS_DIR, cfg['benchmark']['name'], 'csv') +with open(result, 'w') as f: + csv_writer = csv.writer(f) + csv_writer.writerow(['#Seconds from epoch.microseconds; CPU times are in microseconds']) + csv_writer.writerow(['Begin','End','Duration','User','Sys']) + for i in range(0, len(timedata)): + csv_writer.writerow([ + timedata[i][0].strftime('%s.%f'), + timedata[i][1].strftime('%s.%f'), + 
(timedata[i][1] - timedata[i][0]) / + datetime.timedelta(microseconds=1), + (os_times[i][1].ru_utime - os_times[i][0].ru_utime) * 1e6, + (os_times[i][1].ru_stime - os_times[i][0].ru_stime) * 1e6 + ]) + +experiment_data = {} +experiment_data['repetitions'] = repetitions +experiment_data['timestamps'] = process_timestamps(timedata) +experiment_data['start'] = str(start) +experiment_data['end'] = str(end) +print(json.dumps({'experiment': experiment_data, 'runtime': get_config()}, indent=2)) diff --git a/dockerfiles/local/pypy/timeit.sh b/dockerfiles/local/pypy/timeit.sh new file mode 100755 index 000000000..73e6e5eaf --- /dev/null +++ b/dockerfiles/local/pypy/timeit.sh @@ -0,0 +1,5 @@ +#!/bin/bash +#ts=$(date +%s%N); +export TIMEFORMAT='%3R,%3U,%3S' +time pypy -c "from json import load; from function import function; print(function.handler(load(open('input.json', 'r'))))" > $1 +#tt=$((($(date +%s%N) - $ts)/1000)) ; echo $tt diff --git a/dockerfiles/local/pypy/tools.py b/dockerfiles/local/pypy/tools.py new file mode 100644 index 000000000..0413489e3 --- /dev/null +++ b/dockerfiles/local/pypy/tools.py @@ -0,0 +1,21 @@ + +import datetime, gc, platform, os, sys + +def start_benchmarking(disable_gc): + if disable_gc: + gc.disable() + return datetime.datetime.now() + +def stop_benchmarking(): + end = datetime.datetime.now() + gc.enable() + return end + +def get_config(): + # get currently loaded modules + # https://stackoverflow.com/questions/4858100/how-to-list-imported-modules + modulenames = set(sys.modules) & set(globals()) + allmodules = [sys.modules[name] for name in modulenames] + return {'name': 'pypy', + 'version': platform.python_version(), + 'modules': str(allmodules)} diff --git a/dockerfiles/local/runner.py b/dockerfiles/local/runner.py index 96261fc33..b1d0ca423 100644 --- a/dockerfiles/local/runner.py +++ b/dockerfiles/local/runner.py @@ -4,7 +4,7 @@ from utils import * def get_language(lang): - languages = {'python': 'python3', 'nodejs': 'nodejs'} + 
languages = {'python': 'python3', 'nodejs': 'nodejs', 'pypy': 'pypy'} return languages[lang] def get_runner(experiment, options=None): diff --git a/dockerfiles/pypy_installer.sh b/dockerfiles/pypy_installer.sh new file mode 100644 index 000000000..7c6aafcc5 --- /dev/null +++ b/dockerfiles/pypy_installer.sh @@ -0,0 +1,44 @@ +#!/bin/bash +set -euo pipefail + +cd /mnt/function + +# Download and unpack PyPy +curl -L -o pypy.tar.bz2 https://downloads.python.org/pypy/pypy3.11-v7.3.20-linux64.tar.bz2 +tar -xjf pypy.tar.bz2 +mv pypy3.11-v7.3.20-linux64 pypy +rm pypy.tar.bz2 +chmod -R +x pypy/bin +export PATH=/mnt/function/pypy/bin:$PATH + +# Ensure pip is available +python -m ensurepip +python -mpip install -U pip wheel + +# Where to place dependencies for Azure/AWS +REQ_TARGET=".python_packages/lib/site-packages" +mkdir -p "${REQ_TARGET}" + +# Platform pin for arm64 if needed +# WARNING: Removing the conditional might break PyPy builds or install CPython-only libs. +PLATFORM_ARG="" +if [[ "${TARGET_ARCHITECTURE:-}" == "arm64" ]]; then + PLATFORM_ARG="--platform manylinux_2_17_aarch64 --only-binary=:all:" +fi + +# Pick the best matching requirements file +if [[ "${TARGET_ARCHITECTURE:-}" == "arm64" && -f "requirements.txt.arm.${PYTHON_VERSION}" ]]; then + REQ_FILE="requirements.txt.arm.${PYTHON_VERSION}" +elif [[ -f "requirements.txt.${PYTHON_VERSION}" ]]; then + REQ_FILE="requirements.txt.${PYTHON_VERSION}" +else + REQ_FILE="requirements.txt" +fi + +# Install benchmark deps into the target directory +python -mpip install ${PLATFORM_ARG} -r "${REQ_FILE}" -t "${REQ_TARGET}" + +# Run optional benchmark packaging hook +if [[ -f "${SCRIPT_FILE:-}" ]]; then + /bin/bash "${SCRIPT_FILE}" "${REQ_TARGET}" +fi \ No newline at end of file diff --git a/dockerfiles/rust_installer.sh b/dockerfiles/rust_installer.sh new file mode 100755 index 000000000..87a95887f --- /dev/null +++ b/dockerfiles/rust_installer.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +cd /mnt/function + +# Install Rust target 
for AWS Lambda +if [ "${TARGET_ARCHITECTURE}" == "arm64" ]; then + RUST_TARGET="aarch64-unknown-linux-gnu" +elif [ "${TARGET_ARCHITECTURE}" == "x64" ]; then + RUST_TARGET="x86_64-unknown-linux-gnu" +else + echo "Unsupported architecture: $TARGET_ARCHITECTURE" + exit 1 +fi + +# Ensure Cargo.toml exists +if [ ! -f "Cargo.toml" ]; then + echo "Error: Cargo.toml not found" + exit 1 +fi + +# Add the target if not already added +rustup target add ${RUST_TARGET} + +# Build the release binary +cargo build --release --target ${RUST_TARGET} + +# Copy the binary to the root as 'bootstrap' (required by AWS Lambda custom runtime) +cp target/${RUST_TARGET}/release/bootstrap bootstrap || \ + cp target/${RUST_TARGET}/release/handler bootstrap || \ + (ls target/${RUST_TARGET}/release/ && exit 1) + +chmod +x bootstrap diff --git a/docs/rust-support.md b/docs/rust-support.md new file mode 100644 index 000000000..83267e1a5 --- /dev/null +++ b/docs/rust-support.md @@ -0,0 +1,239 @@ +# Rust Support for AWS Lambda Benchmarks + +This document describes the Rust support implementation for AWS Lambda benchmarks in the SeBS framework. + +## Overview + +As of November 2025, AWS Lambda officially supports Rust as a Generally Available runtime. This implementation adds full Rust support to the SeBS benchmarking framework for AWS Lambda. + +## Requirements + +- Rust toolchain (version specified in benchmark configuration) +- AWS Lambda Runtime for Rust (`lambda_runtime` crate) +- Cargo build system + +## Architecture + +### Runtime Configuration + +Rust functions on AWS Lambda use the `provided.al2023` custom runtime. The compiled binary must be named `bootstrap` to be recognized by the Lambda execution environment. + +### Build Process + +1. Rust code is compiled using Cargo with the appropriate target architecture: + - `x86_64-unknown-linux-gnu` for x64 architecture + - `aarch64-unknown-linux-gnu` for ARM64 architecture + +2. The compiled binary is renamed to `bootstrap` if necessary + +3. 
The bootstrap binary is packaged in a ZIP file for deployment + +## Creating a Rust Benchmark + +### 1. Project Structure + +Create a Rust directory in your benchmark folder: + +``` +benchmarks/ + └── YOUR_BENCHMARK/ + └── rust/ + ├── Cargo.toml + ├── src/ + │ └── main.rs + └── .gitignore +``` + +### 2. Cargo.toml Configuration + +Your `Cargo.toml` must specify the binary name as `bootstrap`: + +```toml +[package] +name = "your-benchmark" +version = "0.1.0" +edition = "2021" + +[[bin]] +name = "bootstrap" +path = "src/main.rs" + +[dependencies] +lambda_runtime = "0.13" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +tokio = { version = "1", features = ["full"] } +``` + +### 3. Handler Implementation + +Your Rust handler must follow the Lambda Runtime API format: + +```rust +use lambda_runtime::{service_fn, Error, LambdaEvent}; +use serde::{Deserialize, Serialize}; +use std::time::{SystemTime, UNIX_EPOCH}; + +#[derive(Deserialize)] +struct Request { + // Your input fields +} + +#[derive(Serialize)] +struct Response { + result: YourResultType, + begin: f64, + end: f64, + is_cold: bool, + request_id: String, +} + +static mut IS_COLD: bool = true; + +async fn handler(event: LambdaEvent) -> Result { + let (payload, context) = event.into_parts(); + + let begin = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs_f64(); + + let is_cold = unsafe { + let cold = IS_COLD; + IS_COLD = false; + cold + }; + + // Your benchmark logic here + + let end = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs_f64(); + + Ok(Response { + result: your_result, + begin, + end, + is_cold, + request_id: context.request_id, + }) +} + +#[tokio::main] +async fn main() -> Result<(), Error> { + lambda_runtime::run(service_fn(handler)).await +} +``` + +### 4. 
Update Benchmark Configuration + +Add `"rust"` to the languages array in your benchmark's `config.json`: + +```json +{ + "timeout": 120, + "memory": 128, + "languages": ["python", "nodejs", "rust"], + "modules": [] +} +``` + +## Running Rust Benchmarks + +Use the standard SeBS command-line interface: + +```bash +# Deploy and run a Rust benchmark +./sebs.py benchmark invoke 010.sleep --language rust --language-version 1.80 --deployment aws + +# Use container deployment (recommended for consistent builds) +./sebs.py benchmark invoke 010.sleep --language rust --language-version 1.80 --deployment aws --container-deployment +``` + +## Implementation Details + +### Dockerfiles + +Two Dockerfiles are provided for Rust: + +1. **Dockerfile.build**: Used for building the function code with dependencies + - Installs Rust toolchain + - Configures cross-compilation targets + - Runs the build process + +2. **Dockerfile.function**: Used for container-based Lambda deployment + - Contains the compiled binary + - Minimal runtime environment + +### Build Script + +The `rust_installer.sh` script handles: +- Target architecture selection +- Rust target installation +- Cargo build execution +- Binary extraction and naming + +### AWS Integration + +The AWS deployment module (`sebs/aws/aws.py`) has been updated to: +- Recognize Rust as a language option +- Map Rust to the `provided.al2023` runtime +- Use `bootstrap` as the Lambda handler +- Package Rust binaries correctly + +## Example: Sleep Benchmark + +A complete example is available at: +`benchmarks/000.microbenchmarks/010.sleep/rust/` + +This benchmark demonstrates: +- Basic Lambda Runtime usage +- Cold start detection +- Request/response handling +- Timing measurements + +## Performance Considerations + +Rust provides several advantages for Lambda functions: + +1. **Fast Execution**: Compiled, optimized native code +2. **Low Memory Usage**: No runtime overhead +3. 
**Fast Cold Starts**: Smaller binary size compared to some runtimes +4. **Predictable Performance**: No garbage collection pauses + +## Troubleshooting + +### Binary Size Issues + +If your binary is too large for direct ZIP upload (>50MB): +- The framework will automatically use S3 upload +- Consider using container deployment for large binaries + +### Architecture Mismatch + +Ensure you're building for the correct architecture: +- Use `--architecture x64` or `--architecture arm64` flag +- The build system will automatically select the correct Rust target + +### Dependencies Not Building + +For dependencies with native code: +- Ensure they support Linux targets +- Consider using container deployment for consistent builds + +## Additional Resources + +- [AWS Lambda Rust Support Announcement](https://aws.amazon.com/about-aws/whats-new/2025/11/aws-lambda-rust/) +- [AWS Lambda Rust Runtime Documentation](https://docs.aws.amazon.com/lambda/latest/dg/lambda-rust.html) +- [Rust Lambda Runtime Crate](https://github.com/awslabs/aws-lambda-rust-runtime) + +## Contributing + +When adding new Rust benchmarks: + +1. Follow the project structure outlined above +2. Include appropriate error handling +3. Document any special dependencies or requirements +4. Test on both x64 and ARM64 architectures if possible +5. Update this documentation if you encounter issues or have suggestions diff --git a/experiments.json b/experiments.json new file mode 100644 index 000000000..53eb51946 --- /dev/null +++ b/experiments.json @@ -0,0 +1,120 @@ +{ + "_invocations": { + "sebs_bf659d4e_110_dynamic_html_rust_1_87_x64_docker": { + "unknown": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": { + "begin": 1767609674.294732, + "end": 1767609674.2947633, + "is_cold": false, + "request_id": "unknown", + "result": "\n\n\n \n Randomly generated data.\n \n \n \n \n \n
    \n

    Welcome testname!

    \n

    Data generated at: 2026-01-05 10:41:14.294 UTC!

    \n

    Requested random numbers:

    \n
      \n \n
    • 903514
    • \n \n
    • 445253
    • \n \n
    • 723144
    • \n \n
    • 167336
    • \n \n
    • 998709
    • \n \n
    • 523251
    • \n \n
    • 807246
    • \n \n
    • 350991
    • \n \n
    • 429441
    • \n \n
    • 321223
    • \n \n
    \n
    \n \n\n" + }, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "unknown", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 31, + "client": 132183, + "client_begin": "2026-01-05 11:41:14.159588", + "client_end": "2026-01-05 11:41:14.291771", + "http_first_byte_return": 0.132009, + "http_startup": 0.072621, + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767609665.552273, + "config": { + "deployment": { + "credentials": { + "account_id": "119764645837" + }, + "name": "aws", + "region": "eu-north-1", + "resources": { + "benchmarks": "sebs-benchmarks-bf659d4e", + "container_repository": "sebs-benchmarks-bf659d4e", + "docker": { + "registry": "119764645837.dkr.ecr.eu-north-1.amazonaws.com/sebs-benchmarks-bf659d4e", + "username": "AWS" + }, + "http-apis": { + "sebs_bf659d4e_110_dynamic_html_rust_1_87_x64_docker-http-api": { + "arn": "arn:aws:execute-api:eu-north-1:119764645837:m4mj1j425a", + "endpoint": "https://m4mj1j425a.execute-api.eu-north-1.amazonaws.com" + } + }, + "lambda-role": "arn:aws:iam::119764645837:role/sebs-lambda-role", + "resources_id": "bf659d4e" + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": true, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + 
}, + "flags": {}, + "runtime": { + "language": "rust", + "version": "1.87" + }, + "update_code": true, + "update_storage": false + } + }, + "end_time": 1767609674.294158, + "result_bucket": null +} \ No newline at end of file diff --git a/requirements.visualization.txt b/requirements.visualization.txt new file mode 100644 index 000000000..37389e6d3 --- /dev/null +++ b/requirements.visualization.txt @@ -0,0 +1,7 @@ +# Additional requirements for benchmark visualization tools + +matplotlib>=3.5.0 +seaborn>=0.12.0 +pandas>=1.5.0 +numpy>=1.23.0 + diff --git a/scripts/run_experiments.py b/scripts/run_experiments.py index c18b96c0a..011d2f3ee 100755 --- a/scripts/run_experiments.py +++ b/scripts/run_experiments.py @@ -410,7 +410,7 @@ def clean(self): objects = self.connection.list_objects_v2(bucket) objects = [obj.object_name for obj in objects] for err in self.connection.remove_objects(bucket, objects): - logging.error("Deletion Error: {}".format(del_err)) + logging.error("Deletion Error: {}".format(err)) def download_results(self, result_dir): result_dir = os.path.join(result_dir, 'storage_output') diff --git a/sebs.py b/sebs.py index 80fb11ed3..c97cfbb1b 100755 --- a/sebs.py +++ b/sebs.py @@ -64,7 +64,7 @@ def simplified_common_params(func): @click.option( "--language", default=None, - type=click.Choice(["python", "nodejs"]), + type=click.Choice(["python", "nodejs", "java", "rust", "pypy"]), help="Benchmark language", ) @click.option("--language-version", default=None, type=str, help="Benchmark language version") diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 243a6f0f9..c1c2d3430 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -137,6 +137,8 @@ def package_code( CONFIG_FILES = { "python": ["handler.py", "requirements.txt", ".python_packages"], "nodejs": ["handler.js", "package.json", "node_modules"], + "rust": ["bootstrap", "Cargo.toml", "Cargo.lock", "target"], + "java": ["function.jar"], } package_config = CONFIG_FILES[language_name] function_dir = 
os.path.join(directory, "function") @@ -174,6 +176,9 @@ def _map_language_runtime(self, language: str, runtime: str): # For example, it's 12.x instead of 12. if language == "nodejs": return f"{runtime}.x" + # Rust uses provided.al2023 runtime (custom runtime) + elif language == "rust": + return "provided.al2023" return runtime def create_function( @@ -251,10 +256,15 @@ def create_function( "S3Key": code_prefix, } - create_function_params["Runtime"] = "{}{}".format( - language, self._map_language_runtime(language, language_runtime) - ) - create_function_params["Handler"] = "handler.handler" + # Rust uses custom runtime with different handler + if language == "rust": + create_function_params["Runtime"] = self._map_language_runtime(language, language_runtime) + create_function_params["Handler"] = "bootstrap" + else: + create_function_params["Runtime"] = "{}{}".format( + language, self._map_language_runtime(language, language_runtime) + ) + create_function_params["Handler"] = "handler.handler" create_function_params = { k: v for k, v in create_function_params.items() if v is not None @@ -641,4 +651,4 @@ def wait_function_updated(self, func: LambdaFunction): self.logging.info("Lambda function has been updated.") def disable_rich_output(self): - self.ecr_client.disable_rich_output = True + self.ecr_client.disable_rich_output = True \ No newline at end of file diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index d848d724a..859227179 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -33,12 +33,23 @@ class Azure(System): _config: AzureConfig # runtime mapping - AZURE_RUNTIMES = {"python": "python", "nodejs": "node"} + AZURE_RUNTIMES = {"python": "python", "nodejs": "node", "java": "java", "pypy": "custom"} @staticmethod def name(): return "azure" + @staticmethod + def _normalize_runtime_version(language: str, version: str) -> str: + """ + Azure Functions Java expects versions with a minor component + (e.g. 17.0 instead of 17). 
Other languages can keep the version + as-is. + """ + if language == "java" and re.match(r"^\d+$", str(version)): + return f"{version}.0" + return version + @property def config(self) -> AzureConfig: return self._config @@ -133,36 +144,90 @@ def package_code( # In previous step we ran a Docker container which installed packages # Python packages are in .python_packages because this is expected by Azure - EXEC_FILES = {"python": "handler.py", "nodejs": "handler.js"} + EXEC_FILES = {"python": "handler.py", "nodejs": "handler.js", "java": "../function.jar", "pypy": "handler.py"} CONFIG_FILES = { "python": ["requirements.txt", ".python_packages"], "nodejs": ["package.json", "node_modules"], + "java": ["function.jar"], + # Keep .python_packages at the root so custom handler can import deps. + "pypy": ["requirements.txt", ".python_packages", "pypy"], } package_config = CONFIG_FILES[language_name] handler_dir = os.path.join(directory, "handler") os.makedirs(handler_dir) + + # For Java, create lib directory for JARs and exclude build artifacts + if language_name == "java": + lib_dir = os.path.join(directory, "lib") + os.makedirs(lib_dir, exist_ok=True) + # Move function.jar to lib directory + if os.path.exists(os.path.join(directory, "function.jar")): + shutil.move(os.path.join(directory, "function.jar"), os.path.join(lib_dir, "function.jar")) + # For Java, we want to keep lib and exclude source files/build artifacts + package_config = ["lib", "src", "pom.xml", "target", ".mvn", "mvnw", "mvnw.cmd"] + # move all files to 'handler' except package config + # For pypy custom handlers, handler.py must stay at root level + files_to_exclude = package_config.copy() + if language_name == "pypy": + files_to_exclude.append(EXEC_FILES[language_name]) for f in os.listdir(directory): - if f not in package_config: + if f not in files_to_exclude: source_file = os.path.join(directory, f) shutil.move(source_file, handler_dir) + + # For Java, clean up build artifacts that we don't want to 
deploy + if language_name == "java": + for artifact in ["src", "pom.xml", "target", ".mvn", "mvnw", "mvnw.cmd"]: + artifact_path = os.path.join(directory, artifact) + if os.path.exists(artifact_path): + if os.path.isdir(artifact_path): + shutil.rmtree(artifact_path) + else: + os.remove(artifact_path) # generate function.json # TODO: extension to other triggers than HTTP - default_function_json = { - "scriptFile": EXEC_FILES[language_name], - "bindings": [ - { - "authLevel": "anonymous", - "type": "httpTrigger", - "direction": "in", - "name": "req", - "methods": ["get", "post"], - }, - {"type": "http", "direction": "out", "name": "$return"}, - ], - } + if language_name == "java": + # Java Azure Functions - For annotation-based functions, function.json + # should include scriptFile and entryPoint + # The @FunctionName annotation determines the function name + default_function_json = { + "scriptFile": "../lib/function.jar", + "entryPoint": "org.serverlessbench.Handler.handleRequest", + "bindings": [ + { + "type": "httpTrigger", + "direction": "in", + "name": "req", + "methods": ["get", "post"], + "authLevel": "anonymous" + }, + { + "type": "http", + "direction": "out", + "name": "$return" + } + ] + } + else: + default_function_json = { + "bindings": [ + { + "authLevel": "anonymous", + "type": "httpTrigger", + "direction": "in", + "name": "req", + "methods": ["get", "post"], + }, + {"type": "http", "direction": "out", "name": "$return"}, + ], + } + # PyPy uses custom handler, no scriptFile needed + if language_name != "pypy": + default_function_json["scriptFile"] = EXEC_FILES[language_name] + json_out = os.path.join(directory, "handler", "function.json") json.dump(default_function_json, open(json_out, "w"), indent=2) @@ -174,6 +239,14 @@ def package_code( "version": "[4.0.0, 5.0.0)", }, } + if language_name == "pypy": + default_host_json["customHandler"] = { + "description": { + "defaultExecutablePath": "pypy/bin/pypy", + "arguments": ["handler.py"], + }, + 
"enableForwardingHttpRequest": True, + } json.dump(default_host_json, open(os.path.join(directory, "host.json"), "w"), indent=2) code_size = Benchmark.directory_size(directory) @@ -418,7 +491,13 @@ def create_function( raise NotImplementedError("Container deployment is not supported in Azure") language = code_package.language_name - language_runtime = code_package.language_version + language_runtime = self._normalize_runtime_version( + language, code_package.language_version + ) + # ensure string form is passed to Azure CLI + language_runtime = str(language_runtime) + if language == "java" and "." not in language_runtime: + language_runtime = f"{language_runtime}.0" resource_group = self.config.resources.resource_group(self.cli_instance) region = self.config.region function_cfg = FunctionConfig.from_benchmark(code_package) @@ -457,11 +536,16 @@ def create_function( while True: try: # create function app + # Custom runtime doesn't support --runtime-version parameter + runtime_version_param = "" + if config["runtime"] != "custom": + runtime_version_param = " --runtime-version {runtime_version} " + self.cli_instance.execute( ( " az functionapp create --resource-group {resource_group} " " --os-type Linux --consumption-plan-location {region} " - " --runtime {runtime} --runtime-version {runtime_version} " + " --runtime {runtime}" + runtime_version_param + " --name {func_name} --storage-account {storage_account}" " --functions-version 4 " ).format(**config) diff --git a/sebs/benchmark.py b/sebs/benchmark.py index f159e820c..7d82f34c9 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -252,23 +252,58 @@ def hash_directory(directory: str, deployment: str, language: str): FILES = { "python": ["*.py", "requirements.txt*"], "nodejs": ["*.js", "package.json"], + "rust": ["*.rs", "Cargo.toml", "Cargo.lock"], + "java": ["src", "pom.xml"], + "pypy": ["*.py", "requirements.txt*"], + } + WRAPPERS = { + "python": ["*.py"], + "nodejs": ["*.js"], + "rust": None, + "java": 
["src", "pom.xml"], + "pypy": ["*.py"], } - WRAPPERS = {"python": "*.py", "nodejs": "*.js"} NON_LANG_FILES = ["*.sh", "*.json"] selected_files = FILES[language] + NON_LANG_FILES for file_type in selected_files: for f in glob.glob(os.path.join(directory, file_type)): path = os.path.join(directory, f) - with open(path, "rb") as opened_file: - hash_sum.update(opened_file.read()) - # wrappers - wrappers = project_absolute_path( - "benchmarks", "wrappers", deployment, language, WRAPPERS[language] - ) - for f in glob.glob(wrappers): - path = os.path.join(directory, f) - with open(path, "rb") as opened_file: - hash_sum.update(opened_file.read()) + if os.path.isdir(path): + for root, _, files in os.walk(path): + for file in sorted(files): + file_path = os.path.join(root, file) + with open(file_path, "rb") as opened_file: + hash_sum.update(opened_file.read()) + else: + with open(path, "rb") as opened_file: + hash_sum.update(opened_file.read()) + # For rust, also hash the src directory recursively + if language == "rust": + src_dir = os.path.join(directory, "src") + if os.path.exists(src_dir): + for root, dirs, files in os.walk(src_dir): + for file in sorted(files): + if file.endswith('.rs'): + path = os.path.join(root, file) + with open(path, "rb") as opened_file: + hash_sum.update(opened_file.read()) + # wrappers (Rust doesn't use wrapper files) + if WRAPPERS[language] is not None: + wrapper_patterns = WRAPPERS[language] if isinstance(WRAPPERS[language], list) else [WRAPPERS[language]] + for pattern in wrapper_patterns: + wrappers = project_absolute_path( + "benchmarks", "wrappers", deployment, language, pattern + ) + for f in glob.glob(wrappers): + if os.path.isdir(f): + for root, _, files in os.walk(f): + for file in files: + path = os.path.join(root, file) + with open(path, "rb") as opened_file: + hash_sum.update(opened_file.read()) + else: + with open(f, "rb") as opened_file: + hash_sum.update(opened_file.read()) return hash_sum.hexdigest() def serialize(self) -> dict: 
@@ -316,11 +351,31 @@ def copy_code(self, output_dir): FILES = { "python": ["*.py", "requirements.txt*"], "nodejs": ["*.js", "package.json"], + "rust": ["Cargo.toml", "Cargo.lock"], + "java": [], + "pypy": ["*.py", "requirements.txt*"], } path = os.path.join(self.benchmark_path, self.language_name) + if self.language_name == "java": + shutil.copytree(path, output_dir, dirs_exist_ok=True) + return + self.logging.info(f"copy_code: Looking for files in {path} for language {self.language_name}") for file_type in FILES[self.language_name]: - for f in glob.glob(os.path.join(path, file_type)): - shutil.copy2(os.path.join(path, f), output_dir) + matches = glob.glob(os.path.join(path, file_type)) + self.logging.info(f"copy_code: Pattern {file_type} matched {len(matches)} files: {matches}") + for f in matches: + self.logging.info(f"copy_code: Copying {f} to {output_dir}") + shutil.copy2(f, output_dir) + + # For Rust, copy the entire src directory + if self.language_name == "rust": + src_path = os.path.join(path, "src") + if os.path.exists(src_path): + dest_src = os.path.join(output_dir, "src") + if os.path.exists(dest_src): + shutil.rmtree(dest_src) + shutil.copytree(src_path, dest_src) + # support node.js benchmarks with language specific packages nodejs_package_json = os.path.join(path, f"package.json.{self.language_version}") if os.path.exists(nodejs_package_json): @@ -345,6 +400,106 @@ def add_benchmark_data(self, output_dir): stderr=subprocess.STDOUT, ) + def _merge_rust_cargo_toml(self, wrapper_cargo_path: str, benchmark_cargo_path: str, output_dir: str): + """ + Merge benchmark Cargo.toml dependencies into wrapper Cargo.toml. + The wrapper Cargo.toml is the base, and benchmark dependencies are added/merged. + Uses simple string-based approach to extract and merge [dependencies] sections. 
+ """ + import re + + # Ensure output_dir is absolute for consistent path handling + output_dir = os.path.abspath(output_dir) + + with open(wrapper_cargo_path, 'r') as f: + wrapper_content = f.read() + + with open(benchmark_cargo_path, 'r') as f: + benchmark_content = f.read() + + # Extract dependencies from benchmark Cargo.toml + deps_match = re.search(r'\[dependencies\](.*?)(?=\n\[|\Z)', benchmark_content, re.DOTALL) + if not deps_match: + # No dependencies in benchmark, just copy wrapper + output_cargo = os.path.join(output_dir, "Cargo.toml") + with open(output_cargo, 'w') as f: + f.write(wrapper_content) + return + + benchmark_deps_lines = deps_match.group(1).strip().split('\n') + + # Extract existing dependency names from wrapper to avoid duplicates + wrapper_deps_match = re.search(r'\[dependencies\](.*?)(?=\n\[|\Z)', wrapper_content, re.DOTALL) + existing_deps = set() + if wrapper_deps_match: + for line in wrapper_deps_match.group(1).split('\n'): + line = line.strip() + if line and not line.startswith('#'): + # Extract dependency name (before = or {) + dep_name = re.split(r'[=\s{]+', line)[0].strip() + if dep_name: + existing_deps.add(dep_name) + + # Add benchmark dependencies that aren't already in wrapper + new_deps = [] + for line in benchmark_deps_lines: + line = line.strip() + if line and not line.startswith('#'): + dep_name = re.split(r'[=\s{]+', line)[0].strip() + if dep_name and dep_name not in existing_deps: + new_deps.append(line) + existing_deps.add(dep_name) + + # Merge dependencies into wrapper content + if new_deps: + if wrapper_deps_match: + # Insert new dependencies before the end of [dependencies] section + deps_section_start = wrapper_deps_match.start() + deps_section_end = wrapper_deps_match.end() + deps_content = wrapper_deps_match.group(1) + + # Build merged dependencies section + merged_deps = deps_content.rstrip() + for dep_line in new_deps: + merged_deps += '\n' + dep_line + merged_deps += '\n' + + # Reconstruct wrapper content with 
merged dependencies + merged_content = ( + wrapper_content[:deps_section_start] + + '[dependencies]' + merged_deps + + wrapper_content[deps_section_end:] + ) + else: + # Add [dependencies] section if it doesn't exist + if not wrapper_content.endswith('\n'): + wrapper_content += '\n' + merged_content = wrapper_content + '\n[dependencies]\n' + for dep_line in new_deps: + merged_content += dep_line + '\n' + else: + merged_content = wrapper_content + + # Write merged Cargo.toml (output_dir is already absolute) + output_cargo = os.path.join(output_dir, "Cargo.toml") + # Ensure directory exists + os.makedirs(output_dir, exist_ok=True) + with open(output_cargo, 'w') as f: + f.write(merged_content) + f.flush() + os.fsync(f.fileno()) # Force write to disk + # Verify it was written (with a small delay for filesystem sync) + import time + time.sleep(0.01) # Small delay for filesystem to sync + if not os.path.exists(output_cargo): + # Try to get more info about what went wrong + parent_dir = os.path.dirname(output_cargo) + raise RuntimeError( + f"Failed to write merged Cargo.toml to {output_cargo}. 
" + f"Parent directory exists: {os.path.exists(parent_dir)}, " + f"Parent directory contents: {os.listdir(parent_dir) if os.path.exists(parent_dir) else 'N/A'}" + ) + def add_deployment_files(self, output_dir): handlers_dir = project_absolute_path( "benchmarks", "wrappers", self._deployment_name, self.language_name @@ -355,8 +510,59 @@ def add_deployment_files(self, output_dir): self._deployment_name, self.language_name ) ] + + # Copy wrapper files first (except Cargo.toml for Rust, which we'll merge) for file in handlers: - shutil.copy2(file, os.path.join(output_dir)) + destination = os.path.join(output_dir, os.path.basename(file)) + if os.path.basename(file) == "Cargo.toml" and self.language_name == "rust": + # Skip copying wrapper Cargo.toml directly - we'll merge it instead + continue + if os.path.isdir(file): + shutil.copytree(file, destination, dirs_exist_ok=True) + else: + if not os.path.exists(destination): + shutil.copy2(file, destination) + + # For Rust, merge Cargo.toml files after copying other wrapper files + if self.language_name == "rust": + # Ensure output_dir is absolute for consistent path handling + output_dir_abs = os.path.abspath(output_dir) + wrapper_cargo = os.path.join(handlers_dir, "Cargo.toml") + benchmark_cargo = os.path.join(output_dir_abs, "Cargo.toml") + self.logging.info(f"Rust Cargo.toml merge: wrapper={wrapper_cargo} (exists: {os.path.exists(wrapper_cargo)}), benchmark={benchmark_cargo} (exists: {os.path.exists(benchmark_cargo)})") + if os.path.exists(wrapper_cargo) and os.path.exists(benchmark_cargo): + # Merge dependencies from benchmark Cargo.toml into wrapper Cargo.toml + self.logging.info("Merging Rust Cargo.toml files") + # The merge function reads benchmark_cargo and writes merged content to output_dir/Cargo.toml + # Since benchmark_cargo IS output_dir/Cargo.toml, the merge overwrites it + # So we don't need to remove benchmark_cargo - it's already been overwritten with merged content + 
self._merge_rust_cargo_toml(wrapper_cargo, benchmark_cargo, output_dir_abs) + merged_path = os.path.join(output_dir_abs, "Cargo.toml") + # The merge function should have raised an error if it failed, but verify anyway + if not os.path.exists(merged_path): + # List directory contents for debugging + dir_contents = os.listdir(output_dir_abs) if os.path.exists(output_dir_abs) else [] + raise RuntimeError( + f"Merged Cargo.toml was not created at {merged_path}. " + f"Directory contents: {dir_contents}" + ) + self.logging.info(f"Merged Cargo.toml successfully written to {merged_path}") + elif os.path.exists(wrapper_cargo): + # Only wrapper Cargo.toml exists, just copy it + wrapper_dest = os.path.join(output_dir_abs, "Cargo.toml") + self.logging.info(f"Only wrapper Cargo.toml exists, copying to {wrapper_dest}") + shutil.copy2(wrapper_cargo, wrapper_dest) + elif os.path.exists(benchmark_cargo): + # Only benchmark Cargo.toml exists, copy it (shouldn't happen normally) + benchmark_dest = os.path.join(output_dir_abs, "Cargo.toml") + self.logging.warning(f"Only benchmark Cargo.toml exists, copying to {benchmark_dest}") + # Keep it as-is since wrapper should always exist + else: + self.logging.error(f"Neither wrapper nor benchmark Cargo.toml found! Wrapper: {wrapper_cargo}, Benchmark: {benchmark_cargo}") + raise RuntimeError( + f"Cargo.toml not found: wrapper at {wrapper_cargo} or benchmark at {benchmark_cargo}. " + "Both should exist for Rust builds." 
+ ) def add_deployment_package_python(self, output_dir): @@ -372,6 +578,8 @@ def add_deployment_package_python(self, output_dir): ) for package in packages: out.write(package) + if not package.endswith('\n'): + out.write('\n') module_packages = self._system_config.deployment_module_packages( self._deployment_name, self.language_name @@ -380,6 +588,8 @@ def add_deployment_package_python(self, output_dir): if bench_module.value in module_packages: for package in module_packages[bench_module.value]: out.write(package) + if not package.endswith('\n'): + out.write('\n') def add_deployment_package_nodejs(self, output_dir): # modify package.json @@ -402,10 +612,16 @@ def add_deployment_package_nodejs(self, output_dir): def add_deployment_package(self, output_dir): from sebs.faas.function import Language - if self.language == Language.PYTHON: + if self.language == Language.PYTHON or self.language == Language.PYPY: self.add_deployment_package_python(output_dir) elif self.language == Language.NODEJS: self.add_deployment_package_nodejs(output_dir) + elif self.language == Language.RUST: + # Rust dependencies are managed by Cargo, no additional packages needed + pass + elif self.language == Language.JAVA: + # Java dependencies are handled by Maven in the wrapper + return else: raise NotImplementedError @@ -419,15 +635,26 @@ def directory_size(directory: str): def install_dependencies(self, output_dir): # do we have docker image for this run and language? - if "build" not in self._system_config.docker_image_types( + image_types = self._system_config.docker_image_types( self._deployment_name, self.language_name - ): + ) + self.logging.info( + f"Docker image types for {self._deployment_name}/{self.language_name}: {image_types}" + ) + if "build" not in image_types: self.logging.info( ( "There is no Docker build image for {deployment} run in {language}, " "thus skipping the Docker-based installation of dependencies." 
).format(deployment=self._deployment_name, language=self.language_name) ) + # For Rust, this is a fatal error - we need the build image + if self.language_name == "rust": + raise RuntimeError( + f"Docker build image is required for Rust but not configured for " + f"{self._deployment_name}/{self.language_name}. " + "Please ensure 'build' is in the 'images' list in config/systems.json" + ) else: repo_name = self._system_config.docker_repository() unversioned_image_name = "build.{deployment}.{language}.{runtime}".format( @@ -481,11 +708,30 @@ def ensure_image(name: str) -> None: "bind": "/mnt/function/package.sh", "mode": "ro", } + + # Mount updated java_installer.sh if language is java + if self.language_name == "java": + installer_path = os.path.abspath("dockerfiles/java_installer.sh") + if os.path.exists(installer_path): + volumes[installer_path] = { + "bind": "/sebs/installer.sh", + "mode": "ro", + } # run Docker container to install packages - PACKAGE_FILES = {"python": "requirements.txt", "nodejs": "package.json"} + PACKAGE_FILES = {"python": "requirements.txt", "nodejs": "package.json", "rust": "Cargo.toml", "java": "pom.xml", "pypy": "requirements.txt"} file = os.path.join(output_dir, PACKAGE_FILES[self.language_name]) + + # For Java, check recursively if pom.xml exists + if self.language_name == "java" and not os.path.exists(file): + for root, _, files in os.walk(output_dir): + if "pom.xml" in files: + file = os.path.join(root, "pom.xml") + break + + self.logging.info(f"Checking for package file: {file} (exists: {os.path.exists(file)})") if os.path.exists(file): + self.logging.info(f"Found package file {file}, proceeding with Docker build") try: self.logging.info( "Docker build of benchmark dependencies in container " @@ -565,14 +811,51 @@ def ensure_image(name: str) -> None: # Pass to output information on optimizing builds. # Useful for AWS where packages have to obey size limits. 
- for line in stdout.decode("utf-8").split("\n"): - if "size" in line: + build_output = "" + if isinstance(stdout, bytes): + build_output = stdout.decode("utf-8") + elif isinstance(stdout, tuple): + # exec_run returns (exit_code, output) + exit_code, output = stdout + build_output = output.decode("utf-8") if isinstance(output, bytes) else str(output) + if exit_code != 0: + self.logging.error(f"Docker build exited with code {exit_code}") + else: + build_output = str(stdout) + + for line in build_output.split("\n"): + if "size" in line or "error" in line.lower() or "Error" in line or "failed" in line.lower(): self.logging.info("Docker build: {}".format(line)) + + # For Rust, check if bootstrap binary was created + if self.language_name == "rust": + bootstrap_path = os.path.join(output_dir, "bootstrap") + if not os.path.exists(bootstrap_path): + self.logging.error("Rust build failed: bootstrap binary not found!") + self.logging.error("Build output:\n{}".format(build_output[-2000:])) # Last 2000 chars + raise RuntimeError("Rust build failed: bootstrap binary not created") except docker.errors.ContainerError as e: self.logging.error("Package build failed!") self.logging.error(e) self.logging.error(f"Docker mount volumes: {volumes}") + # For Rust, also check bootstrap even if ContainerError occurred + if self.language_name == "rust": + bootstrap_path = os.path.join(output_dir, "bootstrap") + if not os.path.exists(bootstrap_path): + self.logging.error("Rust bootstrap binary not found after Docker build failure!") raise e + else: + # Package file doesn't exist + error_msg = f"Package file {file} not found in {output_dir}" + self.logging.error(error_msg) + if self.language_name == "rust": + # List files in output_dir for debugging + files_in_dir = os.listdir(output_dir) if os.path.exists(output_dir) else [] + self.logging.error(f"Files in output_dir: {files_in_dir}") + raise RuntimeError( + f"{error_msg}. 
For Rust, Cargo.toml must exist after merging wrapper and benchmark files. " + "Check that Cargo.toml merge completed successfully." + ) def recalculate_code_size(self): self._code_size = Benchmark.directory_size(self._output_dir) @@ -613,7 +896,25 @@ def build( self.add_benchmark_data(self._output_dir) self.add_deployment_files(self._output_dir) self.add_deployment_package(self._output_dir) + + # For Rust, remove any existing Cargo.lock to ensure it's regenerated with correct constraints + if self.language_name == "rust": + cargo_lock = os.path.join(self._output_dir, "Cargo.lock") + if os.path.exists(cargo_lock): + self.logging.info(f"Removing existing Cargo.lock at {cargo_lock} to ensure regeneration with correct dependency versions") + os.remove(cargo_lock) + self.install_dependencies(self._output_dir) + + # For Rust, verify bootstrap binary exists after dependency installation + if self.language_name == "rust": + bootstrap_path = os.path.join(self._output_dir, "bootstrap") + if not os.path.exists(bootstrap_path): + self.logging.error(f"Rust bootstrap binary not found at {bootstrap_path} after install_dependencies!") + raise RuntimeError( + f"Rust build failed: bootstrap binary not created at {bootstrap_path}. " + "Check Docker build logs above for compilation errors." 
+ ) self._code_location, self._code_size, self._container_uri = deployment_build_step( os.path.abspath(self._output_dir), diff --git a/sebs/experiments/perf_cost.py b/sebs/experiments/perf_cost.py index 7b940f8df..998d4db45 100644 --- a/sebs/experiments/perf_cost.py +++ b/sebs/experiments/perf_cost.py @@ -342,10 +342,12 @@ def process( for func in experiments.functions(): for id, invoc in experiments.invocations(func).items(): # FIXME: compatibility with old results - if "output" in invoc.output["result"]: - del invoc.output["result"]["output"] - elif "result" in invoc.output["result"]: - del invoc.output["result"]["result"] + # Only process if result is a dict (some languages return primitives directly) + if isinstance(invoc.output["result"], dict): + if "output" in invoc.output["result"]: + del invoc.output["result"]["output"] + elif "result" in invoc.output["result"]: + del invoc.output["result"]["result"] name, extension = os.path.splitext(f) with open( diff --git a/sebs/faas/container.py b/sebs/faas/container.py index b17525f7b..bb87646ad 100644 --- a/sebs/faas/container.py +++ b/sebs/faas/container.py @@ -169,6 +169,15 @@ def build_base_image( build_dir = os.path.join(directory, "build") os.makedirs(build_dir, exist_ok=True) + # For Rust, ensure bootstrap binary exists before building container image + if language_name == "rust": + bootstrap_path = os.path.join(directory, "bootstrap") + if not os.path.exists(bootstrap_path): + raise RuntimeError( + f"Rust bootstrap binary not found at {bootstrap_path}. " + "The Rust build must complete successfully before container image creation." 
+ ) + shutil.copy( os.path.join(DOCKER_DIR, self.name(), language_name, "Dockerfile.function"), os.path.join(build_dir, "Dockerfile"), @@ -201,8 +210,9 @@ def build_base_image( "BASE_IMAGE": builder_image, "TARGET_ARCHITECTURE": architecture, } + docker_platform = "linux/arm64" if architecture == "arm64" else "linux/amd64" image, _ = self.docker_client.images.build( - tag=image_uri, path=build_dir, buildargs=buildargs + tag=image_uri, path=build_dir, buildargs=buildargs, platform=docker_platform ) self.logging.info( diff --git a/sebs/faas/function.py b/sebs/faas/function.py index 0fab7bcf4..fa46df621 100644 --- a/sebs/faas/function.py +++ b/sebs/faas/function.py @@ -263,6 +263,9 @@ def deserialize(cached_config: dict) -> "Trigger": class Language(Enum): PYTHON = "python" NODEJS = "nodejs" + RUST = "rust" + JAVA = "java" + PYPY = "pypy" # FIXME: 3.7+ python with future annotations @staticmethod @@ -270,7 +273,7 @@ def deserialize(val: str) -> Language: for member in Language: if member.value == val: return member - raise Exception(f"Unknown language type {member}") + raise Exception(f"Unknown language type {val}") class Architecture(Enum): @@ -299,7 +302,7 @@ def serialize(self) -> dict: @staticmethod def deserialize(config: dict) -> Runtime: - languages = {"python": Language.PYTHON, "nodejs": Language.NODEJS} + languages = {"python": Language.PYTHON, "nodejs": Language.NODEJS, "rust": Language.RUST, "java": Language.JAVA, "pypy": Language.PYPY} return Runtime(language=languages[config["language"]], version=config["version"]) diff --git a/sebs/gcp/config.py b/sebs/gcp/config.py index 56d3b5c41..0ae550be3 100644 --- a/sebs/gcp/config.py +++ b/sebs/gcp/config.py @@ -1,6 +1,8 @@ import json import os from typing import cast, List, Optional, Tuple +import time +from googleapiclient.errors import HttpError from sebs.cache import Cache from sebs.faas.config import Config, Credentials, Resources @@ -108,6 +110,7 @@ def update_cache(self, cache: Cache): class 
GCPResources(Resources): def __init__(self): super().__init__(name="gcp") + self._container_repository = None @staticmethod def initialize(res: Resources, dct: dict): @@ -120,7 +123,9 @@ def initialize(res: Resources, dct: dict): """ def serialize(self) -> dict: - return super().serialize() + out = super().serialize() + out["container_repository"] = self._container_repository + return out @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Resources": @@ -147,6 +152,61 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Resou def update_cache(self, cache: Cache): super().update_cache(cache) + @property + def container_repository(self) -> str: + return self._container_repository + + def check_container_repository_exists(self, config: Config, ar_client): + try: + parent = f"projects/{config.credentials.project_name}/locations/{config.region}" + repo_full_name = f"{parent}/repositories/{self._container_repository}" + self.logging.info("Checking if container repository exists...") + ar_client.projects().locations().repositories().get(name=repo_full_name).execute() + return True + except HttpError as e: + if e.resp.status == 404: + self.logging.error("Container repository does not exist.") + return False + else: + raise e + + def create_container_repository(self, ar_client, parent): + request_body = { + "format": "DOCKER", + "description": "Container repository for SEBS" + } + self._container_repository = f"sebs-benchmarks-{self._resources_id}" + operation = ar_client.projects().locations().repositories().create( + parent=parent, + body=request_body, + repositoryId=self._container_repository + ).execute() + + while True: + # Operations for AR are global or location specific + op_name = operation['name'] + op = ar_client.projects().locations().operations().get(name=op_name).execute() + + if op.get('done'): + if 'error' in op: + raise Exception(f"Failed to create repo: {op['error']}") + 
self.logging.info("Repository created successfully.") + break + time.sleep(2) + + def get_container_repository(self, config: Config, ar_client): + if self._container_repository is not None: + return self._container_repository + + self._container_repository = f"sebs-benchmarks-{self._resources_id}" + if self.check_container_repository_exists(config, ar_client): + return self._container_repository + + parent = f"projects/{config.credentials.project_name}/locations/{config.region}" + self.create_container_repository(ar_client, parent) + return self._container_repository + + """ FaaS system config defining cloud region (if necessary), credentials and diff --git a/sebs/gcp/container.py b/sebs/gcp/container.py new file mode 100644 index 000000000..9afcc7664 --- /dev/null +++ b/sebs/gcp/container.py @@ -0,0 +1,80 @@ +import docker +from typing import Tuple + +from sebs.gcp.config import GCPConfig +from sebs.config import SeBSConfig +from sebs.faas.container import DockerContainer +from googleapiclient.discovery import build +from google.oauth2 import service_account +from googleapiclient.errors import HttpError +from google.auth.transport.requests import Request + + +class GCRContainer(DockerContainer): + @staticmethod + def name(): + return "gcp" + + @staticmethod + def typename() -> str: + return "GCP.GCRContainer" + + def __init__( + self, + system_config: SeBSConfig, + config: GCPConfig, + docker_client: docker.client.DockerClient, + ): + super().__init__(system_config, docker_client) + self.config = config + self.creds = service_account.Credentials.from_service_account_file(self.config.credentials.gcp_credentials, scopes=["https://www.googleapis.com/auth/cloud-platform"]) + self.ar_client = build("artifactregistry", "v1", credentials=self.creds) + + def registry_name( + self, benchmark: str, language_name: str, language_version: str, architecture: str + ) -> Tuple[str, str, str, str]: + + project_id = self.config.credentials.project_name + region = self.config.region 
+ registry_name = f"{region}-docker.pkg.dev/{project_id}" + repository_name = self.config.resources.get_container_repository(self.config, self.ar_client) + + image_tag = self.system_config.benchmark_image_tag( + self.name(), benchmark, language_name, language_version, architecture + ) + image_uri = f"{registry_name}/{repository_name}/{benchmark}:{image_tag}" + + return registry_name, repository_name, image_tag, image_uri + + def find_image(self, repository_name, image_tag) -> bool: + try: + response = self.ar_client.projects().locations().repositories().dockerImages().list( + parent=f"projects/{self.config.credentials.project_id}/locations/{self.config.region}/repositories/{repository_name}" + ) + if "dockerImages" in response: + for image in response["dockerImages"]: + if "latest" in image["tags"] and image_tag in image["tags"]: + return True + except HttpError as e: + if (e.content.code == 404): + return False + raise e + return False + + def push_image(self, repository_uri, image_tag): + self.logging.info("Authenticating Docker against Artifact Registry...") + self.creds.refresh(Request()) + auth_token = self.creds.token + + try: + self.docker_client.login( + username="oauth2accesstoken", + password=auth_token, + registry=repository_uri + ) + super().push_image(repository_uri, image_tag) + self.logging.info(f"Successfully pushed the image to registry {repository_uri}.") + except docker.errors.DockerException as e: + self.logging.error(f"Failed to push the image to registry {repository_uri}.") + self.logging.error(f"Error: {str(e)}") + raise RuntimeError("Couldn't push to registry.") diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 6525034c2..8e722a9c3 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -23,6 +23,7 @@ from sebs.gcp.resources import GCPSystemResources from sebs.gcp.storage import GCPStorage from sebs.gcp.function import GCPFunction +from sebs.gcp.container import GCRContainer from sebs.utils import LoggingHandlers """ @@ -77,10 +78,14 @@ def 
function_type() -> "Type[Function]": :param config: systems-specific parameters """ - def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None): self.function_client = build("cloudfunctions", "v1", cache_discovery=False) + # Container-based functions are created via run-client + self.run_client = build("run", "v2", cache_discovery=False) self.initialize_resources(select_prefix=resource_prefix) + self.gcr_client = GCRContainer( + self.system_config, self.config, self.docker_client + ) def get_function_client(self): return self.function_client @@ -90,13 +95,14 @@ def default_function_name( ) -> str: # Create function name resource_id = resources.resources_id if resources else self.config.resources.resources_id - func_name = "sebs-{}-{}-{}-{}".format( + func_name = "sebs-{}-{}-{}-{}-{}".format( resource_id, code_package.benchmark, code_package.language_name, code_package.language_version, + code_package.architecture ) - return GCP.format_function_name(func_name) + return GCP.format_function_name(func_name) if not code_package.container_deployment else func_name.replace(".", "-") @staticmethod def format_function_name(func_name: str) -> str: @@ -133,17 +139,22 @@ def package_code( ) -> Tuple[str, int, str]: container_uri = "" - + if container_deployment: - raise NotImplementedError("Container Deployment is not supported in GCP") + # build base image and upload to GCR + _, container_uri = self.gcr_client.build_base_image( + directory, language_name, language_version, architecture, benchmark, is_cached + ) CONFIG_FILES = { "python": ["handler.py", ".python_packages"], "nodejs": ["handler.js", "node_modules"], + "pypy" : ["handler.py", ".python_packages"] } HANDLER = { "python": ("handler.py", "main.py"), "nodejs": ("handler.js", "index.js"), + "pypy": ("handler.py", "main.py"), } package_config = CONFIG_FILES[language_name] function_dir = os.path.join(directory, "function") @@ -154,10 +165,11 @@ def package_code( shutil.move(file, 
function_dir) # rename handler function.py since in gcp it has to be caled main.py - old_name, new_name = HANDLER[language_name] - old_path = os.path.join(directory, old_name) - new_path = os.path.join(directory, new_name) - shutil.move(old_path, new_path) + if not container_deployment: + old_name, new_name = HANDLER[language_name] + old_path = os.path.join(directory, old_name) + new_path = os.path.join(directory, new_name) + shutil.move(old_path, new_path) """ zip the whole directory (the zip-file gets uploaded to gcp later) @@ -179,7 +191,8 @@ def package_code( logging.info("Zip archive size {:2f} MB".format(mbytes)) # rename the main.py back to handler.py - shutil.move(new_path, old_path) + if not container_deployment: + shutil.move(new_path, old_path) return os.path.join(directory, "{}.zip".format(benchmark)), bytes_size, container_uri @@ -191,8 +204,6 @@ def create_function( container_uri: str, ) -> "GCPFunction": - if container_deployment: - raise NotImplementedError("Container deployment is not supported in GCP") package = code_package.code_location benchmark = code_package.benchmark @@ -206,16 +217,23 @@ def create_function( function_cfg = FunctionConfig.from_benchmark(code_package) architecture = function_cfg.architecture.value - code_package_name = cast(str, os.path.basename(package)) - code_package_name = f"{architecture}-{code_package_name}" - code_bucket = storage_client.get_bucket(Resources.StorageBucketType.DEPLOYMENT) - code_prefix = os.path.join(benchmark, code_package_name) - storage_client.upload(code_bucket, package, code_prefix) + if architecture == "arm64" and not container_deployment: + raise RuntimeError("GCP does not support arm64 for non-container deployments") + - self.logging.info("Uploading function {} code to {}".format(func_name, code_bucket)) + if container_deployment: + full_service_name = GCP.get_full_service_name(project_name, location, func_name) + get_req = 
self.run_client.projects().locations().services().get(name=full_service_name) + else: + full_func_name = GCP.get_full_function_name(project_name, location, func_name) + code_package_name = cast(str, os.path.basename(package)) + code_package_name = f"{architecture}-{code_package_name}" + code_bucket = storage_client.get_bucket(Resources.StorageBucketType.DEPLOYMENT) + code_prefix = os.path.join(benchmark, code_package_name) + storage_client.upload(code_bucket, package, code_prefix) - full_func_name = GCP.get_full_function_name(project_name, location, func_name) - get_req = self.function_client.projects().locations().functions().get(name=full_func_name) + self.logging.info("Uploading function {} code to {}".format(func_name, code_bucket)) + get_req = self.function_client.projects().locations().functions().get(name=full_func_name) try: get_req.execute() @@ -223,45 +241,95 @@ def create_function( envs = self._generate_function_envs(code_package) - create_req = ( - self.function_client.projects() - .locations() - .functions() - .create( - location="projects/{project_name}/locations/{location}".format( - project_name=project_name, location=location - ), - body={ - "name": full_func_name, - "entryPoint": "handler", - "runtime": code_package.language_name + language_runtime.replace(".", ""), - "availableMemoryMb": memory, - "timeout": str(timeout) + "s", - "httpsTrigger": {}, - "ingressSettings": "ALLOW_ALL", - "sourceArchiveUrl": "gs://" + code_bucket + "/" + code_prefix, - "environmentVariables": envs, - }, + if container_deployment: + # In the service model, envs is a list of objects with attributes name and value + envs = self._transform_service_envs(envs) + self.logging.info("Deploying run container service") + parent = f"projects/{project_name}/locations/{location}" + create_req = ( + self.run_client.projects() + .locations() + .services() + .create( + parent=parent, + serviceId=func_name, + body={ + "template": { + "containers": [ + { + "image": container_uri, + 
"ports": [{"containerPort": 8080}], + "env": envs, + "resources": { + "limits": { + "memory": f"{memory if memory >= 512 else 512}Mi", + } + } + } + ], + "timeout": f"{timeout}s", + }, + "ingress": "INGRESS_TRAFFIC_ALL" + }, + ) + ) + else: + create_req = ( + self.function_client.projects() + .locations() + .functions() + .create( + location="projects/{project_name}/locations/{location}".format( + project_name=project_name, location=location + ), + body={ + "name": full_func_name, + "entryPoint": "handler", + "runtime": code_package.language_name + language_runtime.replace(".", ""), + "availableMemoryMb": memory, + "timeout": str(timeout) + "s", + "httpsTrigger": {}, + "ingressSettings": "ALLOW_ALL", + "sourceArchiveUrl": "gs://" + code_bucket + "/" + code_prefix, + "environmentVariables": envs, + }, + ) ) - ) create_req.execute() self.logging.info(f"Function {func_name} has been created!") - allow_unauthenticated_req = ( - self.function_client.projects() - .locations() - .functions() - .setIamPolicy( - resource=full_func_name, - body={ - "policy": { - "bindings": [ - {"role": "roles/cloudfunctions.invoker", "members": ["allUsers"]} - ] - } - }, + if container_deployment: + allow_unauthenticated_req = ( + self.run_client.projects() + .locations() + .services() + .setIamPolicy( + resource=full_service_name, + body={ + "policy": { + "bindings": [ + {"role": "roles/run.invoker", "members": ["allUsers"]} + ] + } + }, + ) + ) + else: + allow_unauthenticated_req = ( + self.function_client.projects() + .locations() + .functions() + .setIamPolicy( + resource=full_func_name, + body={ + "policy": { + "bindings": [ + {"role": "roles/cloudfunctions.invoker", "members": ["allUsers"]} + ] + } + }, + ) ) - ) # Avoid infinite loop MAX_RETRIES = 5 @@ -317,25 +385,50 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) location = self.config.region project_name = self.config.project_name - full_func_name = GCP.get_full_function_name(project_name, 
location, function.name) self.logging.info(f"Function {function.name} - waiting for deployment...") - our_function_req = ( - self.function_client.projects().locations().functions().get(name=full_func_name) - ) - deployed = False - begin = time.time() - while not deployed: - status_res = our_function_req.execute() - if status_res["status"] == "ACTIVE": - deployed = True - else: - time.sleep(3) - if time.time() - begin > 300: # wait 5 minutes; TODO: make it configurable - self.logging.error(f"Failed to deploy function: {function.name}") - raise RuntimeError("Deployment timeout!") - self.logging.info(f"Function {function.name} - deployed!") - invoke_url = status_res["httpsTrigger"]["url"] - + + # Cloud Functions v1 do not have "-" in their name, Cloud Run Services do + if "-" in function.name: + # Cloud Run Service + service_id = function.name.lower() + full_service_name = GCP.get_full_service_name(project_name, self.config.region, service_id) + self.logging.info(f"Waiting for service {full_service_name} to be ready...") + deployed = False + begin = time.time() + while not deployed: + svc = self.run_client.projects().locations().services().get(name=full_service_name).execute() + condition = svc.get("terminalCondition", {}) + if condition.get("type") == "Ready" and condition.get("state") == "CONDITION_SUCCEEDED": + deployed = True + else: + time.sleep(3) + + if time.time() - begin > 300: + self.logging.error(f"Failed to deploy service: {function.name}") + raise RuntimeError("Deployment timeout!") + + self.logging.info(f"Service {function.name} - deployed!") + invoke_url = svc["uri"] + + else: + full_func_name = GCP.get_full_function_name(project_name, location, function.name) + our_function_req = ( + self.function_client.projects().locations().functions().get(name=full_func_name) + ) + deployed = False + begin = time.time() + while not deployed: + status_res = our_function_req.execute() + if status_res["status"] == "ACTIVE": + deployed = True + else: + time.sleep(3) + 
if time.time() - begin > 300: # wait 5 minutes; TODO: make it configurable + self.logging.error(f"Failed to deploy function: {function.name}") + raise RuntimeError("Deployment timeout!") + self.logging.info(f"Function {function.name} - deployed!") + invoke_url = status_res["httpsTrigger"]["url"] + trigger = HTTPTrigger(invoke_url) else: raise RuntimeError("Not supported!") @@ -363,9 +456,6 @@ def update_function( container_uri: str, ): - if container_deployment: - raise NotImplementedError("Container deployment is not supported in GCP") - function = cast(GCPFunction, function) language_runtime = code_package.language_version @@ -379,60 +469,123 @@ def update_function( storage.upload(bucket, code_package.code_location, code_package_name) envs = self._generate_function_envs(code_package) - - self.logging.info(f"Uploaded new code package to {bucket}/{code_package_name}") - full_func_name = GCP.get_full_function_name( - self.config.project_name, self.config.region, function.name - ) - req = ( - self.function_client.projects() - .locations() - .functions() - .patch( - name=full_func_name, - body={ - "name": full_func_name, - "entryPoint": "handler", - "runtime": code_package.language_name + language_runtime.replace(".", ""), - "availableMemoryMb": function.config.memory, - "timeout": str(function.config.timeout) + "s", - "httpsTrigger": {}, - "sourceArchiveUrl": "gs://" + bucket + "/" + code_package_name, - "environmentVariables": envs, - }, + + if container_deployment: + full_service_name = GCP.get_full_service_name(self.config.project_name, self.config.region, function.name) + + memory = function.config.memory + timeout = function.config.timeout + + # Cloud Run v2 Service Update + service_body = { + "template": { + "containers": [ + { + "image": container_uri, + "resources": { + "limits": { + "memory": f"{memory if memory >= 512 else 512}Mi", + } + }, + "env": [{"name": k, "value": v} for k, v in envs.items()] + } + ], + "timeout": f"{timeout}s" + } + } + + req = 
self.run_client.projects().locations().services().patch( + name=full_service_name, + body=service_body ) - ) - res = req.execute() - versionId = res["metadata"]["versionId"] - retries = 0 - last_version = -1 - while retries < 100: - is_deployed, last_version = self.is_deployed(function.name, versionId) - if not is_deployed: - time.sleep(5) - retries += 1 - else: - break - if retries > 0 and retries % 10 == 0: - self.logging.info(f"Waiting for function deployment, {retries} retries.") - if retries == 100: - raise RuntimeError( - "Failed to publish new function code after 10 attempts. " - f"Version {versionId} has not been published, last version {last_version}." + + else: + + self.logging.info(f"Uploaded new code package to {bucket}/{code_package_name}") + full_func_name = GCP.get_full_function_name( + self.config.project_name, self.config.region, function.name + ) + req = ( + self.function_client.projects() + .locations() + .functions() + .patch( + name=full_func_name, + body={ + "name": full_func_name, + "entryPoint": "handler", + "runtime": code_package.language_name + language_runtime.replace(".", ""), + "availableMemoryMb": function.config.memory, + "timeout": str(function.config.timeout) + "s", + "httpsTrigger": {}, + "sourceArchiveUrl": "gs://" + bucket + "/" + code_package_name, + "environmentVariables": envs, + }, + ) ) - self.logging.info("Published new function code and configuration.") + + res = req.execute() + + if container_deployment: + self.logging.info(f"Updated Cloud Run service {function.name}, waiting for operation completion...") + + op_name = res["name"] + op_res = self.run_client.projects().locations().operations().wait(name=op_name).execute() + + if "error" in op_res: + raise RuntimeError(f"Cloud Run update failed: {op_res['error']}") + + self.logging.info(f"Cloud Run service {function.name} updated and ready.") + + else: + versionId = res["metadata"]["versionId"] + retries = 0 + last_version = -1 + while retries < 100: + is_deployed, 
last_version = self.is_deployed(function.name, versionId) + if not is_deployed: + time.sleep(5) + retries += 1 + else: + break + if retries > 0 and retries % 10 == 0: + self.logging.info(f"Waiting for function deployment, {retries} retries.") + if retries == 100: + raise RuntimeError( + "Failed to publish new function code after 10 attempts. " + f"Version {versionId} has not been published, last version {last_version}." + ) + self.logging.info("Published new function code and configuration.") def _update_envs(self, full_function_name: str, envs: dict) -> dict: - get_req = ( - self.function_client.projects().locations().functions().get(name=full_function_name) - ) - response = get_req.execute() + if "/services/" in full_function_name: + # Envs are in template.containers[0].env (list of {name, value}) + get_req = self.run_client.projects().locations().services().get(name=full_function_name) + response = get_req.execute() + + # Extract existing envs + existing_envs = {} + if "template" in response and "containers" in response["template"]: + # Assume single container + container = response["template"]["containers"][0] + if "env" in container: + for e in container["env"]: + existing_envs[e["name"]] = e["value"] + + # Merge: new overrides old + envs = {**existing_envs, **envs} + + else: + get_req = ( + self.function_client.projects().locations().functions().get(name=full_function_name) + ) + response = get_req.execute() - # preserve old variables while adding new ones. - # but for conflict, we select the new one - if "environmentVariables" in response: - envs = {**response["environmentVariables"], **envs} + # preserve old variables while adding new ones. 
+ # but for conflict, we select the new one + if "environmentVariables" in response: + envs = {**response["environmentVariables"], **envs} return envs @@ -450,6 +603,10 @@ def _generate_function_envs(self, code_package: Benchmark) -> dict: return envs + + def _transform_service_envs(self, envs: dict) -> list: + return [{"name": k, "value": v} for k, v in envs.items()] + def update_function_configuration( self, function: Function, code_package: Benchmark, env_variables: dict = {} ): @@ -457,9 +614,16 @@ def update_function_configuration( assert code_package.has_input_processed function = cast(GCPFunction, function) - full_func_name = GCP.get_full_function_name( - self.config.project_name, self.config.region, function.name - ) + if code_package.language_name == "pypy": + full_func_name = GCP.get_full_service_name( + self.config.project_name, + self.config.region, + function.name.replace("_", "-").lower() + ) + else: + full_func_name = GCP.get_full_function_name( + self.config.project_name, self.config.region, function.name + ) envs = self._generate_function_envs(code_package) envs = {**envs, **env_variables} @@ -468,7 +632,45 @@ def update_function_configuration( if len(envs) > 0: envs = self._update_envs(full_func_name, envs) - if len(envs) > 0: + if "/services/" in full_func_name: + # Cloud Run Configuration Update + + # Prepare envs list + env_vars = [{"name": k, "value": v} for k, v in envs.items()] + memory = function.config.memory + timeout = function.config.timeout + + service_body = { + "template": { + "containers": [ + { + "resources": { + "limits": { + "memory": f"{memory}Mi", + } + }, + "env": env_vars + } + ], + "timeout": f"{timeout}s" + } + } + + req = self.run_client.projects().locations().services().patch( + name=full_func_name, + body=service_body + ) + res = req.execute() + + self.logging.info(f"Updated Cloud Run configuration {function.name}, waiting for operation...") + op_name = res["name"] + op_res = 
self.run_client.projects().locations().operations().wait(name=op_name).execute() + if "error" in op_res: + raise RuntimeError(f"Cloud Run config update failed: {op_res['error']}") + + return 0 + + elif len(envs) > 0: req = ( self.function_client.projects() @@ -527,6 +729,10 @@ def update_function_configuration( def get_full_function_name(project_name: str, location: str, func_name: str): return f"projects/{project_name}/locations/{location}/functions/{func_name}" + @staticmethod + def get_full_service_name(project_name: str, location: str, service_name: str): + return f"projects/{project_name}/locations/{location}/services/{service_name}" + def prepare_experiment(self, benchmark): logs_bucket = self._system_resources.get_storage().add_output_bucket( benchmark, suffix="logs" @@ -720,14 +926,31 @@ def get_functions(self, code_package: Benchmark, function_names: List[str]) -> L return functions def is_deployed(self, func_name: str, versionId: int = -1) -> Tuple[bool, int]: - name = GCP.get_full_function_name(self.config.project_name, self.config.region, func_name) - function_client = self.get_function_client() - status_req = function_client.projects().locations().functions().get(name=name) - status_res = status_req.execute() - if versionId == -1: - return (status_res["status"] == "ACTIVE", status_res["versionId"]) + + if "pypy" in func_name: + # Cloud Run Service + service_name = func_name.replace("_", "-").lower() + name = GCP.get_full_service_name(self.config.project_name, self.config.region, service_name) + try: + svc = self.run_client.projects().locations().services().get(name=name).execute() + conditions = svc.get("status", {}).get("conditions", []) + ready = next((c for c in conditions if c["type"] == "Ready"), None) + is_ready = ready and ready["status"] == "True" + return (is_ready, 0) + except HttpError: + return (False, -1) else: - return (status_res["versionId"] == versionId, status_res["versionId"]) + name = 
GCP.get_full_function_name(self.config.project_name, self.config.region, func_name) + try: + function_client = self.get_function_client() + status_req = function_client.projects().locations().functions().get(name=name) + status_res = status_req.execute() + if versionId == -1: + return (status_res["status"] == "ACTIVE", status_res["versionId"]) + else: + return (status_res["versionId"] == versionId, status_res["versionId"]) + except HttpError: + return (False, -1) def deployment_version(self, func: Function) -> int: name = GCP.get_full_function_name(self.config.project_name, self.config.region, func.name) diff --git a/sebs/local/local.py b/sebs/local/local.py index 32b9f9ffb..1f40016aa 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -124,6 +124,7 @@ def package_code( CONFIG_FILES = { "python": ["handler.py", "requirements.txt", ".python_packages"], "nodejs": ["handler.js", "package.json", "node_modules"], + "pypy": ["handler.py", "requirements.txt", ".python_packages"], } package_config = CONFIG_FILES[language_name] function_dir = os.path.join(directory, "function") diff --git a/third-party/pypapi b/third-party/pypapi deleted file mode 160000 index 2188acab3..000000000 --- a/third-party/pypapi +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 2188acab38a28af0a70a3bd2f36060fa9984e096 diff --git a/tools/build_docker_images.py b/tools/build_docker_images.py index 5336fb485..21a130498 100755 --- a/tools/build_docker_images.py +++ b/tools/build_docker_images.py @@ -13,11 +13,20 @@ "--deployment", default=None, choices=["local", "aws", "azure", "gcp"], action="store" ) parser.add_argument("--type", default=None, choices=["build", "run", "manage"], action="store") -parser.add_argument("--language", default=None, choices=["python", "nodejs"], action="store") +parser.add_argument( + "--language", default=None, choices=["python", "nodejs", "java", "rust", "pypy"], action="store" +) +parser.add_argument( + "--platform", + default=None, + help="Optional Docker 
platform (e.g., linux/amd64) to override host architecture.", +) parser.add_argument("--language-version", default=None, type=str, action="store") args = parser.parse_args() config = json.load(open(os.path.join(PROJECT_DIR, "config", "systems.json"), "r")) client = docker.from_env() +# Prefer explicit CLI platform, otherwise fall back to environment +PLATFORM = args.platform or os.environ.get("DOCKER_DEFAULT_PLATFORM") def build(image_type, system, language=None, version=None, version_name=None): @@ -51,8 +60,24 @@ def build(image_type, system, language=None, version=None, version_name=None): target, PROJECT_DIR, dockerfile, buildargs ) ) + + # Build kwargs with platform support + build_kwargs = { + "path": PROJECT_DIR, + "dockerfile": dockerfile, + "buildargs": buildargs, + "tag": target, + } + + # Platform selection priority: CLI arg > env var + platform_arg = args.platform or os.environ.get("DOCKER_DEFAULT_PLATFORM") + if platform_arg: + build_kwargs["platform"] = platform_arg + elif PLATFORM: + build_kwargs["platform"] = PLATFORM + try: - client.images.build(path=PROJECT_DIR, dockerfile=dockerfile, buildargs=buildargs, tag=target) + client.images.build(**build_kwargs) except docker.errors.BuildError as exc: print("Error! Build failed!") print(exc)