Skip to content

Commit 7edbf7a

Browse files
test: add container-based OOM proof test
Run s3proxy in a 128MB memory-constrained container and hammer it with concurrent large uploads. If the memory limiter fails, the kernel OOM-kills the process (exit code 137) — a binary pass/fail.

- Add s3proxy service to docker-compose with mem_limit=128m (oom profile)
- Add test_memory_leak.py with PUT and multipart upload stress tests
- Add make test-oom target and CI step
1 parent 300f9a4 commit 7edbf7a

File tree

4 files changed

+240
-1
lines changed

4 files changed

+240
-1
lines changed

.github/workflows/test.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,6 @@ jobs:
3131

3232
- name: Run all tests
3333
run: make test-all
34+
35+
- name: OOM proof test (128MB container)
36+
run: make test-oom

Makefile

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.PHONY: test test-all test-unit test-run test-memory-bounds e2e cluster lint
1+
.PHONY: test test-all test-unit test-run test-oom e2e cluster lint
22

33
# Lint: ruff check + format check
44
lint:
@@ -33,6 +33,17 @@ test-run:
3333
docker compose -f tests/docker-compose.yml down; \
3434
exit $$EXIT_CODE
3535

36+
# OOM proof test: runs s3proxy in a 128MB container and hammers it.
#
# Steps, in order:
#   1. Tear down any leftover "oom"-profile stack (errors are ignored).
#   2. Build and start the stack detached.
#   3. Wait a fixed 5 seconds before testing.
#   4. Run the stress tests with the MinIO credentials exported.
#   5. Always tear the stack down again, then exit with pytest's status so a
#      test failure still fails the make target.
#
# NOTE(review): step 3 assumes the proxy accepts connections within 5 seconds
# of `up -d` returning — confirm, or replace with a readiness check.
test-oom:
	@docker compose -f tests/docker-compose.yml --profile oom down 2>/dev/null || true
	@docker compose -f tests/docker-compose.yml --profile oom up -d --build
	@sleep 5
	@AWS_ACCESS_KEY_ID=minioadmin AWS_SECRET_ACCESS_KEY=minioadmin \
	uv run pytest -v tests/integration/test_memory_leak.py; \
	EXIT_CODE=$$?; \
	docker compose -f tests/docker-compose.yml --profile oom down; \
	exit $$EXIT_CODE
46+
3647
# E2E cluster commands
3748
e2e:
3849
./e2e/cluster.sh $(filter-out $@,$(MAKECMDGOALS))

tests/docker-compose.yml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,23 @@ services:
1515
MINIO_ROOT_USER: minioadmin
1616
MINIO_ROOT_PASSWORD: minioadmin
1717
command: server /data --console-address ":9001"
18+
19+
s3proxy:
  # Only started when the "oom" profile is requested (see `make test-oom`).
  profiles: ["oom"]
  build:
    context: ../
    dockerfile: Dockerfile
  # tests/integration/test_memory_leak.py inspects the container by this name.
  container_name: s3proxy-test-server
  # Memory cap for the container; the OOM proof test depends on this limit.
  mem_limit: 128m
  ports:
    # The test client connects to http://localhost:4433.
    - "4433:4433"
  environment:
    # NOTE(review): this literal is 30 characters long although its text says
    # "32-bytes" — confirm the proxy does not require an exact 32-byte key.
    S3PROXY_ENCRYPT_KEY: "test-encryption-key-32-bytes!!"
    # Presumably the upstream S3 endpoint (the minio service) — confirm.
    S3PROXY_HOST: "http://minio:9000"
    S3PROXY_REGION: "us-east-1"
    # The test docstrings expect this to keep upload buffers to roughly 16MB
    # at a time, far below the 128MB container cap.
    S3PROXY_MEMORY_LIMIT_MB: "16"
    S3PROXY_LOG_LEVEL: "WARNING"
    # Same credentials as MINIO_ROOT_USER / MINIO_ROOT_PASSWORD.
    AWS_ACCESS_KEY_ID: minioadmin
    AWS_SECRET_ACCESS_KEY: minioadmin
  depends_on:
    - minio
Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
"""Prove s3proxy doesn't get OOM-killed under real OS memory constraints.
2+
3+
Runs against the s3proxy container defined in tests/docker-compose.yml
4+
which has mem_limit=128m. If the memory limiter fails, the Linux kernel
5+
OOM-kills the process (exit code 137). That's a hard, binary pass/fail.
6+
"""
7+
8+
import concurrent.futures
9+
import contextlib
10+
import json
11+
import subprocess
12+
import time
13+
import uuid
14+
15+
import boto3
16+
import pytest
17+
18+
# Container name passed to `docker inspect`; matches `container_name` of the
# s3proxy service in tests/docker-compose.yml.
CONTAINER_NAME = "s3proxy-test-server"
# Proxy endpoint on the host; matches the "4433:4433" port mapping of that service.
ENDPOINT_URL = "http://localhost:4433"
20+
21+
22+
def container_is_running() -> bool:
    """Report whether the s3proxy container is up and has not been OOM-killed.

    Runs ``docker inspect`` on ``CONTAINER_NAME`` and reads the JSON-encoded
    container state. A non-zero exit status from ``docker inspect`` is
    reported as "not running".
    """
    inspect_cmd = ["docker", "inspect", "--format", "{{json .State}}", CONTAINER_NAME]
    proc = subprocess.run(inspect_cmd, capture_output=True, text=True)
    if proc.returncode != 0:
        return False

    state = json.loads(proc.stdout.strip())
    is_up = state.get("Running", False)
    oom_flag = state.get("OOMKilled", False)
    # Alive only if Docker says it is running AND the OOM flag is literally False.
    return is_up and oom_flag is False
33+
34+
35+
def container_oom_killed() -> bool:
    """Report whether Docker has flagged the s3proxy container as OOM-killed.

    Only the literal output ``true`` from ``docker inspect`` counts; any other
    output (including none at all) yields False.
    """
    inspect_cmd = ["docker", "inspect", "--format", "{{.State.OOMKilled}}", CONTAINER_NAME]
    proc = subprocess.run(inspect_cmd, capture_output=True, text=True)
    flag = proc.stdout.strip()
    return flag == "true"
43+
44+
45+
@pytest.mark.e2e
class TestOOMProof:
    """Prove s3proxy survives sustained load in a 128MB container.

    Each test fires concurrent uploads through the proxy and then asks Docker
    whether the container was OOM-killed or has stopped running.
    """

    # Attempts each upload worker makes before it reports failure.
    _MAX_ATTEMPTS = 30

    @pytest.fixture
    def s3_client(self):
        """Return a boto3 S3 client pointed at the proxy endpoint.

        botocore's built-in retries are switched off so that every rejected
        request is seen (and counted) by the tests' own retry loop.
        """
        return boto3.client(
            "s3",
            endpoint_url=ENDPOINT_URL,
            aws_access_key_id="minioadmin",
            aws_secret_access_key="minioadmin",
            region_name="us-east-1",
            config=boto3.session.Config(
                retries={"max_attempts": 0},
                connect_timeout=10,
                read_timeout=120,
            ),
        )

    @pytest.fixture
    def bucket(self, s3_client):
        """Yield a uniquely named bucket, then empty and delete it.

        Both creation and cleanup are best-effort: any exception raised by
        either step is suppressed.
        """
        name = f"oom-proof-{uuid.uuid4().hex[:8]}"
        with contextlib.suppress(Exception):
            s3_client.create_bucket(Bucket=name)
        yield name
        with contextlib.suppress(Exception):
            # A single listing call is used; the tests create at most six keys.
            resp = s3_client.list_objects_v2(Bucket=name)
            if "Contents" in resp:
                objects = [{"Key": o["Key"]} for o in resp["Contents"]]
                s3_client.delete_objects(Bucket=name, Delete={"Objects": objects})
            s3_client.delete_bucket(Bucket=name)

    @staticmethod
    def _is_retryable(message):
        """Return True if an error message mentions 503, SlowDown or a reset."""
        return "503" in message or "SlowDown" in message or "reset" in message.lower()

    def _upload_with_retry(self, index, attempt_once):
        """Call ``attempt_once`` until it succeeds, fails hard, or attempts run out.

        Args:
            index: Worker index, echoed back in the result.
            attempt_once: Zero-argument callable performing one full upload
                attempt; it signals failure by raising.

        Returns:
            ``{"index", "success": True, "attempts"}`` on success, otherwise
            ``{"index", "success": False, "error"}``. When every attempt was
            rejected with a retryable error, ``error`` includes the last one
            so the failure can be diagnosed from the assertion message.
        """
        import random

        last_error = ""
        for attempt in range(1, self._MAX_ATTEMPTS + 1):
            try:
                attempt_once()
            except Exception as exc:
                last_error = str(exc)
                if not self._is_retryable(last_error):
                    return {"index": index, "success": False, "error": last_error}
                # Jittered pause so the workers do not retry in lockstep.
                time.sleep(0.5 + random.uniform(0, 0.5))
            else:
                return {"index": index, "success": True, "attempts": attempt}

        return {
            "index": index,
            "success": False,
            "error": f"max retries; last error: {last_error}",
        }

    @staticmethod
    def _run_concurrently(worker, count):
        """Run ``worker(i)`` for ``i`` in ``range(count)`` on ``count`` threads.

        Returns the workers' results in completion order.
        """
        with concurrent.futures.ThreadPoolExecutor(max_workers=count) as ex:
            futures = [ex.submit(worker, i) for i in range(count)]
            return [f.result() for f in concurrent.futures.as_completed(futures)]

    def test_concurrent_uploads_no_oom_kill(self, s3_client, bucket):
        """6 concurrent 20MB uploads into a 128MB container.

        Total data: 120MB. Without memory limiting, this would need
        ~120MB+ of buffers on top of the ~80MB Python runtime → OOM.
        With memory_limit_mb=16, only ~16MB of buffers at a time → safe.

        Pass: container alive, no exit code 137.
        Fail: OOMKilled=true in docker inspect.
        """
        assert container_is_running(), "s3proxy container not running before test"

        num_uploads = 6
        upload_size = 20 * 1024 * 1024  # 20MB
        # One shared payload; every worker uploads the same bytes object.
        data = bytes([42]) * upload_size

        def upload_one(i: int) -> dict:
            key = f"oom-test-{i}.bin"
            return self._upload_with_retry(
                i,
                lambda: s3_client.put_object(Bucket=bucket, Key=key, Body=data),
            )

        results = self._run_concurrently(upload_one, num_uploads)
        succeeded = sum(1 for r in results if r["success"])

        # The real assertion: container is still alive (not OOM-killed)
        assert not container_oom_killed(), (
            "CONTAINER WAS OOM-KILLED! Memory limiter failed to prevent OOM."
        )
        assert container_is_running(), (
            "Container died during test (check docker logs s3proxy-test-server)"
        )
        assert succeeded == num_uploads, (
            f"Only {succeeded}/{num_uploads} uploads succeeded: "
            + str([r for r in results if not r["success"]])
        )

    def test_multipart_uploads_no_oom_kill(self, s3_client, bucket):
        """4 concurrent 50MB multipart uploads into a 128MB container.

        Total data: 200MB via multipart (2 x 25MB parts each).
        This tests the multipart code path which has separate buffers.
        """
        import io

        assert container_is_running(), "s3proxy container not running before test"

        num_uploads = 4
        part_size = 25 * 1024 * 1024  # 25MB per part

        def upload_multipart(i: int) -> dict:
            key = f"oom-multipart-{i}.bin"

            def attempt_once():
                upload_id = None
                try:
                    resp = s3_client.create_multipart_upload(Bucket=bucket, Key=key)
                    upload_id = resp["UploadId"]

                    parts = []
                    for part_num in (1, 2):
                        part_data = bytes([42 + part_num]) * part_size
                        part_resp = s3_client.upload_part(
                            Bucket=bucket,
                            Key=key,
                            UploadId=upload_id,
                            PartNumber=part_num,
                            Body=io.BytesIO(part_data),
                        )
                        parts.append({"PartNumber": part_num, "ETag": part_resp["ETag"]})

                    s3_client.complete_multipart_upload(
                        Bucket=bucket,
                        Key=key,
                        UploadId=upload_id,
                        MultipartUpload={"Parts": parts},
                    )
                except Exception:
                    # Best-effort abort of the half-finished upload before the
                    # error is handed to the retry loop.
                    if upload_id:
                        with contextlib.suppress(Exception):
                            s3_client.abort_multipart_upload(
                                Bucket=bucket, Key=key, UploadId=upload_id
                            )
                    raise

            return self._upload_with_retry(i, attempt_once)

        results = self._run_concurrently(upload_multipart, num_uploads)
        succeeded = sum(1 for r in results if r["success"])

        assert not container_oom_killed(), (
            "CONTAINER WAS OOM-KILLED during multipart uploads!"
        )
        assert container_is_running(), "Container died during multipart test"
        assert succeeded == num_uploads, (
            f"Only {succeeded}/{num_uploads} multipart uploads succeeded: "
            + str([r for r in results if not r["success"]])
        )

0 commit comments

Comments
 (0)