diff --git a/.github/workflows/cluster-test.yml b/.github/workflows/cluster-test.yml new file mode 100644 index 0000000..493f93a --- /dev/null +++ b/.github/workflows/cluster-test.yml @@ -0,0 +1,279 @@ +name: Cluster Test + +on: + pull_request: + paths: + - 'manifests/**' + - 'src/**' + - 'Dockerfile' + - 'e2e/**' + workflow_dispatch: + +jobs: + cluster-test: + runs-on: ubuntu-latest + timeout-minutes: 20 + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build s3proxy image + uses: docker/build-push-action@v6 + with: + context: . + load: true + tags: s3proxy:latest + + - name: Create Kind cluster + uses: helm/kind-action@v1 + with: + node_image: kindest/node:v1.29.2 + cluster_name: cluster-test + + - name: Load image into Kind + run: kind load docker-image s3proxy:latest --name cluster-test + + - name: Create namespace + run: kubectl create namespace s3proxy + + - name: Deploy MinIO + run: | + cat </dev/null | wc -l > /tmp/$pod.start + done + + # Run the load test + kubectl run s3-load-test -n s3proxy --rm -i --restart=Never \ + --image=amazon/aws-cli:latest \ + --env="AWS_ACCESS_KEY_ID=minioadmin" \ + --env="AWS_SECRET_ACCESS_KEY=minioadmin" \ + --env="AWS_DEFAULT_REGION=us-east-1" \ + --command -- /bin/sh -c ' + set -e + ENDPOINT="http://s3proxy-python:4433" + + echo "=== Creating test bucket ===" + aws --endpoint-url $ENDPOINT s3 mb s3://load-test-bucket || true + + echo "=== Generating 10MB test files ===" + mkdir -p /tmp/testfiles + for i in 1 2 3; do + dd if=/dev/urandom of=/tmp/testfiles/file-$i.bin bs=1M count=10 2>/dev/null & + done + wait + ls -lh /tmp/testfiles/ + + echo "=== Starting concurrent uploads ===" + START=$(date +%s) + for i in 1 2 3; do + aws --endpoint-url $ENDPOINT s3 cp /tmp/testfiles/file-$i.bin s3://load-test-bucket/file-$i.bin & + done + wait + END=$(date +%s) + echo "=== Uploads complete in $((END - START))s ===" + + echo "=== Listing bucket ===" + 
aws --endpoint-url $ENDPOINT s3 ls s3://load-test-bucket/ + + echo "=== Downloading and verifying ===" + mkdir -p /tmp/downloads + for i in 1 2 3; do + aws --endpoint-url $ENDPOINT s3 cp s3://load-test-bucket/file-$i.bin /tmp/downloads/file-$i.bin & + done + wait + + echo "=== Comparing checksums ===" + ORIG_SUMS=$(md5sum /tmp/testfiles/*.bin | cut -d" " -f1 | sort) + DOWN_SUMS=$(md5sum /tmp/downloads/*.bin | cut -d" " -f1 | sort) + + if [ "$ORIG_SUMS" = "$DOWN_SUMS" ]; then + echo "✓ Checksums match - round-trip successful" + else + echo "✗ Checksum mismatch!" + exit 1 + fi + + echo "=== Verifying encryption ===" + dd if=/dev/urandom of=/tmp/encrypt-test.bin bs=1K count=100 2>/dev/null + ORIG_SIZE=$(stat -c%s /tmp/encrypt-test.bin) + ORIG_MD5=$(md5sum /tmp/encrypt-test.bin | cut -c1-32) + + aws --endpoint-url $ENDPOINT s3 cp /tmp/encrypt-test.bin s3://load-test-bucket/encrypt-test.bin + aws --endpoint-url http://minio:9000 s3 cp s3://load-test-bucket/encrypt-test.bin /tmp/raw.bin 2>/dev/null || true + + if [ -f /tmp/raw.bin ]; then + RAW_SIZE=$(stat -c%s /tmp/raw.bin) + RAW_MD5=$(md5sum /tmp/raw.bin | cut -c1-32) + EXPECTED_SIZE=$((ORIG_SIZE + 28)) + + if [ "$RAW_SIZE" = "$EXPECTED_SIZE" ] && [ "$ORIG_MD5" != "$RAW_MD5" ]; then + echo "✓ Encryption verified - size +28 bytes (GCM overhead), content differs" + else + echo "✗ Encryption check failed" + exit 1 + fi + fi + + echo "" + echo "✓ All tests passed!" 
+ ' + + - name: Check load balancing + run: | + PODS=$(kubectl get pods -n s3proxy -l app.kubernetes.io/name=s3proxy-python -o jsonpath='{.items[*].metadata.name}') + PODS_HIT=0 + + for pod in $PODS; do + START_LINE=$(cat /tmp/$pod.start 2>/dev/null || echo "0") + REQUEST_COUNT=$(kubectl logs $pod -n s3proxy 2>/dev/null | tail -n +$((START_LINE + 1)) | grep -cE "GET|POST|PUT|HEAD" || echo "0") + if [ "$REQUEST_COUNT" -gt 0 ]; then + PODS_HIT=$((PODS_HIT + 1)) + echo "✓ Pod $pod: received $REQUEST_COUNT requests" + else + echo " Pod $pod: received 0 requests" + fi + done + + if [ "$PODS_HIT" -ge 2 ]; then + echo "✓ Load balancing verified - traffic distributed across $PODS_HIT pods" + else + echo "⚠ Traffic went to only $PODS_HIT pod(s)" + fi + + - name: Show logs on failure + if: failure() + run: | + echo "=== Pod Status ===" + kubectl get pods -n s3proxy -o wide + echo "" + echo "=== S3Proxy Logs ===" + kubectl logs -l app.kubernetes.io/name=s3proxy-python -n s3proxy --tail=100 + echo "" + echo "=== MinIO Logs ===" + kubectl logs -l app=minio -n s3proxy --tail=50 + echo "" + echo "=== Events ===" + kubectl get events -n s3proxy --sort-by=.lastTimestamp diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 9a12081..02c5a53 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -1,16 +1,9 @@ name: Build and Push Docker Image on: - workflow_dispatch: - inputs: - tag: - description: 'Docker image tag' - required: true - type: string - -env: - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} + push: + branches: [main] + tags: ['v*'] jobs: build-and-push: @@ -23,23 +16,33 @@ jobs: - name: Checkout repository uses: actions/checkout@v6 + - name: Determine tags + id: tags + run: | + OWNER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') + if [[ "$GITHUB_REF" == refs/tags/v* ]]; then + VERSION=${GITHUB_REF#refs/tags/v} + echo 
"tags=ghcr.io/${OWNER}/s3proxy-python:${VERSION}" >> $GITHUB_OUTPUT + else + echo "tags=ghcr.io/${OWNER}/s3proxy-python:latest" >> $GITHUB_OUTPUT + fi + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - name: Log in to Container Registry - if: github.event_name != 'pull_request' uses: docker/login-action@v3.6.0 with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} + registry: ghcr.io + username: ${{ github.repository_owner }} password: ${{ secrets.GITHUB_TOKEN }} - name: Build and push Docker image uses: docker/build-push-action@v6 with: context: . - push: ${{ github.event_name != 'pull_request' }} - tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ inputs.tag }} + push: true + tags: ${{ steps.tags.outputs.tags }} cache-from: type=gha cache-to: type=gha,mode=max platforms: linux/amd64,linux/arm64 diff --git a/.github/workflows/helm-install-test.yml b/.github/workflows/helm-install-test.yml new file mode 100644 index 0000000..089893a --- /dev/null +++ b/.github/workflows/helm-install-test.yml @@ -0,0 +1,211 @@ +name: Helm Install Test + +on: + schedule: + # Run daily at 6 AM UTC + - cron: '0 6 * * *' + workflow_dispatch: + +jobs: + helm-install: + runs-on: ubuntu-latest + timeout-minutes: 15 + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Create Kind cluster + uses: helm/kind-action@v1 + with: + node_image: kindest/node:v1.29.2 + cluster_name: helm-test + + - name: Deploy MinIO as S3 backend + run: | + kubectl create namespace s3proxy + cat < /tmp/test.txt + ORIG_MD5=$(md5sum /tmp/test.txt | cut -c1-32) + aws --endpoint-url $ENDPOINT s3 cp /tmp/test.txt s3://smoke-test-bucket/test.txt + + echo "=== Listing bucket ===" + aws --endpoint-url $ENDPOINT s3 ls s3://smoke-test-bucket/ + + echo "=== Downloading and verifying ===" + aws --endpoint-url $ENDPOINT s3 cp s3://smoke-test-bucket/test.txt /tmp/downloaded.txt + DOWN_MD5=$(md5sum /tmp/downloaded.txt | cut -c1-32) + + if [ "$ORIG_MD5" = "$DOWN_MD5" ]; then + echo 
"✓ Round-trip successful - checksums match" + else + echo "✗ Checksum mismatch!" + exit 1 + fi + + echo "=== Verifying encryption (raw read from MinIO) ===" + aws --endpoint-url http://minio:9000 s3 cp s3://smoke-test-bucket/test.txt /tmp/raw.txt 2>/dev/null || true + if [ -f /tmp/raw.txt ]; then + RAW_MD5=$(md5sum /tmp/raw.txt | cut -c1-32) + if [ "$ORIG_MD5" != "$RAW_MD5" ]; then + echo "✓ Data is encrypted - raw content differs from original" + else + echo "✗ Data NOT encrypted - raw matches original!" + exit 1 + fi + fi + + echo "=== Cleanup ===" + aws --endpoint-url $ENDPOINT s3 rm s3://smoke-test-bucket/test.txt + aws --endpoint-url $ENDPOINT s3 rb s3://smoke-test-bucket + + echo "" + echo "✓ All smoke tests passed!" + ' + + - name: Show logs on failure + if: failure() + run: | + echo "=== Pod Status ===" + kubectl get pods -n s3proxy -o wide + echo "" + echo "=== S3Proxy Logs ===" + kubectl logs -l app.kubernetes.io/name=s3proxy-python -n s3proxy --tail=100 + echo "" + echo "=== MinIO Logs ===" + kubectl logs -l app=minio -n s3proxy --tail=50 + echo "" + echo "=== Events ===" + kubectl get events -n s3proxy --sort-by=.lastTimestamp diff --git a/.github/workflows/helm-lint.yml b/.github/workflows/helm-lint.yml index 0fa75d5..b3f8c0f 100644 --- a/.github/workflows/helm-lint.yml +++ b/.github/workflows/helm-lint.yml @@ -1,34 +1,33 @@ -# name: Helm Lint -# -# on: -# pull_request: -# branches: [main] -# paths: -# - 'manifests/**' -# -# jobs: -# helm-lint: -# runs-on: ubuntu-latest -# steps: -# - name: Checkout -# uses: actions/checkout@v6.0.2 -# -# - name: Set up Helm -# uses: azure/setup-helm@v4.3.1 -# -# - name: Add Helm dependency repositories -# run: | -# helm repo add dandydev https://dandydeveloper.github.io/charts -# helm repo update -# -# - name: Update Helm dependencies -# run: | -# helm dependency update manifests/ -# -# - name: Lint Helm chart -# run: | -# helm lint manifests/ -# -# - name: Validate Helm template -# run: | -# helm template s3proxy 
manifests/ --debug > /dev/null +name: Helm Lint + +on: + pull_request: + paths: + - 'manifests/**' + +jobs: + helm-lint: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Set up Helm + uses: azure/setup-helm@v4.3.1 + + - name: Add Helm dependency repositories + run: | + helm repo add dandydev https://dandydeveloper.github.io/charts + helm repo update + + - name: Update Helm dependencies + run: | + helm dependency update manifests/ + + - name: Lint Helm chart + run: | + helm lint manifests/ + + - name: Validate Helm template + run: | + helm template s3proxy manifests/ --debug > /dev/null diff --git a/.github/workflows/helm-publish.yml b/.github/workflows/helm-publish.yml index fb56592..4ae3a22 100644 --- a/.github/workflows/helm-publish.yml +++ b/.github/workflows/helm-publish.yml @@ -1,69 +1,61 @@ -# name: Package and Push Helm Chart -# -# on: -# push: -# tags: ['v*'] -# workflow_dispatch: -# -# env: -# REGISTRY: ghcr.io -# CHART_NAME: s3proxy-python -# -# jobs: -# helm-publish: -# runs-on: ubuntu-latest -# permissions: -# contents: read -# packages: write -# -# steps: -# - name: Checkout repository -# uses: actions/checkout@v6.0.2 -# -# - name: Set up Helm -# uses: azure/setup-helm@v4.3.1 -# -# - name: Log in to Container Registry -# run: | -# echo "${{ secrets.GITHUB_TOKEN }}" | helm registry login ${{ env.REGISTRY }} -u ${{ github.actor }} --password-stdin -# -# - name: Add Helm dependency repositories -# run: | -# helm repo add dandydev https://dandydeveloper.github.io/charts -# helm repo update -# -# - name: Update Helm dependencies -# run: | -# helm dependency update manifests/ -# -# - name: Get chart version -# id: chart -# run: | -# VERSION=$(grep '^version:' manifests/Chart.yaml | awk '{print $2}') -# echo "version=$VERSION" >> $GITHUB_OUTPUT -# # Use tag version if this is a tag push -# if [[ "${{ github.ref }}" == refs/tags/v* ]]; then -# TAG_VERSION="${{ github.ref_name }}" -# TAG_VERSION="${TAG_VERSION#v}" -# 
echo "version=$TAG_VERSION" >> $GITHUB_OUTPUT -# fi -# -# - name: Update chart version for tags -# if: startsWith(github.ref, 'refs/tags/v') -# run: | -# TAG_VERSION="${{ github.ref_name }}" -# TAG_VERSION="${TAG_VERSION#v}" -# sed -i "s/^version:.*/version: $TAG_VERSION/" manifests/Chart.yaml -# sed -i "s/^appVersion:.*/appVersion: \"$TAG_VERSION\"/" manifests/Chart.yaml -# -# - name: Lint Helm chart -# run: | -# helm lint manifests/ -# -# - name: Package Helm chart -# run: | -# helm package manifests/ --destination . -# -# - name: Push Helm chart to OCI registry -# run: | -# helm push ${{ env.CHART_NAME }}-${{ steps.chart.outputs.version }}.tgz oci://${{ env.REGISTRY }}/${{ github.repository_owner }}/charts +name: Package and Push Helm Chart + +on: + push: + branches: [main] + tags: ['v*'] + +jobs: + helm-publish: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Set up Helm + uses: azure/setup-helm@v4.3.1 + + - name: Log in to Container Registry + run: | + echo "${{ secrets.GITHUB_TOKEN }}" | helm registry login ghcr.io -u ${{ github.actor }} --password-stdin + + - name: Add Helm dependency repositories + run: | + helm repo add dandydev https://dandydeveloper.github.io/charts + helm repo update + + - name: Update Helm dependencies + run: | + helm dependency update manifests/ + + - name: Get version + id: version + run: | + if [[ "$GITHUB_REF" == refs/tags/v* ]]; then + VERSION=${GITHUB_REF#refs/tags/v} + else + VERSION="0.0.0-latest" + fi + echo "version=$VERSION" >> $GITHUB_OUTPUT + + - name: Update chart version + run: | + sed -i "s/^version:.*/version: ${{ steps.version.outputs.version }}/" manifests/Chart.yaml + sed -i "s/^appVersion:.*/appVersion: \"${{ steps.version.outputs.version }}\"/" manifests/Chart.yaml + + - name: Lint Helm chart + run: | + helm lint manifests/ + + - name: Package Helm chart + run: | + helm package manifests/ --destination . 
+ + - name: Push Helm chart to OCI registry + run: | + OWNER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') + helm push s3proxy-python-${{ steps.version.outputs.version }}.tgz oci://ghcr.io/${OWNER}/charts diff --git a/.github/workflows/helm-test.yml b/.github/workflows/helm-test.yml deleted file mode 100644 index 8b12242..0000000 --- a/.github/workflows/helm-test.yml +++ /dev/null @@ -1,155 +0,0 @@ -# name: Helm Test -# -# on: -# push: -# branches: [main] -# paths: -# - 'manifests/**' -# - 's3proxy/**' -# - 'Dockerfile' -# - '.github/workflows/helm-test.yml' -# pull_request: -# branches: [main] -# paths: -# - 'manifests/**' -# - 's3proxy/**' -# - 'Dockerfile' -# - '.github/workflows/helm-test.yml' -# workflow_dispatch: -# -# jobs: -# helm-test: -# runs-on: ubuntu-latest -# steps: -# - name: Checkout -# uses: actions/checkout@v6.0.2 -# -# - name: Create Kind cluster -# uses: helm/kind-action@v1.13.0 -# with: -# cluster_name: s3proxy-test -# -# - name: Install Helm -# uses: azure/setup-helm@v4.3.1 -# -# - name: Add Helm repos and update dependencies -# run: | -# helm repo add dandydev https://dandydeveloper.github.io/charts -# helm repo update -# helm dependency update ./manifests -# -# - name: Build and load image -# run: | -# docker build -t s3proxy-python:latest . 
-# kind load docker-image s3proxy-python:latest --name s3proxy-test -# -# - name: Install Helm chart -# run: | -# helm upgrade --install s3proxy ./manifests \ -# -n s3proxy --create-namespace \ -# --set image.repository=s3proxy-python \ -# --set image.tag=latest \ -# --wait --timeout 300s -# -# - name: Wait for pods -# run: | -# kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=s3proxy-python -n s3proxy --timeout=120s -# kubectl wait --for=condition=ready pod -l release=s3proxy -n s3proxy --timeout=180s || true -# kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=s3proxy-python-minio -n s3proxy --timeout=120s -# -# - name: Show deployment status -# run: | -# kubectl get all -n s3proxy -# kubectl get pods -n s3proxy -o wide -# -# - name: Test health endpoint -# run: | -# kubectl run curl-test --image=curlimages/curl --rm -it --restart=Never -n s3proxy -- \ -# curl -sf http://s3proxy-python:4433/healthz -# echo "Health check passed!" -# -# - name: Run load test -# run: | -# kubectl run s3-load-test -n s3proxy --rm -it --restart=Never \ -# --image=amazon/aws-cli:latest \ -# --env="AWS_ACCESS_KEY_ID=minioadmin" \ -# --env="AWS_SECRET_ACCESS_KEY=minioadmin" \ -# --env="AWS_DEFAULT_REGION=us-east-1" \ -# --command -- /bin/sh -c " -# # Create test bucket -# echo 'Creating test bucket...' -# aws --endpoint-url http://s3proxy-python:4433 s3 mb s3://ci-test-bucket 2>/dev/null || true -# -# # Generate test files (smaller for CI) -# echo 'Generating 64MB test files...' 
-# mkdir -p /tmp/testfiles -# for i in 1 2 3; do -# dd if=/dev/urandom of=/tmp/testfiles/file-\$i.bin bs=1M count=64 2>/dev/null & -# done -# wait -# echo 'Files generated' -# ls -lh /tmp/testfiles/ -# -# # Upload concurrently -# echo '' -# echo '=== Starting concurrent uploads ===' -# START=\$(date +%s) -# -# for i in 1 2 3; do -# aws --endpoint-url http://s3proxy-python:4433 s3 cp /tmp/testfiles/file-\$i.bin s3://ci-test-bucket/file-\$i.bin & -# done -# wait -# -# END=\$(date +%s) -# DURATION=\$((END - START)) -# echo '' -# echo \"=== Upload complete in \${DURATION}s ===\" -# -# # Verify uploads -# echo '' -# echo '=== Listing uploaded files ===' -# aws --endpoint-url http://s3proxy-python:4433 s3 ls s3://ci-test-bucket/ -# -# # Download and verify -# echo '' -# echo '=== Downloading files to verify ===' -# mkdir -p /tmp/downloads -# for i in 1 2 3; do -# aws --endpoint-url http://s3proxy-python:4433 s3 cp s3://ci-test-bucket/file-\$i.bin /tmp/downloads/file-\$i.bin & -# done -# wait -# -# echo '' -# echo '=== Comparing checksums ===' -# md5sum /tmp/testfiles/*.bin > /tmp/orig.md5 -# md5sum /tmp/downloads/*.bin > /tmp/down.md5 -# -# ORIG_SUMS=\$(cat /tmp/orig.md5 | while read sum name; do echo \$sum; done | sort) -# DOWN_SUMS=\$(cat /tmp/down.md5 | while read sum name; do echo \$sum; done | sort) -# -# cat /tmp/orig.md5 -# echo '' -# if [ \"\$ORIG_SUMS\" = \"\$DOWN_SUMS\" ]; then -# echo 'All checksums match - encryption/decryption working!' -# else -# echo 'Checksum mismatch!' 
-# exit 1 -# fi -# " -# -# - name: Show pod logs on failure -# if: failure() -# run: | -# echo "=== S3Proxy Logs ===" -# kubectl logs -l app.kubernetes.io/name=s3proxy-python -n s3proxy --tail=100 || true -# echo "" -# echo "=== Redis HA Logs ===" -# kubectl logs -l release=s3proxy -n s3proxy --tail=50 || true -# echo "" -# echo "=== Events ===" -# kubectl get events -n s3proxy --sort-by=.lastTimestamp | tail -20 || true -# -# - name: Cleanup -# if: always() -# run: | -# kind delete cluster --name s3proxy-test diff --git a/.gitignore b/.gitignore index 3a64008..fa29faa 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,6 @@ htmlcov/ # OS .DS_Store Thumbs.db + +# Helm dependencies (downloaded via helm dependency build) +manifests/charts/*.tgz diff --git a/Makefile b/Makefile index 73b1116..4b8af6d 100644 --- a/Makefile +++ b/Makefile @@ -1,64 +1,24 @@ -.PHONY: test test-cov test-full e2e e2e-quick helm helm-cleanup clean bench bench-quick bench-profile +.PHONY: test e2e cluster-test cluster-up cluster-load clean -# Unit tests test: pytest -test-cov: - pytest --cov=s3proxy - -# Full test suite (e2e + helm) -test-full: e2e helm - -# E2E tests e2e: ./e2e/test-e2e-fast.sh -e2e-quick: - QUICK_MODE=true ./e2e/test-e2e-fast.sh - -# Helm tests -helm-test: - ./e2e/test-helm-validate.sh - -helm: - ./e2e/test-helm.sh run - -helm-status: - ./e2e/test-helm.sh status - -helm-logs: - ./e2e/test-helm.sh logs - -helm-load-test: - ./e2e/test-helm-with-load.sh +# Full cluster test (CI) - creates cluster, runs load test, cleans up +cluster-test: + ./e2e/test-cluster.sh -helm-redis: - ./e2e/test-helm.sh redis +# Start cluster and keep running (local dev) - use cluster-load to test +cluster-up: + docker build -t s3proxy:latest . 
+ ./e2e/cluster.sh run -helm-pods: - ./e2e/test-helm.sh pods +# Run load test against running cluster +cluster-load: + ./e2e/cluster.sh load-test -helm-watch: - ./e2e/test-helm.sh watch - -helm-shell: - ./e2e/test-helm.sh shell - -helm-cleanup: - ./e2e/test-helm.sh cleanup - -# Cleanup clean: - ./e2e/test-helm.sh cleanup - docker-compose -f e2e/docker-compose.e2e.yml down -v 2>/dev/null || true - -# Benchmarks (Docker only, no external deps) -bench: - ./benchmarks/run.sh - -bench-quick: - ./benchmarks/run.sh --quick - -bench-profile: - ./benchmarks/profile.sh + ./e2e/cluster.sh cleanup + docker compose -f e2e/docker-compose.e2e.yml down -v 2>/dev/null || true diff --git a/README.md b/README.md index e9e6a41..159ebce 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,12 @@ +

+ + Helm Install + +

+

AES-256-GCM - Python 3.11+ + Python 3.11+ FastAPI S3 Compatible

@@ -12,7 +18,7 @@

- Drop-in S3 proxy that encrypts everything on the fly with military-grade AES-256-GCM.
+ Drop-in S3 proxy that encrypts everything on the fly with AES-256-GCM.
Your apps talk to S3Proxy. S3Proxy talks to S3. Your data stays yours.

@@ -45,7 +51,7 @@ S3's server-side encryption is great, but your cloud provider still holds the ke 🔄 **100% S3 Compatible** — Works with any S3 client, SDK, or CLI. No code changes. -⚡ **Blazing Fast** — Async Python with HTTP/2, uvloop, and streaming I/O +⚡ **Streaming I/O** — Async Python with streaming encryption, no memory buffering 📦 **Multipart Support** — Large file uploads just work, encrypted seamlessly @@ -57,45 +63,83 @@ S3's server-side encryption is great, but your cloud provider still holds the ke ## 🚀 Quick Start -### One-liner with Docker +### 1. Start the proxy ```bash docker run -p 4433:4433 \ - -e S3PROXY_ENCRYPT_KEY="your-super-secret-key" \ + -e S3PROXY_ENCRYPT_KEY="your-32-byte-encryption-key-here" \ + -e S3PROXY_NO_TLS=true \ -e AWS_ACCESS_KEY_ID="AKIA..." \ - -e AWS_SECRET_ACCESS_KEY="..." \ - ghcr.io//sseproxy-python:latest + -e AWS_SECRET_ACCESS_KEY="wJalr..." \ + s3proxy:latest ``` -### Or run locally - -```bash -# Install -pip install -e . +### 2. Configure your client with the same credentials -# Configure -export S3PROXY_ENCRYPT_KEY="your-super-secret-key" -export AWS_ACCESS_KEY_ID="AKIA..." -export AWS_SECRET_ACCESS_KEY="..." +The client must use the **same credentials** that the proxy is configured with: -# Run -python -m s3proxy.main --no-tls +```bash +export AWS_ACCESS_KEY_ID="AKIA..." # Same as proxy +export AWS_SECRET_ACCESS_KEY="wJalr..." # Same as proxy ``` -### Point your app at it +### 3. Point your application at the proxy ```bash -# Instead of s3.amazonaws.com, use localhost:4433 +# Upload through S3Proxy - data is encrypted before reaching S3 aws s3 --endpoint-url http://localhost:4433 cp secret.pdf s3://my-bucket/ -# That's it. Your file is now encrypted in S3. +# Download through S3Proxy - data is decrypted automatically +aws s3 --endpoint-url http://localhost:4433 cp s3://my-bucket/secret.pdf ./ + +# Works with any S3 client/SDK - just change the endpoint URL ``` +Your file is now encrypted at rest with AES-256-GCM. 
The encryption is transparent—your application code doesn't change, only the endpoint URL. + +> **Note:** The proxy supports any bucket accessible with the configured credentials. You don't configure a specific bucket—just point any S3 request at the proxy and it forwards to the appropriate bucket. + --- -## 🏛️ Architecture +## 🔍 How It Works + +S3Proxy sits between your application and S3, transparently encrypting all data before it reaches storage. + +### Request Flow + +``` +1. Client signs request with credentials (same credentials configured on proxy) +2. Proxy receives request and verifies SigV4 signature +3. Proxy encrypts the payload with AES-256-GCM +4. Proxy re-signs the request (encryption changes the body, invalidating original signature) +5. Proxy forwards to S3 +6. S3 stores the encrypted data +``` + +### Why Does the Proxy Need My Credentials? + +**Short answer:** Because encryption changes the request body, which invalidates the client's signature. The proxy must re-sign requests, and re-signing requires the secret key. + +With S3's SigV4 authentication, clients sign requests using their secret key but only send the signature—never the key itself. When S3Proxy encrypts your data, it modifies: +- The request body (now ciphertext instead of plaintext) +- The `Content-Length` header +- The `Content-MD5` / `x-amz-content-sha256` headers + +This breaks the original signature. To forward the request to S3, the proxy must create a new valid signature, which requires having the secret key. -S3Proxy uses a **layered key architecture** for maximum security: +**The proxy acts as a trusted intermediary**, not a transparent passthrough. You configure credentials once on the proxy, and all clients use those same credentials to authenticate. 
+ +``` +┌──────────────┐ SigV4 signed ┌──────────────┐ Re-signed ┌──────────────┐ +│ │ (credentials │ │ (same │ │ +│ Client │ ─────────────▶ │ S3Proxy │ ─────────────▶ │ AWS S3 │ +│ │ from proxy) │ │ credentials) │ │ +└──────────────┘ └──────────────┘ └──────────────┘ +``` + +### Encryption + +S3Proxy uses a **layered key architecture**: | Layer | Key | Purpose | |-------|-----|---------| @@ -105,113 +149,215 @@ S3Proxy uses a **layered key architecture** for maximum security: Your master key never touches S3. DEKs are wrapped and stored as object metadata. Even if someone accesses your bucket, they get nothing but ciphertext. +### Multipart Uploads + +Large files are handled via S3 multipart upload. Each part is encrypted independently with its own nonce, and part metadata is tracked in Redis (or in-memory for single-instance). This enables streaming uploads of arbitrary size without buffering entire files in memory. + --- ## ⚙️ Configuration -All settings via environment variables (prefix: `S3PROXY_`): +Configure via environment variables (Docker) or Helm values (Kubernetes). 
+ +| Setting | Environment Variable | Helm Value | Default | +|---------|---------------------|------------|---------| +| **Encryption key** | `S3PROXY_ENCRYPT_KEY` | `secrets.encryptKey` | — | +| **AWS Access Key** | `AWS_ACCESS_KEY_ID` | `secrets.awsAccessKeyId` | — | +| **AWS Secret Key** | `AWS_SECRET_ACCESS_KEY` | `secrets.awsSecretAccessKey` | — | +| S3 endpoint | `S3PROXY_HOST` | `s3.host` | `s3.amazonaws.com` | +| AWS region | `S3PROXY_REGION` | `s3.region` | `us-east-1` | +| Listen port | `S3PROXY_PORT` | `server.port` | `4433` | +| Disable TLS | `S3PROXY_NO_TLS` | `server.noTls` | `false` | +| Log level | `S3PROXY_LOG_LEVEL` | `server.logLevel` | `INFO` | +| Redis URL | `S3PROXY_REDIS_URL` | `externalRedis.url` | *(empty)* | +| Max concurrent requests | `S3PROXY_THROTTLING_REQUESTS_MAX` | `performance.throttlingRequestsMax` | `10` | +| Max upload size (MB) | `S3PROXY_MAX_UPLOAD_SIZE_MB` | `performance.maxUploadSizeMb` | `45` | + +> **Credentials:** Clients must use the same `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` configured on the proxy. See [How It Works](#-how-it-works). -| Variable | Default | Description | -|----------|---------|-------------| -| `ENCRYPT_KEY` | *required* | Your master encryption key | -| `HOST` | `s3.amazonaws.com` | S3 endpoint | -| `REGION` | `us-east-1` | AWS region | -| `PORT` | `4433` | Listen port | -| `NO_TLS` | `false` | Disable TLS | -| `REDIS_URL` | `redis://localhost:6379/0` | Redis for multipart state | -| `MAX_CONCURRENT_UPLOADS` | `10` | Parallel upload limit | -| `MAX_CONCURRENT_DOWNLOADS` | `10` | Parallel download limit | -| `LOG_LEVEL` | `INFO` | Logging verbosity | +> **S3-compatible:** Works with AWS S3, MinIO, Cloudflare R2, DigitalOcean Spaces, etc. + +> **Redis:** Only required for multi-instance (HA) deployments. Single-instance uses in-memory storage. 
--- -## 🐳 Deploy to Production +## ☸️ Production Deployment + +### Kubernetes with Helm + +The Helm chart is in `manifests/` and includes Redis HA with Sentinel for distributed state. -### Docker Compose (with Redis) +#### Quick Start ```bash -docker-compose -f e2e/docker-compose.e2e.yml up +# Install Helm dependencies (redis-ha) +cd manifests && helm dependency update && cd .. + +# Install with inline secrets (dev/test only) +helm install s3proxy ./manifests \ + --set secrets.encryptKey="your-32-byte-encryption-key" \ + --set secrets.awsAccessKeyId="AKIA..." \ + --set secrets.awsSecretAccessKey="wJalr..." ``` -### Kubernetes with Helm +#### Production Setup + +For production, use Kubernetes secrets instead of inline values: ```bash -# Pull from GitHub Container Registry (OCI) -helm install s3proxy oci://ghcr.io//charts/s3proxy-python \ - --set config.encryptKey="your-key" \ - --set redis.enabled=true +# Create secret manually +kubectl create secret generic s3proxy-secrets \ + --from-literal=S3PROXY_ENCRYPT_KEY="your-32-byte-encryption-key" \ + --from-literal=AWS_ACCESS_KEY_ID="AKIA..." \ + --from-literal=AWS_SECRET_ACCESS_KEY="wJalr..." + +# Install referencing the existing secret +helm install s3proxy ./manifests \ + --set secrets.existingSecrets.enabled=true \ + --set secrets.existingSecrets.name=s3proxy-secrets ``` -The Helm chart includes: -- 3 replicas by default -- Redis HA with Sentinel -- Health checks & readiness probes -- Configurable resource limits +#### Accessing the Proxy ---- +Point your S3 clients at the proxy endpoint: -## 🧪 Testing +```bash +# From within the cluster (default service) +aws s3 --endpoint-url http://s3proxy-python.:4433 cp file.txt s3://bucket/ + +# With gateway enabled (recommended for internal access) +aws s3 --endpoint-url http://s3-gateway. 
cp file.txt s3://bucket/ + +# With ingress (external access) +aws s3 --endpoint-url https://s3proxy.example.com cp file.txt s3://bucket/ +``` + +#### Kubernetes-Specific Settings + +| Helm Value | Default | Description | +|------------|---------|-------------| +| `replicaCount` | `3` | Number of proxy replicas | +| `redis-ha.enabled` | `true` | Deploy embedded Redis HA with Sentinel | +| `resources.requests.memory` | `512Mi` | Memory request per pod | +| `resources.limits.memory` | `512Mi` | Memory limit per pod | +| `ingress.enabled` | `false` | Enable ingress for load balancing | +| `ingress.className` | `nginx` | Ingress class | +| `ingress.hosts` | `[]` | Hostnames for external access | +| `gateway.enabled` | `false` | Create internal DNS alias (`s3-gateway.`) | + +**Gateway vs Ingress:** + +| gateway | ingress | Use case | +|---------|---------|----------| +| `false` | `true` | External access via custom hostname (requires DNS setup) | +| `true` | `true` | Internal access via `s3-gateway.` (no DNS setup needed) | + +> **Recommended for internal access:** Enable both `gateway.enabled=true` and `ingress.enabled=true`. This routes traffic through the ingress controller for load balancing across pods, while providing a convenient internal DNS name (`s3-gateway.`) without external DNS configuration. + +#### Example: External Access with Ingress + +```yaml +# values-prod.yaml +gateway: + enabled: true +ingress: + enabled: true + className: nginx + hosts: + - s3proxy.example.com + tls: + - secretName: s3proxy-tls + hosts: + - s3proxy.example.com +``` ```bash -# Run all tests -pytest +helm install s3proxy ./manifests -f values-prod.yaml \ + --set secrets.existingSecrets.enabled=true \ + --set secrets.existingSecrets.name=s3proxy-secrets +``` -# With coverage -pytest --cov=s3proxy +#### Example: Using External Redis (ElastiCache, etc.) 
-# E2E tests (requires Docker) -./e2e/test-e2e-fast.sh +```bash +helm install s3proxy ./manifests \ + --set redis-ha.enabled=false \ + --set externalRedis.url="redis://my-elasticache.xxx.cache.amazonaws.com:6379/0" \ + --set secrets.existingSecrets.enabled=true \ + --set secrets.existingSecrets.name=s3proxy-secrets ``` ---- +### Health Checks -## 📊 Performance +The proxy exposes health endpoints for Kubernetes probes: +- `GET /healthz` — Liveness probe +- `GET /readyz` — Readiness probe -![Benchmark](benchmarks/results/benchmark.png) +### Security Considerations -*64KB objects, 10 concurrent connections, 3×30s runs. ~60% overhead is primarily from the extra network hop (Client→Proxy→S3) plus encryption.* +- **TLS Termination**: The chart defaults to `noTls=true`, expecting TLS termination at the ingress/load balancer +- **Secrets**: Always use `secrets.existingSecrets` in production—never commit secrets to values files +- **Network Policy**: Consider restricting pod-to-pod traffic to only allow proxy → Redis +- **Encryption Key**: Back up your encryption key securely. Losing it means losing access to all encrypted data -S3Proxy is built for throughput: +### Resource Recommendations -- **Streaming I/O** — Large files never buffer in memory -- **HTTP/2** — Connection multiplexing & pooling -- **uvloop** — 2-4x faster than default asyncio -- **Horizontal scaling** — Redis-backed state, run N replicas +| Workload | Memory | CPU | Concurrency | Notes | +|----------|--------|-----|-------------|-------| +| Standard | 512Mi | 100m | 10 | Default settings | +| Heavy | 1Gi+ | 500m | 20+ | Large files, high concurrency | + +Memory scales with concurrent uploads. 
Use `performance.throttlingRequestsMax` to bound memory usage --- -## 🛡️ Security Model +## 🧪 Testing -| Threat | Mitigation | -|--------|------------| -| S3 bucket breach | All data encrypted with AES-256-GCM | -| Key extraction from S3 | DEKs wrapped with KEK, KEK never stored | -| Request tampering | Full AWS SigV4 signature verification | -| Replay attacks | Nonce uniqueness per object | +```bash +make test # Unit tests +make cluster-test # Full Kubernetes cluster test +``` --- -## 🤝 Contributing +## ❓ FAQ -PRs welcome! Please include tests for new functionality. +**Why can't I use my own AWS credentials with the proxy?** -```bash -# Setup dev environment -uv sync +The proxy must re-sign requests after encryption (see [How It Works](#-how-it-works)). Re-signing requires the secret key, but S3's SigV4 protocol only sends signatures—never the secret key itself. So the proxy must already have the credentials configured. All clients share the same credentials configured on the proxy. + +**Can I use different credentials for different clients?** + +Not currently. The proxy supports one credential pair. If you need per-client credentials, you would deploy multiple proxy instances or implement a credential lookup mechanism. -# Run tests before submitting -pytest +**Can I use this with existing unencrypted data?** + +Yes. S3Proxy only encrypts data written through it. Existing objects remain readable—S3Proxy detects unencrypted objects and returns them as-is. To migrate, simply copy objects through S3Proxy: + +```bash +aws s3 cp --endpoint-url http://localhost:4433 s3://bucket/file.txt s3://bucket/file.txt ``` +**What happens if I lose my encryption key?** + +Your data is unrecoverable. The KEK is never stored—it exists only in your environment variables. Back up your key securely. + +**Can I rotate encryption keys?** + +Not currently. Key rotation would require re-encrypting all objects. This is on the roadmap. + +**Does S3Proxy support SSE-C or SSE-KMS?** + +No. 
S3Proxy implements its own client-side encryption. Server-side encryption options are orthogonal—you can enable both if desired. + --- -## 📄 License +## 🤝 Contributing -MIT +Contributions are welcome. --- -

- Built with 🔐 by engineers who believe encryption should be easy. -

-# s3proxy-python +## 📄 License + +MIT diff --git a/benchmarks/bench.py b/benchmarks/bench.py deleted file mode 100644 index afdb7ec..0000000 --- a/benchmarks/bench.py +++ /dev/null @@ -1,357 +0,0 @@ -#!/usr/bin/env python3 -""" -S3Proxy Benchmark - -Compares direct MinIO access vs S3Proxy (with encryption). -Uses boto3 for S3 operations with async concurrency. - -Usage: - python bench.py # Default: small objects, 10 concurrent - python bench.py --size medium # 1MB objects - python bench.py --size large # 10MB objects - python bench.py --size xlarge # 100MB objects - python bench.py --size huge # 1GiB objects - python bench.py --concurrent 50 # 50 concurrent requests - python bench.py --duration 60 # Run for 60 seconds - python bench.py --runs 3 # Multiple runs for statistics -""" - -import argparse -import asyncio -import os -import time -from dataclasses import dataclass, field -from statistics import mean, stdev - -import aioboto3 - -# Object sizes -SIZES = { - "tiny": 1024, # 1 KB - "small": 64 * 1024, # 64 KB - "medium": 1024 * 1024, # 1 MB - "large": 10 * 1024 * 1024, # 10 MB - "xlarge": 100 * 1024 * 1024, # 100 MB - "huge": 1024 * 1024 * 1024, # 1 GiB -} - -# Endpoints -MINIO_ENDPOINT = os.environ.get("MINIO_ENDPOINT", "http://localhost:9000") -PROXY_ENDPOINT = os.environ.get("PROXY_ENDPOINT", "http://localhost:8080") - -# Credentials -AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID", "benchmarkadminuser") -AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY", "benchmarkadminpassword") -AWS_REGION = os.environ.get("AWS_REGION", "us-east-1") - -BUCKET = "bench-test" - - -@dataclass -class BenchResult: - """Results from a benchmark run.""" - name: str - total_requests: int - duration_sec: float - put_latencies_ms: list[float] - get_latencies_ms: list[float] - errors: int - - @property - def rps(self) -> float: - return self.total_requests / self.duration_sec if self.duration_sec > 0 else 0 - - @property - def put_avg_ms(self) -> float: - return 
mean(self.put_latencies_ms) if self.put_latencies_ms else 0 - - @property - def get_avg_ms(self) -> float: - return mean(self.get_latencies_ms) if self.get_latencies_ms else 0 - - def percentile(self, latencies: list[float], p: int) -> float: - if not latencies: - return 0 - sorted_lat = sorted(latencies) - idx = int(len(sorted_lat) * p / 100) - return sorted_lat[min(idx, len(sorted_lat) - 1)] - - @property - def put_p95_ms(self) -> float: - return self.percentile(self.put_latencies_ms, 95) - - @property - def get_p95_ms(self) -> float: - return self.percentile(self.get_latencies_ms, 95) - - -async def ensure_bucket(session, endpoint: str): - """Create bucket if it doesn't exist.""" - async with session.client( - "s3", - endpoint_url=endpoint, - aws_access_key_id=AWS_ACCESS_KEY_ID, - aws_secret_access_key=AWS_SECRET_ACCESS_KEY, - region_name=AWS_REGION, - ) as s3: - try: - await s3.head_bucket(Bucket=BUCKET) - except Exception: - try: - await s3.create_bucket(Bucket=BUCKET) - except Exception: - pass # Bucket might already exist - - -async def run_benchmark( - endpoint: str, - name: str, - data: bytes, - duration_sec: int, - concurrency: int, -) -> BenchResult: - """Run PUT/GET benchmark against an endpoint.""" - - put_latencies: list[float] = [] - get_latencies: list[float] = [] - errors = 0 - counter = 0 - stop_event = asyncio.Event() - - session = aioboto3.Session() - await ensure_bucket(session, endpoint) - - async def worker(worker_id: int): - nonlocal counter, errors - - async with session.client( - "s3", - endpoint_url=endpoint, - aws_access_key_id=AWS_ACCESS_KEY_ID, - aws_secret_access_key=AWS_SECRET_ACCESS_KEY, - region_name=AWS_REGION, - ) as s3: - iteration = 0 - while not stop_event.is_set(): - key = f"bench-{worker_id}-{iteration}" - iteration += 1 - - # PUT - try: - start = time.perf_counter() - await s3.put_object(Bucket=BUCKET, Key=key, Body=data) - put_latencies.append((time.perf_counter() - start) * 1000) - except Exception as e: - errors += 1 - 
continue - - # GET - try: - start = time.perf_counter() - resp = await s3.get_object(Bucket=BUCKET, Key=key) - await resp["Body"].read() - get_latencies.append((time.perf_counter() - start) * 1000) - counter += 1 - except Exception as e: - errors += 1 - - # Progress reporter - async def progress_reporter(): - start = time.perf_counter() - while not stop_event.is_set(): - await asyncio.sleep(5) - if not stop_event.is_set(): - elapsed = int(time.perf_counter() - start) - print(f" [{elapsed}s] {counter:,} requests, {errors} errors", flush=True) - - # Start workers - start_time = time.perf_counter() - workers = [asyncio.create_task(worker(i)) for i in range(concurrency)] - progress_task = asyncio.create_task(progress_reporter()) - - # Run for specified duration - await asyncio.sleep(duration_sec) - stop_event.set() - - # Wait for workers to finish - progress_task.cancel() - await asyncio.gather(*workers, return_exceptions=True) - total_duration = time.perf_counter() - start_time - - return BenchResult( - name=name, - total_requests=counter, - duration_sec=total_duration, - put_latencies_ms=put_latencies, - get_latencies_ms=get_latencies, - errors=errors, - ) - - -def print_results( - baseline_runs: list[BenchResult], - proxy_runs: list[BenchResult], - size_name: str, - size_bytes: int, -): - """Print comparison table with statistics from multiple runs.""" - - def avg(results: list[BenchResult], attr: str) -> float: - return mean(getattr(r, attr) for r in results) if results else 0 - - def std(results: list[BenchResult], attr: str) -> float: - if len(results) < 2: - return 0 - return stdev(getattr(r, attr) for r in results) - - def fmt_stat(results: list[BenchResult], attr: str, precision: int = 1) -> str: - """Format as 'avg ± std' or just 'avg' for single run.""" - a = avg(results, attr) - s = std(results, attr) - if s > 0: - return f"{a:.{precision}f} ± {s:.{precision}f}" - return f"{a:.{precision}f}" - - print() - print("=" * 75) - print(f" BENCHMARK RESULTS: 
{size_name} objects ({size_bytes:,} bytes)") - if len(baseline_runs) > 1: - print(f" ({len(baseline_runs)} runs, showing mean ± stddev)") - print("=" * 75) - print() - print(f"{'Metric':<25} {'Baseline (MinIO)':>23} {'S3Proxy':>23}") - print("-" * 75) - - # Requests - print(f"{'Requests/sec':<25} {fmt_stat(baseline_runs, 'rps'):>23} {fmt_stat(proxy_runs, 'rps'):>23}") - print(f"{'Total requests':<25} {sum(r.total_requests for r in baseline_runs):>23,} {sum(r.total_requests for r in proxy_runs):>23,}") - print(f"{'Errors':<25} {sum(r.errors for r in baseline_runs):>23} {sum(r.errors for r in proxy_runs):>23}") - print() - - # Latencies - print(f"{'PUT avg (ms)':<25} {fmt_stat(baseline_runs, 'put_avg_ms', 2):>23} {fmt_stat(proxy_runs, 'put_avg_ms', 2):>23}") - print(f"{'PUT p95 (ms)':<25} {fmt_stat(baseline_runs, 'put_p95_ms', 2):>23} {fmt_stat(proxy_runs, 'put_p95_ms', 2):>23}") - print(f"{'GET avg (ms)':<25} {fmt_stat(baseline_runs, 'get_avg_ms', 2):>23} {fmt_stat(proxy_runs, 'get_avg_ms', 2):>23}") - print(f"{'GET p95 (ms)':<25} {fmt_stat(baseline_runs, 'get_p95_ms', 2):>23} {fmt_stat(proxy_runs, 'get_p95_ms', 2):>23}") - print() - - # Calculate overhead - baseline_rps = avg(baseline_runs, 'rps') - proxy_rps = avg(proxy_runs, 'rps') - if baseline_rps > 0: - throughput_overhead = ((baseline_rps - proxy_rps) / baseline_rps) * 100 - print(f"{'Throughput overhead':<25} {throughput_overhead:>23.1f}%") - - baseline_put = avg(baseline_runs, 'put_avg_ms') - proxy_put = avg(proxy_runs, 'put_avg_ms') - if baseline_put > 0: - print(f"{'Added PUT latency':<25} {proxy_put - baseline_put:>22.2f}ms") - - baseline_get = avg(baseline_runs, 'get_avg_ms') - proxy_get = avg(proxy_runs, 'get_avg_ms') - if baseline_get > 0: - print(f"{'Added GET latency':<25} {proxy_get - baseline_get:>22.2f}ms") - - print("=" * 75) - print() - - -async def main(): - parser = argparse.ArgumentParser(description="S3Proxy Benchmark") - parser.add_argument( - "--size", - choices=list(SIZES.keys()), - 
default="small", - help="Object size to test (default: small)", - ) - parser.add_argument( - "--concurrent", - type=int, - default=10, - help="Number of concurrent requests (default: 10)", - ) - parser.add_argument( - "--duration", - type=int, - default=30, - help="Test duration in seconds (default: 30)", - ) - parser.add_argument( - "--runs", - type=int, - default=1, - help="Number of runs for statistical significance (default: 1)", - ) - parser.add_argument( - "--baseline-only", - action="store_true", - help="Only run baseline benchmark", - ) - parser.add_argument( - "--proxy-only", - action="store_true", - help="Only run proxy benchmark", - ) - args = parser.parse_args() - - size_bytes = SIZES[args.size] - test_data = os.urandom(size_bytes) - - print() - print("S3Proxy Benchmark") - print("-" * 40) - print(f" Object size: {args.size} ({size_bytes:,} bytes)") - print(f" Concurrency: {args.concurrent}") - print(f" Duration: {args.duration}s per run") - print(f" Runs: {args.runs}") - print(f" MinIO: {MINIO_ENDPOINT}") - print(f" S3Proxy: {PROXY_ENDPOINT}") - print() - - baseline_runs: list[BenchResult] = [] - proxy_runs: list[BenchResult] = [] - - for run_num in range(1, args.runs + 1): - if args.runs > 1: - print(f"--- Run {run_num}/{args.runs} ---") - - if not args.proxy_only: - print(f"Running baseline benchmark (direct MinIO)...") - result = await run_benchmark( - endpoint=MINIO_ENDPOINT, - name="Baseline (MinIO)", - data=test_data, - duration_sec=args.duration, - concurrency=args.concurrent, - ) - baseline_runs.append(result) - print(f" Completed: {result.total_requests:,} requests, {result.rps:.1f} req/s") - - if not args.baseline_only: - print(f"Running proxy benchmark (S3Proxy)...") - result = await run_benchmark( - endpoint=PROXY_ENDPOINT, - name="S3Proxy", - data=test_data, - duration_sec=args.duration, - concurrency=args.concurrent, - ) - proxy_runs.append(result) - print(f" Completed: {result.total_requests:,} requests, {result.rps:.1f} req/s") - - # 
Brief pause between runs - if run_num < args.runs: - await asyncio.sleep(1) - - if baseline_runs and proxy_runs: - print_results(baseline_runs, proxy_runs, args.size, size_bytes) - elif baseline_runs: - r = baseline_runs[0] - print(f"\nBaseline: {r.rps:.1f} req/s, PUT avg: {r.put_avg_ms:.2f}ms") - elif proxy_runs: - r = proxy_runs[0] - print(f"\nProxy: {r.rps:.1f} req/s, PUT avg: {r.put_avg_ms:.2f}ms") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/benchmarks/docker-compose.yml b/benchmarks/docker-compose.yml deleted file mode 100644 index 398f332..0000000 --- a/benchmarks/docker-compose.yml +++ /dev/null @@ -1,106 +0,0 @@ -services: - redis: - image: redis:7-alpine - container_name: bench-redis - ports: - - "6379:6379" - healthcheck: - test: ["CMD", "redis-cli", "ping"] - interval: 2s - timeout: 2s - retries: 10 - - # Separate benchmark container - no resource contention with proxy - benchmark: - image: python:3.13-slim - container_name: bench-client - working_dir: /bench - volumes: - - ./bench.py:/bench/bench.py:ro - environment: - MINIO_ENDPOINT: http://minio:9000 - PROXY_ENDPOINT: http://s3proxy:4433 - AWS_ACCESS_KEY_ID: benchmarkadminuser - AWS_SECRET_ACCESS_KEY: benchmarkadminpassword - depends_on: - s3proxy: - condition: service_healthy - minio: - condition: service_healthy - # Install deps and sleep to keep container running - command: > - bash -c "pip install -q aioboto3 && tail -f /dev/null" - healthcheck: - test: ["CMD", "python", "-c", "import aioboto3"] - interval: 5s - timeout: 10s - retries: 10 - start_period: 15s - - minio: - image: minio/minio:latest - container_name: bench-minio - ports: - - "9000:9000" # S3 API (baseline tests hit this directly) - - "9001:9001" # Console - environment: - # Credentials must be 16+ chars for AWS SDK compatibility - MINIO_ROOT_USER: benchmarkadminuser - MINIO_ROOT_PASSWORD: benchmarkadminpassword - command: server /data --console-address ":9001" - healthcheck: - test: ["CMD", "curl", "-f", 
"http://localhost:9000/minio/health/live"] - interval: 2s - timeout: 2s - retries: 10 - - s3proxy: - image: s3proxy:latest - build: - context: .. - dockerfile: Dockerfile - container_name: bench-s3proxy - ports: - - "8080:4433" # Proxy tests hit this - environment: - S3PROXY_HOST: http://minio:9000 - S3PROXY_REGION: us-east-1 - S3PROXY_ENCRYPT_KEY: benchmark-test-key-32-bytes-!! - AWS_ACCESS_KEY_ID: benchmarkadminuser - AWS_SECRET_ACCESS_KEY: benchmarkadminpassword - # Higher limits for benchmarking - S3PROXY_MAX_CONCURRENT_UPLOADS: "50" - S3PROXY_MAX_CONCURRENT_DOWNLOADS: "50" - S3PROXY_NO_TLS: "true" - S3PROXY_LOG_LEVEL: WARNING # Reduce log noise during benchmarks - S3PROXY_REDIS_URL: redis://redis:6379/0 - depends_on: - minio: - condition: service_healthy - redis: - condition: service_healthy - # Required for py-spy profiling - cap_add: - - SYS_PTRACE - mem_limit: 512m - memswap_limit: 512m - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:4433/readyz"] - interval: 2s - timeout: 5s - retries: 10 - - # Profiler container - runs py-spy against s3proxy - profiler: - image: python:3.13-slim - container_name: bench-profiler - pid: "service:s3proxy" # Share PID namespace to profile s3proxy - cap_add: - - SYS_PTRACE - volumes: - - ./results:/results - command: > - bash -c "apt-get update && apt-get install -y -qq procps && pip install -q py-spy && tail -f /dev/null" - depends_on: - s3proxy: - condition: service_healthy diff --git a/benchmarks/plot.py b/benchmarks/plot.py deleted file mode 100644 index ebf731a..0000000 --- a/benchmarks/plot.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python3 -"""Generate benchmark comparison chart for README.""" - -import matplotlib.pyplot as plt -import numpy as np - -# Benchmark results (from latest run) -data = { - "Throughput\n(req/s)": (492.3, 199.6), - "PUT Latency\n(ms)": (14.9, 29.1), - "GET Latency\n(ms)": (5.6, 21.2), -} - -fig, ax = plt.subplots(figsize=(8, 3.5)) - -x = np.arange(len(data)) -width = 0.35 - 
-baseline = [v[0] for v in data.values()] -proxy = [v[1] for v in data.values()] - -bars1 = ax.bar(x - width/2, baseline, width, label='Direct (MinIO)', color='#4CAF50', alpha=0.85) -bars2 = ax.bar(x + width/2, proxy, width, label='S3Proxy', color='#2196F3', alpha=0.85) - -ax.set_ylabel('Value') -ax.set_xticks(x) -ax.set_xticklabels(data.keys()) -ax.legend(loc='upper right') -ax.set_title('S3Proxy Performance (64KB objects, 10 concurrent)', fontsize=11, fontweight='bold') - -# Add value labels on bars -for bar in bars1: - height = bar.get_height() - ax.annotate(f'{height:.0f}' if height > 10 else f'{height:.1f}', - xy=(bar.get_x() + bar.get_width() / 2, height), - xytext=(0, 3), textcoords="offset points", - ha='center', va='bottom', fontsize=9) - -for bar in bars2: - height = bar.get_height() - ax.annotate(f'{height:.0f}' if height > 10 else f'{height:.1f}', - xy=(bar.get_x() + bar.get_width() / 2, height), - xytext=(0, 3), textcoords="offset points", - ha='center', va='bottom', fontsize=9) - -# Add overhead annotation -ax.annotate('~60% overhead\n(extra network hop + encryption)', - xy=(0, 350), fontsize=9, color='#666', style='italic') - -plt.tight_layout() -plt.savefig('results/benchmark.png', dpi=150, bbox_inches='tight', - facecolor='white', edgecolor='none') -plt.savefig('results/benchmark.svg', bbox_inches='tight', - facecolor='white', edgecolor='none') -print("Saved: results/benchmark.png and results/benchmark.svg") diff --git a/benchmarks/profile.sh b/benchmarks/profile.sh deleted file mode 100755 index 2205624..0000000 --- a/benchmarks/profile.sh +++ /dev/null @@ -1,276 +0,0 @@ -#!/usr/bin/env bash -# -# S3Proxy Profiler -# -# Profiles the S3Proxy during a benchmark run using py-spy. -# Generates flame graphs showing where time is spent. 
-# -# Usage: -# ./benchmarks/profile.sh # Profile with default settings -# ./benchmarks/profile.sh --duration 30 # Profile for 30 seconds -# -# Requirements: -# - Docker & Docker Compose -# -# Output: -# - benchmarks/results/flamegraph.svg # Interactive flame graph -# - benchmarks/results/profile.txt # Top functions by time -# - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -# Colors -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -CYAN='\033[0;36m' -NC='\033[0m' -BOLD='\033[1m' - -# Default configuration -DURATION="20" -CONCURRENT="20" -SIZE="small" - -# Parse arguments -while [[ $# -gt 0 ]]; do - case $1 in - --duration|-d) - DURATION="$2" - shift 2 - ;; - --concurrent|-c) - CONCURRENT="$2" - shift 2 - ;; - --size|-s) - SIZE="$2" - shift 2 - ;; - --help|-h) - echo "S3Proxy Profiler" - echo "" - echo "Usage: $0 [OPTIONS]" - echo "" - echo "Options:" - echo " --duration, -d SEC Profile duration in seconds (default: 20)" - echo " --concurrent, -c NUM Concurrent requests during profile (default: 20)" - echo " --size, -s SIZE Object size: tiny, small, medium, large (default: small)" - echo " --help, -h Show this help" - echo "" - echo "Output:" - echo " benchmarks/results/flamegraph.svg - Interactive flame graph" - echo " benchmarks/results/profile.speedscope.json - Detailed profile (speedscope.app)" - echo " benchmarks/results/profile.txt - Text summary" - exit 0 - ;; - *) - echo "Unknown option: $1" - exit 1 - ;; - esac -done - -log() { - echo -e "${BLUE}[$(date '+%H:%M:%S')]${NC} $1" -} - -log_success() { - echo -e "${GREEN}✓${NC} $1" -} - -header() { - echo "" - echo -e "${BOLD}${CYAN}═══════════════════════════════════════════════════════════════${NC}" - echo -e "${BOLD}${CYAN} $1${NC}" - echo -e "${BOLD}${CYAN}═══════════════════════════════════════════════════════════════${NC}" - echo "" -} - -# Cleanup -cleanup() { - log "Stopping services..." 
- cd "$SCRIPT_DIR" - docker compose down -v 2>/dev/null || true -} - -main() { - header "S3PROXY PROFILER" - - echo "Configuration:" - echo " Duration: ${DURATION}s" - echo " Concurrency: ${CONCURRENT}" - echo " Object Size: ${SIZE}" - echo "" - - # Create results directory - mkdir -p "$SCRIPT_DIR/results" - - # Trap cleanup - trap cleanup EXIT - - log "Starting services..." - cd "$SCRIPT_DIR" - docker compose down -v 2>/dev/null || true - docker compose up -d --build --wait - - # Wait for profiler container to have py-spy and procps installed - log "Waiting for profiler to be ready (installing py-spy + procps)..." - until docker exec bench-profiler py-spy --version >/dev/null 2>&1; do - sleep 2 - done - log_success "Profiler ready" - - # Wait for benchmark client - log "Waiting for benchmark client..." - until docker exec bench-client python -c "import aioboto3" 2>/dev/null; do - sleep 1 - done - log_success "Benchmark client ready" - - # Find the Python process PID in the s3proxy container - log "Finding S3Proxy process..." - - # Wait for pgrep to be available - until docker exec bench-profiler which pgrep >/dev/null 2>&1; do - sleep 1 - done - - PID=$(docker exec bench-profiler pgrep -f "uvicorn" 2>/dev/null | head -1 || echo "") - if [[ -z "$PID" ]]; then - PID=$(docker exec bench-profiler pgrep -f "python" 2>/dev/null | head -1 || echo "") - fi - - if [[ -z "$PID" || ! "$PID" =~ ^[0-9]+$ ]]; then - echo -e "${RED}Could not find S3Proxy process${NC}" - exit 1 - fi - log_success "Found S3Proxy process: PID $PID" - - header "PROFILING" - - # Start py-spy recording in background - log "Starting py-spy profiler (recording for ${DURATION}s)..." 
- docker exec -d bench-profiler py-spy record \ - --pid "$PID" \ - --duration "$DURATION" \ - --format speedscope \ - --output /results/profile.speedscope.json \ - --subprocesses - - # Also record SVG flame graph - docker exec -d bench-profiler py-spy record \ - --pid "$PID" \ - --duration "$DURATION" \ - --format flamegraph \ - --output /results/flamegraph.svg \ - --subprocesses - - # Give py-spy a moment to attach - sleep 2 - - # Run benchmark (proxy only, to focus profiling) - log "Running benchmark to generate load..." - docker exec bench-client python /bench/bench.py \ - --size "$SIZE" \ - --concurrent "$CONCURRENT" \ - --duration "$((DURATION - 2))" \ - --proxy-only \ - --runs 1 - - # Wait for py-spy to finish - log "Waiting for profiler to complete..." - sleep 3 - - header "PROFILING COMPLETE" - - # Wait a bit more for files to be written - sleep 2 - - # Generate text summary from speedscope JSON - if [[ -f "$SCRIPT_DIR/results/profile.speedscope.json" ]]; then - log "Generating text summary..." 
- python3 << 'PYEOF' > "$SCRIPT_DIR/results/profile.txt" -import json -from collections import defaultdict - -with open('results/profile.speedscope.json') as f: - data = json.load(f) - -frames = data.get('shared', {}).get('frames', []) -profiles = data.get('profiles', []) - -# Aggregate time per function -frame_times = defaultdict(float) -total_time = 0 - -for profile in profiles: - if profile.get('type') == 'sampled': - samples = profile.get('samples', []) - weights = profile.get('weights', []) - for sample, weight in zip(samples, weights): - total_time += weight - for frame_idx in sample: - if frame_idx < len(frames): - name = frames[frame_idx].get('name', f'frame_{frame_idx}') - file = frames[frame_idx].get('file', '') - short_file = file.split('/')[-1] if file else '' - key = f"{name} ({short_file})" if short_file else name - frame_times[key] += weight - -# Sort by time -sorted_times = sorted(frame_times.items(), key=lambda x: -x[1]) - -print("=" * 70) -print(" S3PROXY PROFILE SUMMARY") -print("=" * 70) -print() -print("Top 30 functions by CPU time:") -print() -print(f"{'Function':<50} {'Time':>8} {'%':>8}") -print("-" * 70) - -for name, time_us in sorted_times[:30]: - pct = (time_us / total_time * 100) if total_time > 0 else 0 - time_ms = time_us / 1000 - short_name = name[:48] + ".." 
if len(name) > 50 else name - print(f"{short_name:<50} {time_ms:>7.1f}ms {pct:>7.1f}%") - -print() -print("=" * 70) -print() - -# S3Proxy specific breakdown -print("S3Proxy breakdown:") -print() -s3proxy_funcs = [(n, t) for n, t in sorted_times if any(x in n.lower() for x in ['s3proxy', 'crypto', 'encrypt', 'decrypt', 'handler', 'objects.py', 'buckets.py', 'main.py', 's3client'])] -for name, time_us in s3proxy_funcs[:20]: - pct = (time_us / total_time * 100) if total_time > 0 else 0 - print(f" {pct:5.1f}% {name}") -PYEOF - log_success "Text summary generated" - fi - - echo "" - echo "Results saved to:" - echo " - ${SCRIPT_DIR}/results/flamegraph.svg" - echo " Open in browser for interactive flame graph" - echo "" - echo " - ${SCRIPT_DIR}/results/profile.speedscope.json" - echo " Open at https://speedscope.app for detailed analysis" - echo "" - echo " - ${SCRIPT_DIR}/results/profile.txt" - echo " Text summary of top functions" - echo "" - - # Show summary - if [[ -f "$SCRIPT_DIR/results/profile.txt" ]]; then - echo "Quick summary:" - head -40 "$SCRIPT_DIR/results/profile.txt" - fi -} - -main diff --git a/benchmarks/results/.gitignore b/benchmarks/results/.gitignore deleted file mode 100644 index 84a0821..0000000 --- a/benchmarks/results/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -# Profile results (generated) -*.svg -*.json -*.txt -!.gitkeep diff --git a/benchmarks/results/.gitkeep b/benchmarks/results/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/benchmarks/results/benchmark.png b/benchmarks/results/benchmark.png deleted file mode 100644 index 1926277..0000000 Binary files a/benchmarks/results/benchmark.png and /dev/null differ diff --git a/benchmarks/run.sh b/benchmarks/run.sh deleted file mode 100755 index c90fa0b..0000000 --- a/benchmarks/run.sh +++ /dev/null @@ -1,198 +0,0 @@ -#!/usr/bin/env bash -# -# S3Proxy Benchmark Runner -# -# Runs benchmarks comparing direct MinIO access vs S3Proxy (with encryption). 
-# This shows the performance overhead of the encryption proxy. -# -# Usage: -# ./benchmarks/run.sh # Run all benchmarks -# ./benchmarks/run.sh --quick # Quick run (10s, fewer concurrent) -# ./benchmarks/run.sh --size small # Specific object size -# ./benchmarks/run.sh --help # Show help -# -# Requirements: -# - Docker & Docker Compose -# - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(dirname "$SCRIPT_DIR")" - -# Colors -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -CYAN='\033[0;36m' -NC='\033[0m' # No Color -BOLD='\033[1m' - -# Default configuration -DURATION="30" -CONCURRENT="10" -SIZES="small" -RUNS="3" -QUICK_MODE=false - -# Parse arguments -while [[ $# -gt 0 ]]; do - case $1 in - --quick|-q) - QUICK_MODE=true - DURATION="10" - CONCURRENT="5" - SIZES="small" - RUNS="1" - shift - ;; - --duration|-d) - DURATION="$2" - shift 2 - ;; - --concurrent|-c) - CONCURRENT="$2" - shift 2 - ;; - --size|-s) - SIZES="$2" - shift 2 - ;; - --runs|-r) - RUNS="$2" - shift 2 - ;; - --help|-h) - echo "S3Proxy Benchmark Runner" - echo "" - echo "Usage: $0 [OPTIONS]" - echo "" - echo "Options:" - echo " --quick, -q Quick run (10s, 5 concurrent, 1 run)" - echo " --duration, -d SEC Test duration per run in seconds (default: 30)" - echo " --concurrent, -c NUM Concurrent requests (default: 10)" - echo " --runs, -r NUM Number of runs for statistics (default: 3)" - echo " --size, -s SIZE Object sizes: tiny, small, medium, large (default: small)" - echo " --help, -h Show this help" - echo "" - echo "Examples:" - echo " $0 # Default: 3 runs, 30s each, 10 concurrent" - echo " $0 --quick # Quick smoke test (1 run, 10s)" - echo " $0 --concurrent 50 --duration 60 --runs 5 # High load test" - echo " $0 --size large # Test large objects (10MB)" - exit 0 - ;; - *) - echo "Unknown option: $1" - exit 1 - ;; - esac -done - -log() { - echo -e "${BLUE}[$(date '+%H:%M:%S')]${NC} $1" -} - -log_success() { - echo -e 
"${GREEN}✓${NC} $1" -} - -log_error() { - echo -e "${RED}✗${NC} $1" -} - -header() { - echo "" - echo -e "${BOLD}${CYAN}═══════════════════════════════════════════════════════════════${NC}" - echo -e "${BOLD}${CYAN} $1${NC}" - echo -e "${BOLD}${CYAN}═══════════════════════════════════════════════════════════════${NC}" - echo "" -} - -# Check dependencies -check_deps() { - log "Checking dependencies..." - - if ! command -v docker &> /dev/null; then - log_error "Docker is not installed" - exit 1 - fi - - log_success "Dependencies OK" -} - -# Start services -start_services() { - log "Starting benchmark services..." - - cd "$SCRIPT_DIR" - docker compose down -v 2>/dev/null || true - # Only start services needed for benchmark (not profiler) - docker compose up -d --build --wait redis minio s3proxy benchmark - - log_success "Services running" - echo " - MinIO (baseline): http://localhost:9000" - echo " - S3Proxy: http://localhost:8080" -} - -# Stop services -stop_services() { - log "Stopping services..." 
- cd "$SCRIPT_DIR" - docker compose down -v 2>/dev/null || true - log_success "Services stopped" -} - -# Run benchmark using separate benchmark container (no resource contention) -run_benchmark() { - local size="$1" - - log "Running benchmark: $size objects, ${CONCURRENT} concurrent, ${DURATION}s x ${RUNS} runs" - - # Run from separate benchmark container (bench-client) - # This avoids resource contention with the proxy - docker exec bench-client python /bench/bench.py \ - --size "$size" \ - --concurrent "$CONCURRENT" \ - --duration "$DURATION" \ - --runs "$RUNS" -} - -# Main -main() { - header "S3PROXY BENCHMARK" - - echo "Configuration:" - echo " Duration: ${DURATION}s per run" - echo " Runs: ${RUNS}" - echo " Concurrency: ${CONCURRENT}" - echo " Object Size: ${SIZES}" - if [[ "$QUICK_MODE" == "true" ]]; then - echo -e " Mode: ${YELLOW}QUICK${NC}" - fi - - check_deps - - # Trap to ensure cleanup on exit - trap stop_services EXIT - - start_services - - # Wait for benchmark container to be ready (deps installed) - log "Waiting for benchmark client to be ready..." - until docker exec bench-client python -c "import aioboto3" 2>/dev/null; do - sleep 1 - done - log_success "Benchmark client ready" - - # Run benchmarks for each size - for size in $SIZES; do - header "Testing: $size objects" - run_benchmark "$size" - done - - header "BENCHMARK COMPLETE" -} - -main diff --git a/e2e/cluster.sh b/e2e/cluster.sh new file mode 100755 index 0000000..e5ee0da --- /dev/null +++ b/e2e/cluster.sh @@ -0,0 +1,280 @@ +#!/bin/bash +set -e + +COMPOSE_FILE="e2e/docker-compose.cluster.yml" + +case "${1:-run}" in + run) + echo "Starting cluster test..." 
+ echo "" + # Start and follow logs until cluster is ready + docker compose -f $COMPOSE_FILE up --build -d + # Stream logs, exit when we see "Cluster is ready" + ( docker compose -f $COMPOSE_FILE logs -f & ) | while read -r line; do + echo "$line" + if echo "$line" | grep -q "Cluster is ready"; then + break + fi + done + echo "" + echo "==========================================" + echo "Cluster is running in background." + echo "Use './e2e/cluster.sh shell' to interact." + echo "Use 'make clean' when done." + echo "==========================================" + ;; + shell) + echo "Opening shell in test container..." + docker compose -f $COMPOSE_FILE exec helm-test sh + ;; + logs) + echo "Showing pod logs..." + docker compose -f $COMPOSE_FILE exec helm-test kubectl logs -l app=s3proxy-python -n s3proxy -f + ;; + status) + echo "Checking deployment status..." + docker compose -f $COMPOSE_FILE exec helm-test kubectl get all -n s3proxy + ;; + load-test) + echo "Running S3 load test (3 concurrent 10MB uploads)..." 
+ docker compose -f $COMPOSE_FILE exec helm-test sh -c ' + # Get pod names for load balancing verification + PODS=$(kubectl get pods -n s3proxy -l app=s3proxy-python -o jsonpath="{.items[*].metadata.name}") + POD_COUNT=$(echo $PODS | wc -w) + echo "Found $POD_COUNT s3proxy pods: $PODS" + + # Save current log line counts + mkdir -p /tmp/lb-test + for pod in $PODS; do + kubectl logs $pod -n s3proxy 2>/dev/null | wc -l > /tmp/lb-test/$pod.start + done + + echo "=== Creating test pod with AWS CLI ===" + kubectl run s3-load-test -n s3proxy --rm -i --restart=Never \ + --image=amazon/aws-cli:latest \ + --env="AWS_ACCESS_KEY_ID=minioadmin" \ + --env="AWS_SECRET_ACCESS_KEY=minioadmin" \ + --env="AWS_DEFAULT_REGION=us-east-1" \ + --command -- /bin/sh -c " + # Create test bucket + echo \"Creating test bucket...\" + aws --endpoint-url http://s3-gateway.s3proxy s3 mb s3://load-test-bucket 2>/dev/null || true + + # Generate 3 random 10MB files (small for CI, still tests full flow) + echo \"Generating 10MB test files...\" + mkdir -p /tmp/testfiles + for i in 1 2 3; do + dd if=/dev/urandom of=/tmp/testfiles/file-\$i.bin bs=1M count=10 2>/dev/null & + done + wait + echo \"Files generated\" + ls -lh /tmp/testfiles/ + + # Upload concurrently + echo \"\" + echo \"=== Starting concurrent uploads ===\" + START=\$(date +%s) + + for i in 1 2 3; do + aws --endpoint-url http://s3-gateway.s3proxy s3 cp /tmp/testfiles/file-\$i.bin s3://load-test-bucket/file-\$i.bin & + done + wait + + END=\$(date +%s) + DURATION=\$((END - START)) + echo \"\" + echo \"=== Upload complete in \${DURATION}s ===\" + + # Verify uploads + echo \"\" + echo \"=== Listing uploaded files ===\" + aws --endpoint-url http://s3-gateway.s3proxy s3 ls s3://load-test-bucket/ + + # Download and verify + echo \"\" + echo \"=== Downloading files to verify ===\" + mkdir -p /tmp/downloads + for i in 1 2 3; do + aws --endpoint-url http://s3-gateway.s3proxy s3 cp s3://load-test-bucket/file-\$i.bin /tmp/downloads/file-\$i.bin & + done 
+ wait + + echo \"\" + echo \"=== Comparing checksums ===\" + md5sum /tmp/testfiles/*.bin > /tmp/orig.md5 + md5sum /tmp/downloads/*.bin > /tmp/down.md5 + + ORIG_SUMS=\$(cat /tmp/orig.md5 | while read sum name; do echo \$sum; done | sort) + DOWN_SUMS=\$(cat /tmp/down.md5 | while read sum name; do echo \$sum; done | sort) + + cat /tmp/orig.md5 + echo \"\" + if [ \"\$ORIG_SUMS\" = \"\$DOWN_SUMS\" ]; then + echo \"✓ Checksums match - round-trip successful\" + else + echo \"Checksum mismatch!\" + exit 1 + fi + + # Verify encryption by reading raw data from MinIO directly + echo \"\" + echo \"=== Verifying encryption (reading raw from MinIO) ===\" + + # Create a small test file with known content + echo \"Creating 100KB test file...\" + dd if=/dev/urandom of=/tmp/encrypt-test.bin bs=1K count=100 2>/dev/null + ORIG_SIZE=\$(stat -c%s /tmp/encrypt-test.bin 2>/dev/null || stat -f%z /tmp/encrypt-test.bin) + ORIG_MD5=\$(md5sum /tmp/encrypt-test.bin | cut -c1-32) + echo \"Original: \${ORIG_SIZE} bytes, MD5: \$ORIG_MD5\" + + # Upload through s3proxy (gets encrypted) + aws --endpoint-url http://s3-gateway.s3proxy s3 cp /tmp/encrypt-test.bin s3://load-test-bucket/encrypt-test.bin + + # Download raw from MinIO directly (bypassing s3proxy decryption) + echo \"Downloading raw encrypted data from MinIO...\" + mkdir -p /tmp/raw + aws --endpoint-url http://minio:9000 s3 cp s3://load-test-bucket/encrypt-test.bin /tmp/raw/encrypt-test.bin 2>/dev/null || true + + if [ -f /tmp/raw/encrypt-test.bin ]; then + RAW_SIZE=\$(stat -c%s /tmp/raw/encrypt-test.bin 2>/dev/null || stat -f%z /tmp/raw/encrypt-test.bin) + RAW_MD5=\$(md5sum /tmp/raw/encrypt-test.bin | cut -c1-32) + echo \"Raw: \${RAW_SIZE} bytes, MD5: \$RAW_MD5\" + + # AES-256-GCM adds exactly 28 bytes: 12-byte nonce + 16-byte auth tag + EXPECTED_SIZE=\$((ORIG_SIZE + 28)) + + if [ \"\$RAW_SIZE\" = \"\$EXPECTED_SIZE\" ] && [ \"\$ORIG_MD5\" != \"\$RAW_MD5\" ]; then + echo \"✓ ENCRYPTION VERIFIED:\" + echo \" - Size increased by 28 bytes (12B 
nonce + 16B GCM tag)\"
+              echo \"  - Content differs from original\"
+
+              # Also verify decryption works
+              aws --endpoint-url http://s3-gateway.s3proxy s3 cp s3://load-test-bucket/encrypt-test.bin /tmp/decrypted.bin
+              DEC_SIZE=\$(stat -c%s /tmp/decrypted.bin 2>/dev/null || stat -f%z /tmp/decrypted.bin)
+              DEC_MD5=\$(md5sum /tmp/decrypted.bin | cut -c1-32)
+              echo \"Decrypted: \${DEC_SIZE} bytes, MD5: \$DEC_MD5\"
+
+              if [ \"\$ORIG_SIZE\" = \"\$DEC_SIZE\" ] && [ \"\$ORIG_MD5\" = \"\$DEC_MD5\" ]; then
+                echo \"✓ DECRYPTION VERIFIED - Size and content match original\"
+              else
+                echo \"✗ Decryption failed - data corrupted\"
+                exit 1
+              fi
+            elif [ \"\$RAW_SIZE\" != \"\$EXPECTED_SIZE\" ]; then
+              echo \"✗ ENCRYPTION FAILED - Expected \$EXPECTED_SIZE bytes, got \$RAW_SIZE\"
+              echo \"  (Should be original + 28 bytes for AES-GCM overhead)\"
+              exit 1
+            else
+              echo \"✗ ENCRYPTION FAILED - Raw data matches original\"
+              exit 1
+            fi
+          else
+            echo \"Could not read raw data from MinIO (bucket may have different name)\"
+            echo \"Skipping raw encryption verification\"
+          fi
+        "
+
+      LOAD_TEST_EXIT=$?
+      if [ $LOAD_TEST_EXIT -ne 0 ]; then
+        echo "✗ Load test failed with exit code $LOAD_TEST_EXIT"
+        exit 1
+      fi
+
+      # Verify load balancing
+      echo ""
+      echo "=== Checking load balancing ==="
+      sleep 2
+      PODS_HIT=0
+      for pod in $PODS; do
+        START_LINE=$(cat /tmp/lb-test/$pod.start 2>/dev/null || echo "0")
+        REQUEST_COUNT=$(kubectl logs $pod -n s3proxy 2>/dev/null | tail -n +$((START_LINE + 1)) | grep -c -E "GET|POST|PUT|HEAD" || true)
+        if [ "$REQUEST_COUNT" -gt 0 ]; then
+          PODS_HIT=$((PODS_HIT + 1))
+          echo "✓ Pod $pod: received $REQUEST_COUNT requests"
+        else
+          echo "  Pod $pod: received 0 requests"
+        fi
+      done
+      rm -rf /tmp/lb-test
+
+      if [ "$PODS_HIT" -ge 2 ]; then
+        echo "✓ Load balancing verified - traffic distributed across $PODS_HIT pods"
+      else
+        echo "⚠ Traffic went to only $PODS_HIT pod(s)"
+      fi
+    '
+    ;;
+  watch)
+    echo "Watching pod resource usage (Ctrl+C to stop)..."
+ docker compose -f $COMPOSE_FILE exec helm-test sh -c ' + # Check if metrics-server is installed + if ! kubectl get deployment metrics-server -n kube-system >/dev/null 2>&1; then + echo "Installing metrics-server..." + kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml >/dev/null 2>&1 + kubectl patch deployment metrics-server -n kube-system --type=json -p="[{\"op\": \"add\", \"path\": \"/spec/template/spec/containers/0/args/-\", \"value\": \"--kubelet-insecure-tls\"}]" >/dev/null 2>&1 + echo "Waiting for metrics-server to be ready..." + sleep 30 + fi + # Loop to show live updates + while true; do + clear + date + echo "" + kubectl top pods -n s3proxy 2>/dev/null || echo "Waiting for metrics..." + sleep 2 + done + ' + ;; + redis) + echo "Inspecting Redis state..." + docker compose -f $COMPOSE_FILE exec helm-test sh -c ' + kubectl run redis-cli -n s3proxy --rm -it --restart=Never \ + --image=redis:7-alpine \ + --command -- sh -c " + echo \"=== Redis Keys ===\" + redis-cli -h s3proxy-redis-ha-haproxy KEYS \"*\" + echo \"\" + echo \"=== Redis Info ===\" + redis-cli -h s3proxy-redis-ha-haproxy INFO keyspace + redis-cli -h s3proxy-redis-ha-haproxy INFO memory | grep used_memory_human + redis-cli -h s3proxy-redis-ha-haproxy INFO clients | grep connected_clients + " + ' + ;; + pods) + echo "Showing pod details..." + docker compose -f $COMPOSE_FILE exec helm-test sh -c ' + echo "=== Pod Status ===" + kubectl get pods -n s3proxy -o wide + echo "" + echo "=== Pod Resource Requests/Limits ===" + kubectl get pods -n s3proxy -o custom-columns="NAME:.metadata.name,CPU_REQ:.spec.containers[0].resources.requests.cpu,CPU_LIM:.spec.containers[0].resources.limits.cpu,MEM_REQ:.spec.containers[0].resources.requests.memory,MEM_LIM:.spec.containers[0].resources.limits.memory" + echo "" + echo "=== Recent Events ===" + kubectl get events -n s3proxy --sort-by=.lastTimestamp | tail -10 + ' + ;; + cleanup) + echo "Cleaning up..." 
+ # Stop compose containers + docker compose -f $COMPOSE_FILE down -v 2>/dev/null || true + # Delete Kind cluster containers directly + docker rm -f s3proxy-test-control-plane 2>/dev/null || true + # Clean up Kind network + docker network rm kind 2>/dev/null || true + echo "Cleanup complete" + ;; + *) + echo "Usage: $0 " + echo "" + echo "Commands:" + echo " run - Deploy Kind cluster and s3proxy" + echo " load-test - Run 30MB upload test + verify load balancing" + echo " status - Show deployment status" + echo " pods - Show pod details and resources" + echo " logs - Stream s3proxy logs" + echo " shell - Interactive kubectl shell" + echo " cleanup - Delete cluster and clean up" + exit 1 + ;; +esac diff --git a/e2e/docker-compose.helm-test.yml b/e2e/docker-compose.cluster.yml similarity index 70% rename from e2e/docker-compose.helm-test.yml rename to e2e/docker-compose.cluster.yml index ee84199..3145e90 100644 --- a/e2e/docker-compose.helm-test.yml +++ b/e2e/docker-compose.cluster.yml @@ -79,12 +79,76 @@ services: done kubectl get nodes + echo "=== Installing NGINX Ingress Controller ===" + helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx + helm repo update + + # Install NGINX Controller with settings optimized for Kind + helm upgrade --install ingress-nginx ingress-nginx/ingress-nginx \ + --namespace ingress-nginx --create-namespace \ + --set controller.service.type=ClusterIP \ + --set controller.admissionWebhooks.enabled=false \ + --wait --timeout 300s + + echo "✓ Ingress Controller installed" + echo "=== Building s3proxy image ===" - docker build -t s3proxy:latest /app + # Skip if image already exists (pre-built in CI) + if docker image inspect s3proxy:latest >/dev/null 2>&1; then + echo "Image already exists, skipping build" + else + docker build -t s3proxy:latest /app + fi echo "=== Loading image into kind ===" kind load docker-image s3proxy:latest --name s3proxy-test + echo "=== Deploying MinIO for testing ===" + kubectl create namespace 
s3proxy 2>/dev/null || true + cat </dev/null || true + docker rm -f s3proxy-test-control-plane 2>/dev/null || true + docker network rm kind 2>/dev/null || true +} + +trap cleanup EXIT INT TERM + +echo "Starting cluster test (auto-cleanup on exit)..." +echo "" + +docker compose -f $COMPOSE_FILE up --build -d + +echo "Waiting for cluster..." +( docker compose -f $COMPOSE_FILE logs -f & ) | while read -r line; do + echo "$line" + if echo "$line" | grep -q "Cluster is ready"; then + break + fi +done + +echo "" +echo "Running load test..." +echo "" + +$SCRIPT_DIR/cluster.sh load-test + +echo "" +echo "✓ Tests passed!" diff --git a/e2e/test-e2e-fast.sh b/e2e/test-e2e-fast.sh index 7ee33b1..9323f40 100755 --- a/e2e/test-e2e-fast.sh +++ b/e2e/test-e2e-fast.sh @@ -577,16 +577,24 @@ test_8_list_filtering() { aws s3 cp "$TEST_DIR/manifest.txt" "s3://$bucket/backups/manifest" --endpoint-url $PROXY_URL >/dev/null aws s3 cp "$TEST_DIR/size-1048576.bin" "s3://$bucket/backups/large.bin" --endpoint-url $PROXY_URL >/dev/null - # Inject .s3proxy-meta directly - print_step "Injecting metadata file directly to MinIO..." + # Inject legacy .s3proxy-meta directly + print_step "Injecting legacy metadata file directly to MinIO..." echo "meta" > "$TEST_DIR/meta.txt" aws s3 cp "$TEST_DIR/meta.txt" "s3://$bucket/backups/injected.s3proxy-meta" --endpoint-url $MINIO_URL >/dev/null + # Inject new internal prefix metadata + print_step "Injecting internal prefix metadata file directly to MinIO..." + aws s3 cp "$TEST_DIR/meta.txt" "s3://$bucket/.s3proxy-internal/backups/test.meta" --endpoint-url $MINIO_URL >/dev/null + # Check filtering print_step "Verifying metadata files are hidden..." - local proxy_listing=$(aws s3 ls "s3://$bucket/backups/" --endpoint-url $PROXY_URL 2>/dev/null || echo "") - if echo "$proxy_listing" | grep -q "\.s3proxy-meta"; then - print_error ".s3proxy-meta visible through proxy!" 
+ local proxy_listing=$(aws s3 ls "s3://$bucket/" --recursive --endpoint-url $PROXY_URL 2>/dev/null || echo "") + local has_legacy=$(echo "$proxy_listing" | grep -q "\.s3proxy-meta" && echo "yes" || echo "no") + local has_internal=$(echo "$proxy_listing" | grep -q "\.s3proxy-internal" && echo "yes" || echo "no") + + if [ "$has_legacy" = "yes" ] || [ "$has_internal" = "yes" ]; then + [ "$has_legacy" = "yes" ] && print_error ".s3proxy-meta visible through proxy!" + [ "$has_internal" = "yes" ] && print_error ".s3proxy-internal/ visible through proxy!" echo "FAIL" > "$RESULT_DIR/test8" end_test "8" "FAIL" else diff --git a/e2e/test-helm-with-load.sh b/e2e/test-helm-with-load.sh deleted file mode 100755 index c6e3903..0000000 --- a/e2e/test-helm-with-load.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -COMPOSE_FILE="e2e/docker-compose.helm-test.yml" - -echo "Starting containerized Helm test with load-test..." -echo "This will start the cluster, run load tests, then cleanup" -echo "" - -# Start cluster in detached mode -docker compose -f $COMPOSE_FILE up --build -d - -# Stream logs until cluster is ready -echo "Waiting for cluster to be ready..." -( docker compose -f $COMPOSE_FILE logs -f & ) | while read -r line; do - echo "$line" - if echo "$line" | grep -q "Cluster is ready"; then - break - fi -done - -echo "" -echo "==========================================" -echo "Running load test..." -echo "==========================================" -echo "" - -# Run load test using the shared script -$SCRIPT_DIR/test-helm.sh load-test -TEST_EXIT=$? - -echo "" -echo "==========================================" -echo "Cleaning up..." -echo "==========================================" -echo "" - -docker compose -f $COMPOSE_FILE down -v 2>&1 | grep -v "Resource is still in use" - -if [ $TEST_EXIT -eq 0 ]; then - echo "" - echo "✓ All tests completed successfully!" - exit 0 -else - echo "" - echo "✗ Tests failed!" 
- exit 1 -fi diff --git a/e2e/test-helm.sh b/e2e/test-helm.sh deleted file mode 100755 index 4302512..0000000 --- a/e2e/test-helm.sh +++ /dev/null @@ -1,185 +0,0 @@ -#!/bin/bash -set -e - -COMPOSE_FILE="e2e/docker-compose.helm-test.yml" - -case "${1:-run}" in - run) - echo "Starting containerized Helm test..." - echo "" - # Start and follow logs until cluster is ready - docker compose -f $COMPOSE_FILE up --build -d - # Stream logs, exit when we see "Cluster is ready" - ( docker compose -f $COMPOSE_FILE logs -f & ) | while read -r line; do - echo "$line" - if echo "$line" | grep -q "Cluster is ready"; then - break - fi - done - echo "" - echo "==========================================" - echo "Cluster is running in background." - echo "Use 'make helm-shell' to interact." - echo "Use 'make helm-cleanup' when done." - echo "==========================================" - ;; - shell) - echo "Opening shell in test container..." - docker compose -f $COMPOSE_FILE exec helm-test sh - ;; - logs) - echo "Showing pod logs..." - docker compose -f $COMPOSE_FILE exec helm-test kubectl logs -l app=s3proxy-python -n s3proxy -f - ;; - status) - echo "Checking deployment status..." - docker compose -f $COMPOSE_FILE exec helm-test kubectl get all -n s3proxy - ;; - load-test) - echo "Running S3 load test (3 concurrent 512MB uploads)..." 
- docker compose -f $COMPOSE_FILE exec helm-test sh -c ' - echo "=== Creating test pod with AWS CLI ===" - kubectl run s3-load-test -n s3proxy --rm -it --restart=Never \ - --image=amazon/aws-cli:latest \ - --env="AWS_ACCESS_KEY_ID=minioadmin" \ - --env="AWS_SECRET_ACCESS_KEY=minioadmin" \ - --env="AWS_DEFAULT_REGION=us-east-1" \ - --command -- /bin/sh -c " - # Create test bucket - echo \"Creating test bucket...\" - aws --endpoint-url http://s3proxy-python:4433 s3 mb s3://load-test-bucket 2>/dev/null || true - - # Generate 3 random 512MB files - echo \"Generating 512MB test files...\" - mkdir -p /tmp/testfiles - for i in 1 2 3; do - dd if=/dev/urandom of=/tmp/testfiles/file-\$i.bin bs=1M count=512 2>/dev/null & - done - wait - echo \"Files generated\" - ls -lh /tmp/testfiles/ - - # Upload concurrently - echo \"\" - echo \"=== Starting concurrent uploads ===\" - START=\$(date +%s) - - for i in 1 2 3; do - aws --endpoint-url http://s3proxy-python:4433 s3 cp /tmp/testfiles/file-\$i.bin s3://load-test-bucket/file-\$i.bin & - done - wait - - END=\$(date +%s) - DURATION=\$((END - START)) - echo \"\" - echo \"=== Upload complete in \${DURATION}s ===\" - - # Verify uploads - echo \"\" - echo \"=== Listing uploaded files ===\" - aws --endpoint-url http://s3proxy-python:4433 s3 ls s3://load-test-bucket/ - - # Download and verify - echo \"\" - echo \"=== Downloading files to verify ===\" - mkdir -p /tmp/downloads - for i in 1 2 3; do - aws --endpoint-url http://s3proxy-python:4433 s3 cp s3://load-test-bucket/file-\$i.bin /tmp/downloads/file-\$i.bin & - done - wait - - echo \"\" - echo \"=== Comparing checksums ===\" - md5sum /tmp/testfiles/*.bin > /tmp/orig.md5 - md5sum /tmp/downloads/*.bin > /tmp/down.md5 - - ORIG_SUMS=\$(cat /tmp/orig.md5 | while read sum name; do echo \$sum; done | sort) - DOWN_SUMS=\$(cat /tmp/down.md5 | while read sum name; do echo \$sum; done | sort) - - cat /tmp/orig.md5 - echo \"\" - if [ \"\$ORIG_SUMS\" = \"\$DOWN_SUMS\" ]; then - echo \"All checksums 
match - encryption/decryption working!\" - else - echo \"Checksum mismatch!\" - exit 1 - fi - " - ' - ;; - watch) - echo "Watching pod resource usage (Ctrl+C to stop)..." - docker compose -f $COMPOSE_FILE exec helm-test sh -c ' - # Check if metrics-server is installed - if ! kubectl get deployment metrics-server -n kube-system >/dev/null 2>&1; then - echo "Installing metrics-server..." - kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml >/dev/null 2>&1 - kubectl patch deployment metrics-server -n kube-system --type=json -p="[{\"op\": \"add\", \"path\": \"/spec/template/spec/containers/0/args/-\", \"value\": \"--kubelet-insecure-tls\"}]" >/dev/null 2>&1 - echo "Waiting for metrics-server to be ready..." - sleep 30 - fi - # Loop to show live updates - while true; do - clear - date - echo "" - kubectl top pods -n s3proxy 2>/dev/null || echo "Waiting for metrics..." - sleep 2 - done - ' - ;; - redis) - echo "Inspecting Redis state..." - docker compose -f $COMPOSE_FILE exec helm-test sh -c ' - kubectl run redis-cli -n s3proxy --rm -it --restart=Never \ - --image=redis:7-alpine \ - --command -- sh -c " - echo \"=== Redis Keys ===\" - redis-cli -h s3proxy-redis-ha-haproxy KEYS \"*\" - echo \"\" - echo \"=== Redis Info ===\" - redis-cli -h s3proxy-redis-ha-haproxy INFO keyspace - redis-cli -h s3proxy-redis-ha-haproxy INFO memory | grep used_memory_human - redis-cli -h s3proxy-redis-ha-haproxy INFO clients | grep connected_clients - " - ' - ;; - pods) - echo "Showing pod details..." 
- docker compose -f $COMPOSE_FILE exec helm-test sh -c ' - echo "=== Pod Status ===" - kubectl get pods -n s3proxy -o wide - echo "" - echo "=== Pod Resource Requests/Limits ===" - kubectl get pods -n s3proxy -o custom-columns="NAME:.metadata.name,CPU_REQ:.spec.containers[0].resources.requests.cpu,CPU_LIM:.spec.containers[0].resources.limits.cpu,MEM_REQ:.spec.containers[0].resources.requests.memory,MEM_LIM:.spec.containers[0].resources.limits.memory" - echo "" - echo "=== Recent Events ===" - kubectl get events -n s3proxy --sort-by=.lastTimestamp | tail -10 - ' - ;; - cleanup) - echo "Cleaning up..." - # Stop compose containers - docker compose -f $COMPOSE_FILE down -v 2>/dev/null || true - # Delete Kind cluster containers directly - docker rm -f s3proxy-test-control-plane 2>/dev/null || true - # Clean up Kind network - docker network rm kind 2>/dev/null || true - echo "Cleanup complete" - ;; - *) - echo "Usage: $0 " - echo "" - echo "Commands:" - echo " run - Deploy Kind cluster and Helm chart" - echo " status - Show deployment status" - echo " pods - Show pod details and resources" - echo " logs - Stream s3proxy logs" - echo " load-test - Run 1.5GB concurrent upload test" - echo " redis - Inspect Redis keys and memory" - echo " watch - Live pod CPU/memory (installs metrics-server)" - echo " shell - Interactive kubectl shell" - echo " cleanup - Delete cluster and clean up" - exit 1 - ;; -esac diff --git a/generate_flowchart.py b/generate_flowchart.py deleted file mode 100644 index d73850b..0000000 --- a/generate_flowchart.py +++ /dev/null @@ -1,203 +0,0 @@ -import matplotlib.pyplot as plt -import matplotlib.patches as mpatches -from matplotlib.patches import FancyBboxPatch, Polygon -import numpy as np - -# Claude/Anthropic color palette -COLORS = { - 'bg': '#FAF9F7', - 'card_bg': '#FFFFFF', - 'primary': '#D97757', - 'tan': '#C9A87C', - 'sage': '#7D8B74', - 'text': '#1F1F1F', - 'text_muted': '#6B6B6B', - 'border': '#E5E0DB', - 'success': '#6B8E5E', - 'error': 
'#C45B4A', - 'blue': '#5B7B9A', - 'purple': '#8B7BA5', -} - -# Larger canvas, simpler layout -fig, ax = plt.subplots(1, 1, figsize=(24, 16)) -fig.patch.set_facecolor(COLORS['bg']) -ax.set_facecolor(COLORS['bg']) -ax.set_xlim(0, 24) -ax.set_ylim(0, 16) -ax.set_aspect('equal') -ax.axis('off') - -def draw_box(ax, x, y, w, h, text, color, subtext=None, fontsize=12): - shadow = FancyBboxPatch((x+0.06, y-0.06), w, h, - boxstyle="round,pad=0.02,rounding_size=0.15", - facecolor='#00000010', edgecolor='none') - ax.add_patch(shadow) - - box = FancyBboxPatch((x, y), w, h, - boxstyle="round,pad=0.02,rounding_size=0.15", - facecolor=color, edgecolor=color, linewidth=2) - ax.add_patch(box) - - text_y = y + h/2 + (0.15 if subtext else 0) - ax.text(x + w/2, text_y, text, ha='center', va='center', fontsize=fontsize, - fontweight='bold', color='white', family='sans-serif') - - if subtext: - ax.text(x + w/2, y + h/2 - 0.25, subtext, ha='center', va='center', - fontsize=fontsize-3, color='#FFFFFFCC', family='sans-serif') - -def draw_diamond(ax, x, y, w, h, text, color=COLORS['tan']): - cx, cy = x + w/2, y + h/2 - pts = [(cx, cy+h/2), (cx+w/2, cy), (cx, cy-h/2), (cx-w/2, cy)] - diamond = Polygon(pts, facecolor=color, edgecolor=color, linewidth=2) - ax.add_patch(diamond) - ax.text(cx, cy, text, ha='center', va='center', fontsize=11, - fontweight='bold', color='white', family='sans-serif') - -def draw_arrow(ax, start, end, color=COLORS['border'], lw=2.5): - ax.annotate('', xy=end, xytext=start, - arrowprops=dict(arrowstyle='->', color=color, lw=lw, - connectionstyle='arc3,rad=0')) - -def draw_line(ax, points, color=COLORS['border'], lw=2.5): - xs = [p[0] for p in points] - ys = [p[1] for p in points] - ax.plot(xs, ys, color=color, linewidth=lw, solid_capstyle='round') - -def draw_label(ax, x, y, text, color=COLORS['text_muted'], fontsize=10): - ax.text(x, y, text, ha='center', va='center', fontsize=fontsize, - fontweight='600', color=color, family='sans-serif') - -# ============ TITLE 
============ -ax.text(12, 15.3, 'S3 Proxy — High-Level Flow', ha='center', va='center', - fontsize=28, fontweight='bold', color=COLORS['text'], family='sans-serif') -ax.text(12, 14.7, 'Client-Side Encryption Proxy for AWS S3', ha='center', va='center', - fontsize=14, color=COLORS['text_muted'], family='sans-serif') - -# ============ MAIN FLOW ============ - -# 1. CLIENT REQUEST -draw_box(ax, 9.5, 13, 5, 1.1, 'Client Request', COLORS['blue'], 'S3 API call') - -draw_arrow(ax, (12, 13), (12, 12.3)) - -# 2. PARSE & AUTH -draw_box(ax, 8.5, 10.8, 7, 1.3, 'Parse & Authenticate', COLORS['purple'], 'SigV4 signature verification') - -# Auth failure branch -draw_line(ax, [(8.5, 11.45), (6.5, 11.45)]) -draw_arrow(ax, (6.5, 11.45), (5.5, 11.45)) -draw_box(ax, 2.5, 10.95, 3, 0.9, '403', COLORS['error'], 'Invalid') -draw_label(ax, 7, 11.8, 'FAIL', COLORS['error'], 9) - -draw_arrow(ax, (12, 10.8), (12, 10.1)) -draw_label(ax, 12.5, 10.45, 'PASS', COLORS['success'], 9) - -# 3. ROUTING -draw_diamond(ax, 9.5, 8.5, 5, 1.4, 'Route Request', COLORS['tan']) - -# Branch lines -draw_line(ax, [(9.5, 9.2), (4, 9.2), (4, 7.8)]) # Left branch -draw_line(ax, [(14.5, 9.2), (20, 9.2), (20, 7.8)]) # Right branch -draw_arrow(ax, (12, 8.5), (12, 7.8)) # Center branch - -# 4. OPERATIONS (3 main paths) -draw_box(ax, 1.5, 6.3, 5, 1.3, 'PUT / POST', COLORS['primary'], 'Upload / Multipart') -draw_box(ax, 9.5, 6.3, 5, 1.3, 'GET', COLORS['blue'], 'Download') -draw_box(ax, 17.5, 6.3, 5, 1.3, 'LIST / HEAD / DELETE', COLORS['sage'], 'Metadata ops') - -# Arrows down to encryption -draw_arrow(ax, (4, 6.3), (4, 5.6)) -draw_arrow(ax, (12, 6.3), (12, 5.6)) -draw_arrow(ax, (20, 6.3), (20, 5.6)) - -# 5. 
ENCRYPTION LAYER -# Background for encryption section -enc_bg = FancyBboxPatch((1, 3.8), 22, 1.6, - boxstyle="round,pad=0.02,rounding_size=0.2", - facecolor=COLORS['tan'], edgecolor='none', alpha=0.15) -ax.add_patch(enc_bg) - -ax.text(12, 5.15, 'ENCRYPTION LAYER', ha='center', va='center', - fontsize=12, fontweight='bold', color=COLORS['tan'], family='sans-serif', - bbox=dict(boxstyle='round,pad=0.3', facecolor=COLORS['bg'], edgecolor='none')) - -draw_box(ax, 1.5, 4, 5, 1, 'Encrypt', COLORS['tan'], 'AES-256-GCM') -draw_box(ax, 9.5, 4, 5, 1, 'Decrypt', COLORS['tan'], 'Unwrap DEK') -draw_box(ax, 17.5, 4, 5, 1, 'Pass-through', COLORS['border'], fontsize=11) -ax.text(20, 4.5, 'or read metadata', ha='center', va='center', - fontsize=9, color=COLORS['text_muted']) - -# Arrows down to S3 -draw_arrow(ax, (4, 4), (4, 3.3)) -draw_arrow(ax, (12, 4), (12, 3.3)) -draw_arrow(ax, (20, 4), (20, 3.3)) - -# Converge to S3 -draw_line(ax, [(4, 3.1), (4, 2.8), (20, 2.8), (20, 3.1)]) -draw_line(ax, [(12, 3.1), (12, 2.8)]) - -# 6. S3 BACKEND -draw_box(ax, 8.5, 1.5, 7, 1.2, 'AWS S3', COLORS['primary'], 'Actual storage') -draw_arrow(ax, (12, 2.8), (12, 2.7)) - -# 7. 
RESPONSE -draw_arrow(ax, (12, 1.5), (12, 0.9)) -draw_box(ax, 9.5, 0.1, 5, 0.7, 'Response', COLORS['success'], fontsize=11) - -# ============ SIDE INFO BOXES ============ - -# Left side - Key info -info_bg = FancyBboxPatch((0.3, 0.3), 4.2, 2.8, - boxstyle="round,pad=0.05,rounding_size=0.15", - facecolor=COLORS['card_bg'], edgecolor=COLORS['border'], linewidth=1.5) -ax.add_patch(info_bg) - -ax.text(2.4, 2.85, 'Encryption', ha='center', fontsize=11, fontweight='bold', - color=COLORS['tan'], family='sans-serif') -ax.text(0.5, 2.4, '• AES-256-GCM', fontsize=9, color=COLORS['text_muted']) -ax.text(0.5, 2.0, '• Per-object DEK', fontsize=9, color=COLORS['text_muted']) -ax.text(0.5, 1.6, '• KEK wraps DEK', fontsize=9, color=COLORS['text_muted']) -ax.text(0.5, 1.2, '• 12-byte nonce', fontsize=9, color=COLORS['text_muted']) -ax.text(0.5, 0.8, '• 16-byte auth tag', fontsize=9, color=COLORS['text_muted']) - -# Right side - Features -feat_bg = FancyBboxPatch((19.5, 0.3), 4.2, 2.8, - boxstyle="round,pad=0.05,rounding_size=0.15", - facecolor=COLORS['card_bg'], edgecolor=COLORS['border'], linewidth=1.5) -ax.add_patch(feat_bg) - -ax.text(21.6, 2.85, 'Features', ha='center', fontsize=11, fontweight='bold', - color=COLORS['purple'], family='sans-serif') -ax.text(19.7, 2.4, '• AWS SigV4 Auth', fontsize=9, color=COLORS['text_muted']) -ax.text(19.7, 2.0, '• Streaming (64KB)', fontsize=9, color=COLORS['text_muted']) -ax.text(19.7, 1.6, '• Multipart uploads', fontsize=9, color=COLORS['text_muted']) -ax.text(19.7, 1.2, '• Range requests', fontsize=9, color=COLORS['text_muted']) -ax.text(19.7, 0.8, '• Transparent proxy', fontsize=9, color=COLORS['text_muted']) - -# ============ LEGEND ============ -legend_items = [ - (COLORS['blue'], 'Client/Download'), - (COLORS['primary'], 'Upload/Core'), - (COLORS['sage'], 'Metadata'), - (COLORS['tan'], 'Encryption'), - (COLORS['purple'], 'Auth/Routing'), -] - -legend_x = 0.5 -for i, (color, label) in enumerate(legend_items): - x_pos = legend_x + i * 4.7 
- rect = FancyBboxPatch((x_pos, 14.6), 0.4, 0.35, - boxstyle="round,pad=0.02,rounding_size=0.08", - facecolor=color, edgecolor='none') - ax.add_patch(rect) - ax.text(x_pos + 0.55, 14.77, label, fontsize=9, va='center', - color=COLORS['text_muted'], family='sans-serif') - -plt.tight_layout() -plt.savefig('/Users/hgu/Desktop/sseproxy-python/s3proxy_flowchart.png', dpi=150, - bbox_inches='tight', facecolor=COLORS['bg'], edgecolor='none') -plt.close() - -print("High-level flowchart saved to: s3proxy_flowchart.png") diff --git a/manifests/charts/redis-ha-4.35.5.tgz b/manifests/charts/redis-ha-4.35.5.tgz deleted file mode 100644 index d5af37e..0000000 Binary files a/manifests/charts/redis-ha-4.35.5.tgz and /dev/null differ diff --git a/manifests/templates/configmap.yaml b/manifests/templates/configmap.yaml index 5e16e81..a5a3327 100644 --- a/manifests/templates/configmap.yaml +++ b/manifests/templates/configmap.yaml @@ -5,18 +5,13 @@ metadata: labels: app.kubernetes.io/name: {{ .Chart.Name }} data: - {{- if .Values.minio.enabled }} - S3PROXY_HOST: "http://{{ .Chart.Name }}-minio:9000" - {{- else }} S3PROXY_HOST: {{ .Values.s3.host | quote }} - {{- end }} S3PROXY_REGION: {{ .Values.s3.region | quote }} S3PROXY_IP: "0.0.0.0" S3PROXY_PORT: {{ .Values.server.port | quote }} S3PROXY_NO_TLS: {{ .Values.server.noTls | quote }} - S3PROXY_MAX_CONCURRENT_UPLOADS: {{ .Values.performance.maxConcurrentUploads | quote }} - S3PROXY_MAX_CONCURRENT_DOWNLOADS: {{ .Values.performance.maxConcurrentDownloads | quote }} - S3PROXY_AUTO_MULTIPART_MB: {{ .Values.performance.autoMultipartMb | quote }} + S3PROXY_THROTTLING_REQUESTS_MAX: {{ .Values.performance.throttlingRequestsMax | quote }} + S3PROXY_MAX_UPLOAD_SIZE_MB: {{ .Values.performance.maxUploadSizeMb | quote }} {{- if index .Values "redis-ha" "enabled" }} S3PROXY_REDIS_URL: "redis://{{ .Release.Name }}-redis-ha-haproxy:6379/0" {{- else }} diff --git a/manifests/templates/deployment.yaml b/manifests/templates/deployment.yaml index 
e8887be..072ff16 100644 --- a/manifests/templates/deployment.yaml +++ b/manifests/templates/deployment.yaml @@ -81,3 +81,19 @@ spec: securityContext: fsGroup: 1000 terminationGracePeriodSeconds: 30 + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.topologySpreadConstraints }} + topologySpreadConstraints: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/manifests/templates/gateway-service.yaml b/manifests/templates/gateway-service.yaml new file mode 100644 index 0000000..2f41919 --- /dev/null +++ b/manifests/templates/gateway-service.yaml @@ -0,0 +1,12 @@ +{{- if .Values.gateway.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ .Values.gateway.serviceName }} + namespace: {{ .Release.Namespace }} + labels: + app.kubernetes.io/name: {{ .Chart.Name }}-gateway +spec: + type: ExternalName + externalName: {{ .Values.gateway.ingressService }} +{{- end }} diff --git a/manifests/templates/ingress.yaml b/manifests/templates/ingress.yaml new file mode 100644 index 0000000..e589ae0 --- /dev/null +++ b/manifests/templates/ingress.yaml @@ -0,0 +1,53 @@ +{{- if .Values.ingress.enabled -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ .Chart.Name }} + labels: + app.kubernetes.io/name: {{ .Chart.Name }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + ingressClassName: {{ .Values.ingress.className }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . 
| quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- if and .Values.gateway.enabled (not .Values.ingress.hosts) }} + # Auto-generate host from gateway service name + namespace + - host: {{ printf "%s.%s" .Values.gateway.serviceName .Release.Namespace | quote }} + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: {{ .Chart.Name }} + port: + number: {{ .Values.service.port }} + {{- else }} + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + pathType: {{ .pathType }} + backend: + service: + name: {{ $.Chart.Name }} + port: + number: {{ $.Values.service.port }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/manifests/templates/minio.yaml b/manifests/templates/minio.yaml deleted file mode 100644 index d67fce4..0000000 --- a/manifests/templates/minio.yaml +++ /dev/null @@ -1,82 +0,0 @@ -{{- if .Values.minio.enabled }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ .Chart.Name }}-minio - labels: - app.kubernetes.io/name: {{ .Chart.Name }}-minio - app.kubernetes.io/component: storage -spec: - replicas: 1 - selector: - matchLabels: - app: {{ .Chart.Name }}-minio - template: - metadata: - labels: - app: {{ .Chart.Name }}-minio - app.kubernetes.io/name: {{ .Chart.Name }}-minio - spec: - containers: - - name: minio - image: "{{ .Values.minio.image.repository }}:{{ .Values.minio.image.tag }}" - imagePullPolicy: {{ .Values.minio.image.pullPolicy }} - args: - - server - - /data - - --console-address - - ":9001" - env: - - name: MINIO_ROOT_USER - value: {{ .Values.minio.rootUser | quote }} - - name: MINIO_ROOT_PASSWORD - value: {{ .Values.minio.rootPassword | quote }} - ports: - - name: api - containerPort: 9000 - protocol: TCP - - name: console - containerPort: 9001 - protocol: TCP - resources: - {{- toYaml .Values.minio.resources | nindent 12 }} - livenessProbe: - httpGet: - path: /minio/health/live - 
port: api - initialDelaySeconds: 10 - periodSeconds: 10 - readinessProbe: - httpGet: - path: /minio/health/ready - port: api - initialDelaySeconds: 10 - periodSeconds: 5 - volumeMounts: - - name: data - mountPath: /data - volumes: - - name: data - emptyDir: {} ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ .Chart.Name }}-minio - labels: - app.kubernetes.io/name: {{ .Chart.Name }}-minio - app.kubernetes.io/component: storage -spec: - type: ClusterIP - selector: - app: {{ .Chart.Name }}-minio - ports: - - name: api - port: 9000 - targetPort: api - protocol: TCP - - name: console - port: 9001 - targetPort: console - protocol: TCP -{{- end }} diff --git a/manifests/templates/secret.yaml b/manifests/templates/secret.yaml index 60f2dae..ba7c32f 100644 --- a/manifests/templates/secret.yaml +++ b/manifests/templates/secret.yaml @@ -10,11 +10,6 @@ metadata: type: Opaque stringData: S3PROXY_ENCRYPT_KEY: {{ .Values.secrets.encryptKey | quote }} - {{- if .Values.minio.enabled }} - AWS_ACCESS_KEY_ID: {{ .Values.minio.rootUser | quote }} - AWS_SECRET_ACCESS_KEY: {{ .Values.minio.rootPassword | quote }} - {{- else }} AWS_ACCESS_KEY_ID: {{ .Values.secrets.awsAccessKeyId | quote }} AWS_SECRET_ACCESS_KEY: {{ .Values.secrets.awsSecretAccessKey | quote }} - {{- end }} {{- end }} diff --git a/manifests/values.yaml b/manifests/values.yaml index ef834b0..8d0f01d 100644 --- a/manifests/values.yaml +++ b/manifests/values.yaml @@ -1,94 +1,41 @@ # S3Proxy Helm Chart Values -# Deployment settings replicaCount: 3 -# Container image image: - # IMPORTANT: Change to your image registry - # Example: ghcr.io/myorg/sseproxy-python or your private registry - repository: ghcr.io/YOUR_USERNAME/sseproxy-python - # IMPORTANT: Never use 'latest' in production - use specific version tags - # Example: "v0.1.0", "v0.2.0", etc. 
+ repository: ghcr.io/ServerSideHannes/sseproxy-python tag: latest pullPolicy: IfNotPresent -# S3 configuration (used only when minio.enabled=false) -# Ignored if MinIO is enabled - MinIO will be used as the S3 backend +# S3 backend configuration s3: - # S3-compatible endpoint: AWS S3, DigitalOcean Spaces, etc. - # Examples: - # - "s3.amazonaws.com" (AWS S3) - # - "s3.us-west-2.amazonaws.com" (AWS S3 specific region) - # - "nyc3.digitaloceanspaces.com" (DigitalOcean Spaces) host: "s3.amazonaws.com" - # AWS region (ignored for non-AWS S3 services) region: "us-east-1" -# Server settings server: - port: 4433 # Listen port (should match service.port) - noTls: true # TLS termination handled by Ingress or Load Balancer + port: 4433 + noTls: true -# Performance tuning settings performance: - maxConcurrentUploads: 10 # Max concurrent upload operations - maxConcurrentDownloads: 10 # Max concurrent download operations - autoMultipartMb: 16 # Chunk size in MB for multipart uploads - -# MinIO configuration (embedded S3 backend) -# Set enabled: false to use external S3 (AWS, DigitalOcean Spaces, etc.) -# When disabled, configure s3.host and set secrets.awsAccessKeyId/awsSecretAccessKey -minio: - enabled: true # For production, consider external S3 service - image: - repository: minio/minio - tag: latest # Specify version for production, e.g., "RELEASE.2024-01-16T16-07-38Z" - pullPolicy: IfNotPresent - # IMPORTANT: Change credentials in production! - # Default credentials below are for development only - rootUser: "minioadmin" - rootPassword: "minioadmin" - resources: - requests: - cpu: "50m" - memory: "128Mi" - limits: - cpu: "500m" - memory: "512Mi" + throttlingRequestsMax: 10 + maxUploadSizeMb: 45 -# Redis cache for upload state management -# Choose one: redis-ha (for HA) or externalRedis (for managed services) +# External Redis (for managed services) externalRedis: - # Use this for managed Redis: AWS ElastiCache, Azure Cache, Redis Cloud, etc. 
- # Leave empty to use redis-ha instead - # Format: redis://host:port/db or redis://:password@host:port/db - url: "" # e.g., "redis://my-elasticache.abc123.cache.amazonaws.com:6379/0" - # Include password in URL if needed: redis://:mypassword@host:port/db - # TTL for upload state in hours + url: "" uploadTtlHours: 24 -# Redis HA configuration (embedded high-availability Redis) -# Uses dandydev/redis-ha chart with Sentinel for automatic failover -# Set enabled: false if using externalRedis instead +# Redis HA (embedded) redis-ha: - enabled: true # Disable if using managed Redis service - # Number of Redis replicas (1 master + N-1 replicas) + enabled: true replicas: 3 + existingSecret: "" - # Use existing secret for Redis password (RECOMMENDED for production) - # If provided, auth and authKey below are ignored - # Create with: kubectl create secret generic redis-password --from-literal=redis-password="your-password" - existingSecret: "" # Name of existing secret with key "redis-password" - - # Persistence configuration persistentVolume: enabled: true size: 10Gi - storageClass: "" # Use default storage class, or specify e.g., "gp3", "standard" + storageClass: "" - # HAProxy for single-endpoint access (recommended) - # This allows standard redis:// URLs without sentinel-aware client code haproxy: enabled: true replicas: 2 @@ -100,7 +47,6 @@ redis-ha: cpu: "200m" memory: "128Mi" - # Sentinel configuration sentinel: port: 26379 quorum: 2 @@ -109,7 +55,6 @@ redis-ha: failover-timeout: 180000 parallel-syncs: 5 - # Redis configuration redis: port: 6379 config: @@ -117,14 +62,10 @@ redis-ha: min-replicas-to-write: 1 min-replicas-max-lag: 5 - # Security - Redis password authentication (ignored if existingSecret is set above) - auth: false # Set to true to enable password protection - authKey: "" # Redis password (required if auth=true and no existingSecret, generate with: openssl rand -base64 32) - - # Pod anti-affinity for HA (spread across nodes) + auth: false + authKey: "" 
hardAntiAffinity: true - # Resource limits for Redis pods resources: requests: cpu: "100m" @@ -133,54 +74,77 @@ redis-ha: cpu: "500m" memory: "512Mi" -# Secret Configuration -# IMPORTANT: Never commit actual secrets to git! -# Priority: existingSecrets > create static secret - +# Secrets (use existing secret in production) secrets: - # Option 1: Use existing Secret (RECOMMENDED for production) - # Reference an existing Kubernetes secret and optionally map its keys - # If using default key names, just set the secret name: - # kubectl create secret generic my-s3-secrets \ - # --from-literal=S3PROXY_ENCRYPT_KEY="$(openssl rand -base64 32)" \ - # --from-literal=AWS_ACCESS_KEY_ID="AKIA..." \ - # --from-literal=AWS_SECRET_ACCESS_KEY="..." existingSecrets: enabled: false - name: "" # Name of existing Kubernetes secret - # Optional: Map secret keys if using different key names + name: "" keys: - encryptKey: "S3PROXY_ENCRYPT_KEY" # Secret key name for encryption key - awsAccessKeyId: "AWS_ACCESS_KEY_ID" # Secret key name for access key - awsSecretAccessKey: "AWS_SECRET_ACCESS_KEY" # Secret key name for secret key - - # Option 2: Create new secret from values (use only for development) - # For production, use existingSecrets with a pre-created secret - # Provide values via helm --set or secure values file, never hardcode here + encryptKey: "S3PROXY_ENCRYPT_KEY" + awsAccessKeyId: "AWS_ACCESS_KEY_ID" + awsSecretAccessKey: "AWS_SECRET_ACCESS_KEY" - # S3PROXY_ENCRYPT_KEY: AES-256-GCM encryption key (base64-encoded 32 bytes) - # Generate with: openssl rand -base64 32 encryptKey: "" - - # AWS/S3 credentials (ignored if minio.enabled=true and using MinIO defaults) - # Only needed when: minio.enabled=false AND using external S3 awsAccessKeyId: "" awsSecretAccessKey: "" -# Logging -logLevel: "INFO" # Options: DEBUG, INFO, WARNING, ERROR +logLevel: "INFO" -# Resource limits for s3proxy pods -# Adjust based on your workload and cluster capacity resources: requests: cpu: "100m" - 
memory: "256Mi" + memory: "512Mi" limits: - cpu: "1000m" + cpu: "100m" memory: "512Mi" -# Kubernetes Service configuration service: - type: ClusterIP # Use LoadBalancer for external access, or configure Ingress - port: 4433 # Service port (container also runs on this port) + type: ClusterIP + port: 4433 + +# Gateway creates internal DNS alias (no external DNS setup needed) +# Internal: gateway.enabled=true, ingress.enabled=true -> s3-gateway.<namespace> +# External: gateway.enabled=false, ingress.enabled=true, hosts=[...] -> your-domain.com (requires DNS) +gateway: + enabled: false + serviceName: s3-gateway + ingressService: ingress-nginx-controller.ingress-nginx.svc.cluster.local + +# Pod scheduling +nodeSelector: {} + +tolerations: [] + +affinity: {} + # Example: prefer spreading across nodes + # podAntiAffinity: + # preferredDuringSchedulingIgnoredDuringExecution: + # - weight: 100 + # podAffinityTerm: + # labelSelector: + # matchLabels: + # app: sseproxy-python + # topologyKey: kubernetes.io/hostname + +topologySpreadConstraints: [] + # Example: spread across zones + # - maxSkew: 1 + # topologyKey: topology.kubernetes.io/zone + # whenUnsatisfiable: ScheduleAnyway + # labelSelector: + # matchLabels: + # app: sseproxy-python + +ingress: + enabled: false + className: "nginx" + annotations: + nginx.ingress.kubernetes.io/proxy-buffering: "off" + nginx.ingress.kubernetes.io/proxy-request-buffering: "off" + nginx.ingress.kubernetes.io/proxy-body-size: "0" + nginx.ingress.kubernetes.io/proxy-connect-timeout: "60" + nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" + nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" + nginx.ingress.kubernetes.io/upstream-keepalive-connections: "100" + tls: [] + hosts: [] \ No newline at end of file diff --git a/s3proxy/config.py b/s3proxy/config.py index 16df0ed..d83f366 100644 --- a/s3proxy/config.py +++ b/s3proxy/config.py @@ -27,17 +27,14 @@ class Settings(BaseSettings): cert_path: str = Field(default="/etc/s3proxy/certs", 
description="TLS certificate directory") # Performance settings - throttling_requests_max: int = Field(default=0, description="Max concurrent requests") - max_single_encrypted_mb: int = Field(default=16, description="Max single-part object size (MB)") - auto_multipart_mb: int = Field(default=16, description="Auto-multipart threshold (MB)") - max_concurrent_uploads: int = Field(default=10, description="Max concurrent uploads") - max_concurrent_downloads: int = Field(default=10, description="Max concurrent downloads") - - # Feature flags - allow_multipart: bool = Field(default=False, description="Allow unencrypted multipart") - - # Redis settings (for distributed state) - redis_url: str = Field(default="redis://localhost:6379/0", description="Redis connection URL") + # Memory usage: file_size + ~64MB per concurrent upload + # For 1GB pod with 10MB files: ~13 concurrent safe, default 10 for margin + # Files >16MB automatically use multipart encryption + throttling_requests_max: int = Field(default=10, description="Max concurrent requests (0=unlimited)") + max_upload_size_mb: int = Field(default=45, description="Max single-request upload size (MB)") + + # Redis settings (for distributed state in HA deployments) + redis_url: str = Field(default="", description="Redis URL for HA mode (empty = in-memory single-instance)") redis_upload_ttl_hours: int = Field(default=24, description="TTL for upload state in Redis (hours)") # Logging @@ -55,14 +52,9 @@ def kek(self) -> bytes: return hashlib.sha256(self.encrypt_key.encode()).digest() @property - def max_single_encrypted_bytes(self) -> int: - """Max single encrypted object size in bytes.""" - return self.max_single_encrypted_mb * 1024 * 1024 - - @property - def auto_multipart_bytes(self) -> int: - """Auto-multipart threshold in bytes.""" - return self.auto_multipart_mb * 1024 * 1024 + def max_upload_size_bytes(self) -> int: + """Max upload size in bytes.""" + return self.max_upload_size_mb * 1024 * 1024 @property def 
s3_endpoint(self) -> str: diff --git a/s3proxy/handlers/buckets.py b/s3proxy/handlers/buckets.py index 5553837..2217c72 100644 --- a/s3proxy/handlers/buckets.py +++ b/s3proxy/handlers/buckets.py @@ -7,7 +7,7 @@ from fastapi import HTTPException, Request, Response from .. import xml_responses -from ..multipart import META_SUFFIX, delete_multipart_metadata +from ..multipart import INTERNAL_PREFIX, META_SUFFIX_LEGACY, delete_multipart_metadata from ..s3client import S3Credentials from .base import BaseHandler @@ -15,7 +15,20 @@ class BucketHandlerMixin(BaseHandler): """Mixin for bucket operations.""" + async def handle_list_buckets(self, request: Request, creds: S3Credentials) -> Response: + """Handle ListBuckets request (GET /).""" + client = self._client(creds) + resp = await client.list_buckets() + return Response( + content=xml_responses.list_buckets( + resp.get("Owner", {}), + resp.get("Buckets", []), + ), + media_type="application/xml", + ) + async def handle_list_objects(self, request: Request, creds: S3Credentials) -> Response: + """Handle ListObjectsV2 request (GET /bucket?list-type=2).""" bucket = self._parse_bucket(request.url.path) client = self._client(creds) query = parse_qs(request.url.query) @@ -25,9 +38,69 @@ async def handle_list_objects(self, request: Request, creds: S3Credentials) -> R resp = await client.list_objects_v2(bucket, prefix, token, max_keys) + objects = await self._process_list_objects(client, bucket, resp.get("Contents", [])) + + return Response( + content=xml_responses.list_objects( + bucket, prefix, max_keys, + resp.get("IsTruncated", False), + resp.get("NextContinuationToken"), + objects, + ), + media_type="application/xml", + ) + + async def handle_list_objects_v1(self, request: Request, creds: S3Credentials) -> Response: + """Handle ListObjects V1 request (GET /bucket without list-type=2).""" + bucket = self._parse_bucket(request.url.path) + client = self._client(creds) + query = parse_qs(request.url.query) + prefix = 
query.get("prefix", [""])[0] + marker = query.get("marker", [""])[0] or None + delimiter = query.get("delimiter", [""])[0] or None + max_keys = int(query.get("max-keys", ["1000"])[0]) + + resp = await client.list_objects_v1(bucket, prefix, marker, delimiter, max_keys) + + objects = await self._process_list_objects(client, bucket, resp.get("Contents", [])) + + # Extract common prefixes, filtering out internal prefixes + common_prefixes = [ + cp["Prefix"] for cp in resp.get("CommonPrefixes", []) + if not cp["Prefix"].startswith(INTERNAL_PREFIX) + ] + + # V1 uses NextMarker (or last key if truncated and no delimiter) + next_marker = resp.get("NextMarker") + if resp.get("IsTruncated") and not next_marker and objects: + next_marker = objects[-1]["key"] + + return Response( + content=xml_responses.list_objects_v1( + bucket, prefix, marker, delimiter, max_keys, + resp.get("IsTruncated", False), + next_marker, + objects, + common_prefixes, + ), + media_type="application/xml", + ) + + def _is_internal_key(self, key: str) -> bool: + """Check if key is an internal s3proxy key that should be hidden.""" + return ( + key.startswith(INTERNAL_PREFIX) + or key.endswith(META_SUFFIX_LEGACY) + or ".s3proxy-upload-" in key + ) + + async def _process_list_objects( + self, client, bucket: str, contents: list[dict] + ) -> list[dict]: + """Process list objects response, filtering internal objects and fetching metadata.""" objects = [] - for obj in resp.get("Contents", []): - if obj["Key"].endswith(META_SUFFIX) or ".s3proxy-upload-" in obj["Key"]: + for obj in contents: + if self._is_internal_key(obj["Key"]): continue try: head = await client.head_object(bucket, obj["Key"]) @@ -44,16 +117,7 @@ async def handle_list_objects(self, request: Request, creds: S3Credentials) -> R "size": size, "storage_class": obj.get("StorageClass", "STANDARD"), }) - - return Response( - content=xml_responses.list_objects( - bucket, prefix, max_keys, - resp.get("IsTruncated", False), - 
resp.get("NextContinuationToken"), - objects, - ), - media_type="application/xml", - ) + return objects async def handle_create_bucket(self, request: Request, creds: S3Credentials) -> Response: bucket = self._parse_bucket(request.url.path) diff --git a/s3proxy/handlers/multipart_ops.py b/s3proxy/handlers/multipart_ops.py index a010300..298682d 100644 --- a/s3proxy/handlers/multipart_ops.py +++ b/s3proxy/handlers/multipart_ops.py @@ -7,11 +7,14 @@ from fastapi import HTTPException, Request, Response +import base64 + from .. import crypto, xml_responses from ..multipart import ( MultipartMetadata, PartMetadata, delete_upload_state, + load_multipart_metadata, load_upload_state, persist_upload_state, save_multipart_metadata, @@ -206,3 +209,126 @@ async def handle_list_parts( ), media_type="application/xml", ) + + async def handle_upload_part_copy( + self, request: Request, creds: S3Credentials + ) -> Response: + """Handle UploadPartCopy request (PUT with x-amz-copy-source and uploadId). + + Copies data from a source object to a part of a multipart upload. + For encrypted sources, decrypts and re-encrypts with the upload's DEK. 
+ """ + from urllib.parse import unquote + from datetime import UTC, datetime + + bucket, key = self._parse_path(request.url.path) + client = self._client(creds) + query = parse_qs(request.url.query) + upload_id = query.get("uploadId", [""])[0] + part_num = int(query.get("partNumber", ["0"])[0]) + + # Get copy source header + copy_source = request.headers.get("x-amz-copy-source", "") + copy_source_range = request.headers.get("x-amz-copy-source-range") + + # Parse copy source: can be "bucket/key" or "/bucket/key" or URL-encoded + copy_source = unquote(copy_source).lstrip("/") + if "/" not in copy_source: + raise HTTPException(400, "Invalid x-amz-copy-source format") + + src_bucket, src_key = copy_source.split("/", 1) + + # Get upload state for destination DEK + state = await self.multipart_manager.get_upload(bucket, key, upload_id) + if not state: + dek = await load_upload_state(client, bucket, key, upload_id, self.settings.kek) + if not dek: + raise HTTPException(404, "Upload not found") + state = await self.multipart_manager.create_upload(bucket, key, upload_id, dek) + + # Check if source is encrypted + try: + head_resp = await client.head_object(src_bucket, src_key) + except Exception as e: + raise HTTPException(404, f"Source object not found: {e}") from e + + src_metadata = head_resp.get("Metadata", {}) + src_wrapped_dek = src_metadata.get(self.settings.dektag_name) + src_multipart_meta = await load_multipart_metadata(client, src_bucket, src_key) + + if not src_wrapped_dek and not src_multipart_meta: + # Source not encrypted - get the raw data + resp = await client.get_object(src_bucket, src_key, range_header=copy_source_range) + plaintext = await resp["Body"].read() + elif src_multipart_meta: + # Source is multipart encrypted - download and decrypt + src_dek = crypto.unwrap_key(src_multipart_meta.wrapped_dek, self.settings.kek) + sorted_parts = sorted(src_multipart_meta.parts, key=lambda p: p.part_number) + + # For range request, we need to compute which parts 
and offsets + if copy_source_range: + # Parse range: bytes=start-end + range_str = copy_source_range.replace("bytes=", "") + range_start, range_end = map(int, range_str.split("-")) + else: + range_start = 0 + range_end = src_multipart_meta.total_plaintext_size - 1 + + plaintext_chunks = [] + plaintext_offset = 0 + ct_offset = 0 + + for part in sorted_parts: + part_pt_end = plaintext_offset + part.plaintext_size - 1 + + # Check if this part overlaps with requested range + if part_pt_end >= range_start and plaintext_offset <= range_end: + ct_end = ct_offset + part.ciphertext_size - 1 + resp = await client.get_object(src_bucket, src_key, f"bytes={ct_offset}-{ct_end}") + ciphertext = await resp["Body"].read() + part_plaintext = crypto.decrypt(ciphertext, src_dek) + + # Trim to requested range + trim_start = max(0, range_start - plaintext_offset) + trim_end = min(part.plaintext_size, range_end - plaintext_offset + 1) + plaintext_chunks.append(part_plaintext[trim_start:trim_end]) + + plaintext_offset = part_pt_end + 1 + ct_offset += part.ciphertext_size + + plaintext = b"".join(plaintext_chunks) + else: + # Source is single-part encrypted + resp = await client.get_object(src_bucket, src_key) + ciphertext = await resp["Body"].read() + wrapped_dek = base64.b64decode(src_wrapped_dek) + full_plaintext = crypto.decrypt_object(ciphertext, wrapped_dek, self.settings.kek) + + # Handle range if specified + if copy_source_range: + range_str = copy_source_range.replace("bytes=", "") + range_start, range_end = map(int, range_str.split("-")) + plaintext = full_plaintext[range_start:range_end + 1] + else: + plaintext = full_plaintext + + # Encrypt with upload's DEK + ciphertext = crypto.encrypt_part(plaintext, state.dek, upload_id, part_num) + + # Upload the encrypted part + resp = await client.upload_part(bucket, key, upload_id, part_num, ciphertext) + + # Record the part + body_md5 = hashlib.md5(plaintext).hexdigest() + await self.multipart_manager.add_part(bucket, key, upload_id, 
PartMetadata( + part_num, len(plaintext), len(ciphertext), + resp["ETag"].strip('"'), body_md5 + )) + + # Return CopyPartResult + last_modified = datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%S.000Z") + + return Response( + content=xml_responses.upload_part_copy_result(resp["ETag"].strip('"'), last_modified), + media_type="application/xml", + ) diff --git a/s3proxy/handlers/objects.py b/s3proxy/handlers/objects.py index 8744310..10c7dfb 100644 --- a/s3proxy/handlers/objects.py +++ b/s3proxy/handlers/objects.py @@ -264,11 +264,13 @@ async def handle_put_object(self, request: Request, creds: S3Credentials) -> Res if needs_chunked_decode: body = decode_aws_chunked(body) - if self.settings.auto_multipart_bytes > 0 and len(body) > self.settings.auto_multipart_bytes: - return await self._put_multipart(client, bucket, key, body, content_type) + # Reject if exceeds max upload size + if len(body) > self.settings.max_upload_size_bytes: + raise HTTPException(413, f"Max upload size: {self.settings.max_upload_size_mb}MB") - if len(body) > self.settings.max_single_encrypted_bytes: - raise HTTPException(413, f"Max size: {self.settings.max_single_encrypted_mb}MB") + # Auto-use multipart for files >16MB to split encryption into parts + if len(body) > crypto.PART_SIZE: + return await self._put_multipart(client, bucket, key, body, content_type) encrypted = crypto.encrypt_object(body, self.settings.kek) etag = hashlib.md5(body).hexdigest() @@ -385,10 +387,11 @@ async def upload_part(data: bytes) -> None: total_plaintext_size += len(chunk) # Upload when buffer reaches PART_SIZE + # Process immediately without intermediate variable to reduce memory while len(buffer) >= crypto.PART_SIZE: - part_data = bytes(buffer[:crypto.PART_SIZE]) + # Extract, upload, then clear - minimizes peak memory + await upload_part(bytes(buffer[:crypto.PART_SIZE])) del buffer[:crypto.PART_SIZE] - await upload_part(part_data) # Upload remaining data if buffer: @@ -556,3 +559,74 @@ async def 
handle_copy_object(self, request: Request, creds: S3Credentials) -> Re content=xml_responses.copy_object_result(etag, last_modified), media_type="application/xml", ) + + async def handle_get_object_tagging( + self, request: Request, creds: S3Credentials + ) -> Response: + """Handle GetObjectTagging request (GET /bucket/key?tagging).""" + from .. import xml_responses + + bucket, key = self._parse_path(request.url.path) + client = self._client(creds) + + try: + resp = await client.get_object_tagging(bucket, key) + return Response( + content=xml_responses.get_tagging(resp.get("TagSet", [])), + media_type="application/xml", + ) + except ClientError as e: + if e.response["Error"]["Code"] in ("NoSuchKey", "404"): + raise HTTPException(404, "Not found") from None + raise HTTPException(500, str(e)) from e + + async def handle_put_object_tagging( + self, request: Request, creds: S3Credentials + ) -> Response: + """Handle PutObjectTagging request (PUT /bucket/key?tagging).""" + import xml.etree.ElementTree as ET + + bucket, key = self._parse_path(request.url.path) + client = self._client(creds) + + # Parse the XML body + body = await request.body() + try: + root = ET.fromstring(body.decode()) + except ET.ParseError as e: + raise HTTPException(400, f"Invalid XML: {e}") from e + + # Extract tags + ns = "{http://s3.amazonaws.com/doc/2006-03-01/}" + tags = [] + for tag_elem in root.findall(f".//{ns}Tag") or root.findall(".//Tag"): + key_elem = tag_elem.find(f"{ns}Key") or tag_elem.find("Key") + value_elem = tag_elem.find(f"{ns}Value") or tag_elem.find("Value") + if key_elem is not None and key_elem.text: + tags.append({ + "Key": key_elem.text, + "Value": value_elem.text if value_elem is not None and value_elem.text else "", + }) + + try: + await client.put_object_tagging(bucket, key, tags) + return Response(status_code=200) + except ClientError as e: + if e.response["Error"]["Code"] in ("NoSuchKey", "404"): + raise HTTPException(404, "Not found") from None + raise 
HTTPException(500, str(e)) from e + + async def handle_delete_object_tagging( + self, request: Request, creds: S3Credentials + ) -> Response: + """Handle DeleteObjectTagging request (DELETE /bucket/key?tagging).""" + bucket, key = self._parse_path(request.url.path) + client = self._client(creds) + + try: + await client.delete_object_tagging(bucket, key) + return Response(status_code=204) + except ClientError as e: + if e.response["Error"]["Code"] in ("NoSuchKey", "404"): + raise HTTPException(404, "Not found") from None + raise HTTPException(500, str(e)) from e diff --git a/s3proxy/main.py b/s3proxy/main.py index 4a345a3..fabc9a6 100644 --- a/s3proxy/main.py +++ b/s3proxy/main.py @@ -47,6 +47,7 @@ QUERY_LIST_TYPE = "list-type" QUERY_LOCATION = "location" QUERY_DELETE = "delete" +QUERY_TAGGING = "tagging" # Headers HEADER_COPY_SOURCE = "x-amz-copy-source" @@ -110,10 +111,10 @@ def create_lifespan(settings: Settings) -> "AsyncIterator[None]": @asynccontextmanager async def lifespan(_app: FastAPI) -> "AsyncIterator[None]": logger.info("Starting", endpoint=settings.s3_endpoint, port=settings.port) - # Initialize Redis connection - await init_redis(settings.redis_url) + # Initialize Redis if configured (for HA), otherwise use in-memory storage + await init_redis(settings.redis_url or None) yield - # Close Redis connection + # Close Redis connection if active await close_redis() # Close all S3 client pools for pool in list(S3ClientPool._instances.values()): @@ -192,9 +193,9 @@ async def route_request( path = request.url.path headers = {k.lower(): v for k, v in request.headers.items()} - # Root path - list buckets or location query + # Root path - list buckets if path.strip("/") == "": - return await handler.forward_request(request, creds) + return await handler.handle_list_buckets(request, creds) # Batch delete operation (POST /?delete) - check before other bucket ops if QUERY_DELETE in query and method == METHOD_POST: @@ -210,7 +211,7 @@ async def route_request( # 
Multipart part operations (uploadId in query) if QUERY_UPLOAD_ID in query: - return await _handle_multipart_operation(request, creds, handler, method, query) + return await _handle_multipart_operation(request, creds, handler, method, query, headers) # Bucket-only operations if _is_bucket_only_path(path): @@ -219,15 +220,20 @@ async def route_request( return result # List objects (bucket-only GET or explicit list-type) - if QUERY_LIST_TYPE in query or (_is_bucket_only_path(path) and method == METHOD_GET): - return await handler.handle_list_objects(request, creds) + if _is_bucket_only_path(path) and method == METHOD_GET: + # V2 uses list-type=2, V1 uses no list-type or list-type=1 + query_params = parse_qs(query, keep_blank_values=True) + list_type = query_params.get("list-type", ["1"])[0] + if list_type == "2": + return await handler.handle_list_objects(request, creds) + return await handler.handle_list_objects_v1(request, creds) # Copy object (PUT with x-amz-copy-source header) if method == METHOD_PUT and HEADER_COPY_SOURCE in headers: return await handler.handle_copy_object(request, creds) # Standard object operations - return await _handle_object_operation(request, creds, handler, method) + return await _handle_object_operation(request, creds, handler, method, query) async def _handle_multipart_operation( @@ -236,12 +242,16 @@ async def _handle_multipart_operation( handler: S3ProxyHandler, method: str, query: str, + headers: dict[str, str], ) -> "PlainTextResponse": """Handle multipart upload operations.""" # ListParts: GET with uploadId but no partNumber if method == METHOD_GET and QUERY_PART_NUMBER not in query: return await handler.handle_list_parts(request, creds) if method == METHOD_PUT: + # UploadPartCopy: PUT with uploadId and x-amz-copy-source + if HEADER_COPY_SOURCE in headers: + return await handler.handle_upload_part_copy(request, creds) return await handler.handle_upload_part(request, creds) if method == METHOD_POST: return await 
handler.handle_complete_multipart_upload(request, creds) @@ -287,8 +297,18 @@ async def _handle_object_operation( creds: S3Credentials, handler: S3ProxyHandler, method: str, + query: str, ) -> "PlainTextResponse": """Handle standard object operations.""" + # Object tagging operations + if QUERY_TAGGING in query: + if method == METHOD_GET: + return await handler.handle_get_object_tagging(request, creds) + if method == METHOD_PUT: + return await handler.handle_put_object_tagging(request, creds) + if method == METHOD_DELETE: + return await handler.handle_delete_object_tagging(request, creds) + if method == METHOD_GET: return await handler.handle_get_object(request, creds) if method == METHOD_PUT: @@ -304,20 +324,20 @@ async def _handle_object_operation( # Throttling Middleware # ============================================================================ def throttle(app: FastAPI, max_requests: int): - """Wrap app with throttling middleware.""" + """Wrap app with throttling middleware. + + Limits concurrent requests to max_requests. When limit is reached, + additional requests wait in queue instead of being rejected. + This provides memory-bounded execution with graceful backpressure. 
+ """ semaphore = asyncio.Semaphore(max_requests) async def middleware(scope, receive, send): if scope["type"] != "http": return await app(scope, receive, send) - # Atomic acquire - no TOCTOU race - try: - semaphore.acquire_nowait() - except ValueError: - from fastapi.responses import Response - response = Response("Too Many Requests", status_code=429) - return await response(scope, receive, send) + # Wait for slot to become available (queues requests) + await semaphore.acquire() try: await app(scope, receive, send) @@ -337,7 +357,6 @@ def create_app(settings: Settings | None = None) -> FastAPI: # Load credentials and initialize components credentials_store = load_credentials() multipart_manager = MultipartStateManager( - max_concurrent=settings.max_concurrent_uploads, ttl_seconds=settings.redis_upload_ttl_seconds, ) verifier = SigV4Verifier(credentials_store) diff --git a/s3proxy/multipart.py b/s3proxy/multipart.py index 82afd5a..0b3936b 100644 --- a/s3proxy/multipart.py +++ b/s3proxy/multipart.py @@ -1,6 +1,5 @@ """Multipart upload state management.""" -import asyncio import base64 import contextlib import gzip @@ -41,9 +40,11 @@ def json_loads(data: bytes) -> dict: logger = structlog.get_logger() -# Metadata suffix for multipart uploads -META_SUFFIX = ".s3proxy-meta" -UPLOAD_STATE_SUFFIX = ".s3proxy-upload-" +# Internal prefix for all s3proxy metadata (hidden from list operations) +INTERNAL_PREFIX = ".s3proxy-internal/" + +# Legacy suffix for backwards compatibility detection +META_SUFFIX_LEGACY = ".s3proxy-meta" # Redis key prefix for upload state REDIS_KEY_PREFIX = "s3proxy:upload:" @@ -51,14 +52,31 @@ def json_loads(data: bytes) -> dict: # Module-level Redis client (initialized by init_redis) _redis_client: "Redis | None" = None +# Flag to track if we're using Redis or in-memory storage +_use_redis: bool = False + + +async def init_redis(redis_url: str | None) -> "Redis | None": + """Initialize Redis connection pool if URL is provided. 
+ + Args: + redis_url: Redis URL or None/empty to use in-memory storage + + Returns: + Redis client if connected, None if using in-memory storage + """ + global _redis_client, _use_redis + + if not redis_url: + logger.info("Redis URL not configured, using in-memory storage (single-instance mode)") + _use_redis = False + return None -async def init_redis(redis_url: str) -> "Redis": - """Initialize Redis connection pool.""" - global _redis_client _redis_client = redis.from_url(redis_url, decode_responses=False) # Test connection await _redis_client.ping() - logger.info("Redis connected", url=redis_url) + _use_redis = True + logger.info("Redis connected (HA mode)", url=redis_url) return _redis_client @@ -78,6 +96,11 @@ def get_redis() -> "Redis": return _redis_client +def is_using_redis() -> bool: + """Check if we're using Redis or in-memory storage.""" + return _use_redis + + @dataclass(slots=True) class PartMetadata: """Metadata for an encrypted part.""" @@ -161,20 +184,24 @@ def _deserialize_upload_state(data: bytes) -> MultipartUploadState: class MultipartStateManager: - """Manages multipart upload state in Redis.""" + """Manages multipart upload state in Redis or in-memory. + + Uses Redis when configured (for HA/multi-instance deployments). + Falls back to in-memory storage for single-instance deployments. + """ - def __init__(self, max_concurrent: int = 10, ttl_seconds: int = 86400): + def __init__(self, ttl_seconds: int = 86400): """Initialize state manager. 
Args: - max_concurrent: Max concurrent uploads (per-pod limit) ttl_seconds: TTL for upload state in Redis (default 24h) """ - self._semaphore = asyncio.Semaphore(max_concurrent) self._ttl = ttl_seconds + # In-memory storage for single-instance mode + self._memory_store: dict[str, MultipartUploadState] = {} - def _redis_key(self, bucket: str, key: str, upload_id: str) -> str: - """Generate Redis key for upload state.""" + def _storage_key(self, bucket: str, key: str, upload_id: str) -> str: + """Generate storage key for upload state.""" return f"{REDIS_KEY_PREFIX}{bucket}:{key}:{upload_id}" async def create_upload( @@ -184,7 +211,7 @@ async def create_upload( upload_id: str, dek: bytes, ) -> MultipartUploadState: - """Create new upload state in Redis.""" + """Create new upload state.""" state = MultipartUploadState( dek=dek, bucket=bucket, @@ -192,22 +219,30 @@ async def create_upload( upload_id=upload_id, ) - redis_client = get_redis() - rk = self._redis_key(bucket, key, upload_id) - await redis_client.set(rk, _serialize_upload_state(state), ex=self._ttl) + sk = self._storage_key(bucket, key, upload_id) + + if is_using_redis(): + redis_client = get_redis() + await redis_client.set(sk, _serialize_upload_state(state), ex=self._ttl) + else: + self._memory_store[sk] = state return state async def get_upload( self, bucket: str, key: str, upload_id: str ) -> MultipartUploadState | None: - """Get upload state from Redis.""" - redis_client = get_redis() - rk = self._redis_key(bucket, key, upload_id) - data = await redis_client.get(rk) - if data is None: - return None - return _deserialize_upload_state(data) + """Get upload state.""" + sk = self._storage_key(bucket, key, upload_id) + + if is_using_redis(): + redis_client = get_redis() + data = await redis_client.get(sk) + if data is None: + return None + return _deserialize_upload_state(data) + else: + return self._memory_store.get(sk) async def add_part( self, @@ -216,69 +251,74 @@ async def add_part( upload_id: str, part: 
PartMetadata, ) -> None: - """Add part to upload state in Redis.""" - redis_client = get_redis() - rk = self._redis_key(bucket, key, upload_id) - - # Use WATCH/MULTI for atomic update - async with redis_client.pipeline(transaction=True) as pipe: - try: - await pipe.watch(rk) - data = await redis_client.get(rk) - if data is None: - await pipe.unwatch() - return - - state = _deserialize_upload_state(data) + """Add part to upload state.""" + sk = self._storage_key(bucket, key, upload_id) + + if is_using_redis(): + redis_client = get_redis() + # Use WATCH/MULTI for atomic update + async with redis_client.pipeline(transaction=True) as pipe: + try: + await pipe.watch(sk) + data = await redis_client.get(sk) + if data is None: + await pipe.unwatch() + return + + state = _deserialize_upload_state(data) + state.parts[part.part_number] = part + state.total_plaintext_size += part.plaintext_size + + pipe.multi() + pipe.set(sk, _serialize_upload_state(state), ex=self._ttl) + await pipe.execute() + except redis.WatchError: + # Retry on concurrent modification + logger.warning("Redis watch error, retrying add_part", key=sk) + await self.add_part(bucket, key, upload_id, part) + else: + state = self._memory_store.get(sk) + if state is not None: state.parts[part.part_number] = part state.total_plaintext_size += part.plaintext_size - pipe.multi() - pipe.set(rk, _serialize_upload_state(state), ex=self._ttl) - await pipe.execute() - except redis.WatchError: - # Retry on concurrent modification - logger.warning("Redis watch error, retrying add_part", key=rk) - await self.add_part(bucket, key, upload_id, part) - async def complete_upload( self, bucket: str, key: str, upload_id: str ) -> MultipartUploadState | None: - """Remove and return upload state from Redis on completion.""" - redis_client = get_redis() - rk = self._redis_key(bucket, key, upload_id) - - # Get and delete atomically - async with redis_client.pipeline(transaction=True) as pipe: - try: - await pipe.watch(rk) - data = 
await redis_client.get(rk) - if data is None: - await pipe.unwatch() - return None - - state = _deserialize_upload_state(data) - pipe.multi() - pipe.delete(rk) - await pipe.execute() - return state - except redis.WatchError: - logger.warning("Redis watch error, retrying complete_upload", key=rk) - return await self.complete_upload(bucket, key, upload_id) + """Remove and return upload state on completion.""" + sk = self._storage_key(bucket, key, upload_id) + + if is_using_redis(): + redis_client = get_redis() + # Get and delete atomically + async with redis_client.pipeline(transaction=True) as pipe: + try: + await pipe.watch(sk) + data = await redis_client.get(sk) + if data is None: + await pipe.unwatch() + return None + + state = _deserialize_upload_state(data) + pipe.multi() + pipe.delete(sk) + await pipe.execute() + return state + except redis.WatchError: + logger.warning("Redis watch error, retrying complete_upload", key=sk) + return await self.complete_upload(bucket, key, upload_id) + else: + return self._memory_store.pop(sk, None) async def abort_upload(self, bucket: str, key: str, upload_id: str) -> None: - """Remove upload state from Redis on abort.""" - redis_client = get_redis() - rk = self._redis_key(bucket, key, upload_id) - await redis_client.delete(rk) - - async def acquire_slot(self) -> None: - """Acquire an upload slot (per-pod limit).""" - await self._semaphore.acquire() + """Remove upload state on abort.""" + sk = self._storage_key(bucket, key, upload_id) - def release_slot(self) -> None: - """Release an upload slot.""" - self._semaphore.release() + if is_using_redis(): + redis_client = get_redis() + await redis_client.delete(sk) + else: + self._memory_store.pop(sk, None) def encode_multipart_metadata(meta: MultipartMetadata) -> str: @@ -329,6 +369,16 @@ def decode_multipart_metadata(encoded: str) -> MultipartMetadata: ) +def _internal_upload_key(key: str, upload_id: str) -> str: + """Get internal key for upload state.""" + return 
f"{INTERNAL_PREFIX}{key}.upload-{upload_id}" + + +def _internal_meta_key(key: str) -> str: + """Get internal key for multipart metadata.""" + return f"{INTERNAL_PREFIX}{key}.meta" + + async def persist_upload_state( s3_client: S3Client, bucket: str, @@ -337,7 +387,7 @@ async def persist_upload_state( wrapped_dek: bytes, ) -> None: """Persist DEK to S3 during upload.""" - state_key = f"{key}{UPLOAD_STATE_SUFFIX}{upload_id}" + state_key = _internal_upload_key(key, upload_id) data = {"dek": base64.b64encode(wrapped_dek).decode()} await s3_client.put_object( @@ -356,7 +406,7 @@ async def load_upload_state( kek: bytes, ) -> bytes | None: """Load DEK from S3 for resumed upload.""" - state_key = f"{key}{UPLOAD_STATE_SUFFIX}{upload_id}" + state_key = _internal_upload_key(key, upload_id) try: response = await s3_client.get_object(bucket, state_key) @@ -376,7 +426,7 @@ async def delete_upload_state( upload_id: str, ) -> None: """Delete persisted upload state.""" - state_key = f"{key}{UPLOAD_STATE_SUFFIX}{upload_id}" + state_key = _internal_upload_key(key, upload_id) with contextlib.suppress(Exception): await s3_client.delete_object(bucket, state_key) @@ -388,7 +438,7 @@ async def save_multipart_metadata( meta: MultipartMetadata, ) -> None: """Save multipart metadata to S3.""" - meta_key = f"{key}{META_SUFFIX}" + meta_key = _internal_meta_key(key) encoded = encode_multipart_metadata(meta) await s3_client.put_object( @@ -404,14 +454,27 @@ async def load_multipart_metadata( bucket: str, key: str, ) -> MultipartMetadata | None: - """Load multipart metadata from S3.""" - meta_key = f"{key}{META_SUFFIX}" + """Load multipart metadata from S3. + Checks the new internal prefix first, then falls back to legacy location. 
+ """ + # Try new location first + meta_key = _internal_meta_key(key) try: response = await s3_client.get_object(bucket, meta_key) body = await response["Body"].read() encoded = body.decode() return decode_multipart_metadata(encoded) + except Exception: + pass + + # Fall back to legacy location for backwards compatibility + legacy_key = f"{key}{META_SUFFIX_LEGACY}" + try: + response = await s3_client.get_object(bucket, legacy_key) + body = await response["Body"].read() + encoded = body.decode() + return decode_multipart_metadata(encoded) except Exception: return None @@ -421,11 +484,17 @@ async def delete_multipart_metadata( bucket: str, key: str, ) -> None: - """Delete multipart metadata from S3.""" - meta_key = f"{key}{META_SUFFIX}" + """Delete multipart metadata from S3 (both new and legacy locations).""" + # Delete from new location + meta_key = _internal_meta_key(key) with contextlib.suppress(Exception): await s3_client.delete_object(bucket, meta_key) + # Also delete legacy location if it exists + legacy_key = f"{key}{META_SUFFIX_LEGACY}" + with contextlib.suppress(Exception): + await s3_client.delete_object(bucket, legacy_key) + def calculate_part_range( parts: list[PartMetadata], diff --git a/s3proxy/s3client.py b/s3proxy/s3client.py index 1a0c282..4bbabdf 100644 --- a/s3proxy/s3client.py +++ b/s3proxy/s3client.py @@ -662,3 +662,85 @@ async def list_parts( if part_number_marker: kwargs["PartNumberMarker"] = part_number_marker return await client.list_parts(**kwargs) + + async def list_buckets(self) -> dict[str, Any]: + """List all buckets owned by the authenticated user.""" + client = await self._client() + return await client.list_buckets() + + async def list_objects_v1( + self, + bucket: str, + prefix: str | None = None, + marker: str | None = None, + delimiter: str | None = None, + max_keys: int = 1000, + ) -> dict[str, Any]: + """List objects in bucket using V1 API. 
+ + Args: + bucket: Bucket name + prefix: Filter by key prefix + marker: Key to start listing after + delimiter: Delimiter for grouping keys + max_keys: Maximum keys to return + """ + client = await self._client() + kwargs: dict[str, Any] = {"Bucket": bucket, "MaxKeys": max_keys} + if prefix: + kwargs["Prefix"] = prefix + if marker: + kwargs["Marker"] = marker + if delimiter: + kwargs["Delimiter"] = delimiter + return await client.list_objects(**kwargs) + + async def get_object_tagging(self, bucket: str, key: str) -> dict[str, Any]: + """Get object tags.""" + client = await self._client() + return await client.get_object_tagging(Bucket=bucket, Key=key) + + async def put_object_tagging( + self, bucket: str, key: str, tags: list[dict[str, str]] + ) -> dict[str, Any]: + """Set object tags.""" + client = await self._client() + return await client.put_object_tagging( + Bucket=bucket, Key=key, Tagging={"TagSet": tags} + ) + + async def delete_object_tagging(self, bucket: str, key: str) -> dict[str, Any]: + """Delete object tags.""" + client = await self._client() + return await client.delete_object_tagging(Bucket=bucket, Key=key) + + async def upload_part_copy( + self, + bucket: str, + key: str, + upload_id: str, + part_number: int, + copy_source: str, + copy_source_range: str | None = None, + ) -> dict[str, Any]: + """Copy a part from another object. 
+ + Args: + bucket: Destination bucket + key: Destination key + upload_id: Multipart upload ID + part_number: Part number + copy_source: Source in format "bucket/key" + copy_source_range: Optional byte range (e.g., "bytes=0-999") + """ + client = await self._client() + kwargs: dict[str, Any] = { + "Bucket": bucket, + "Key": key, + "UploadId": upload_id, + "PartNumber": part_number, + "CopySource": copy_source, + } + if copy_source_range: + kwargs["CopySourceRange"] = copy_source_range + return await client.upload_part_copy(**kwargs) diff --git a/s3proxy/xml_responses.py b/s3proxy/xml_responses.py index 06ae95a..64dd0df 100644 --- a/s3proxy/xml_responses.py +++ b/s3proxy/xml_responses.py @@ -233,3 +233,121 @@ def list_parts( {str(is_truncated).lower()} {storage_class}{parts_xml} """ + + +def list_buckets(owner: dict, buckets: list[dict]) -> str: + """Build ListAllMyBucketsResult XML. + + Args: + owner: Owner dict with ID and DisplayName + buckets: List of bucket dicts with Name and CreationDate + """ + buckets_xml = "" + for b in buckets: + creation_date = b.get("CreationDate", "") + if hasattr(creation_date, "isoformat"): + creation_date = creation_date.isoformat() + buckets_xml += f""" + + {escape(b.get("Name", ""))} + {creation_date} + """ + + return f""" + + + {escape(owner.get("ID", ""))} + {escape(owner.get("DisplayName", ""))} + + {buckets_xml} + +""" + + +def list_objects_v1( + bucket: str, + prefix: str, + marker: str | None, + delimiter: str | None, + max_keys: int, + is_truncated: bool, + next_marker: str | None, + objects: list[dict], + common_prefixes: list[str] | None = None, +) -> str: + """Build ListBucketResult XML for V1 API. 
+ + Args: + bucket: Bucket name + prefix: Prefix filter + marker: Marker from request + delimiter: Delimiter for grouping + max_keys: Max keys requested + is_truncated: Whether there are more results + next_marker: Next marker for pagination + objects: List of object dicts + common_prefixes: List of common prefix strings + """ + objects_xml = "" + for obj in objects: + objects_xml += f""" + + {escape(obj["key"])} + {obj["last_modified"]} + "{obj["etag"]}" + {obj["size"]} + {obj.get("storage_class", "STANDARD")} + """ + + marker_xml = f"{escape(marker or '')}" + next_marker_xml = f"{escape(next_marker or '')}" if next_marker else "" + delimiter_xml = f"{escape(delimiter)}" if delimiter else "" + + prefixes_xml = "" + if common_prefixes: + for cp in common_prefixes: + prefixes_xml += f""" + + {escape(cp)} + """ + + return f""" + + {bucket} + {escape(prefix)} + {marker_xml} + {delimiter_xml} + {max_keys} + {str(is_truncated).lower()} + {next_marker_xml}{objects_xml}{prefixes_xml} +""" + + +def get_tagging(tags: list[dict]) -> str: + """Build GetObjectTaggingResult XML. 
+ + Args: + tags: List of tag dicts with Key and Value + """ + tags_xml = "" + for tag in tags: + tags_xml += f""" + + {escape(tag.get("Key", ""))} + {escape(tag.get("Value", ""))} + """ + + return f""" + + {tags_xml} + +""" + + +def upload_part_copy_result(etag: str, last_modified: str) -> str: + """Build CopyPartResult XML.""" + return f""" + + "{etag}" + {last_modified} +""" diff --git a/tests/conftest.py b/tests/conftest.py index 8a2f8f3..35a77e8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -448,6 +448,148 @@ async def list_parts( "StorageClass": "STANDARD", } + async def list_buckets(self) -> dict: + """List all buckets.""" + self.call_history.append(("list_buckets", {})) + buckets = [ + {"Name": name, "CreationDate": info["CreationDate"]} + for name, info in self.buckets.items() + ] + return { + "Owner": {"ID": "owner-id-123", "DisplayName": "test-owner"}, + "Buckets": buckets, + } + + async def list_objects_v1( + self, + bucket: str, + prefix: str | None = None, + marker: str | None = None, + delimiter: str | None = None, + max_keys: int = 1000, + ) -> dict: + """List objects in bucket using V1 API.""" + self.call_history.append(("list_objects_v1", {"bucket": bucket, "prefix": prefix, "marker": marker})) + contents = [] + common_prefixes = set() + prefix = prefix or "" + + for obj_key, obj in sorted(self.objects.items()): + b, k = obj_key.split("/", 1) + if b != bucket or not k.startswith(prefix): + continue + if marker and k <= marker: + continue + + # Handle delimiter for grouping + if delimiter: + suffix = k[len(prefix):] + if delimiter in suffix: + common_prefix = prefix + suffix[:suffix.index(delimiter) + len(delimiter)] + common_prefixes.add(common_prefix) + continue + + contents.append({ + "Key": k, + "Size": obj["ContentLength"], + "ETag": obj["ETag"], + "LastModified": obj["LastModified"], + "StorageClass": "STANDARD", + }) + + is_truncated = len(contents) > max_keys + contents = contents[:max_keys] + next_marker = contents[-1]["Key"] 
if is_truncated and contents else None + + return { + "Contents": contents, + "CommonPrefixes": [{"Prefix": cp} for cp in sorted(common_prefixes)], + "IsTruncated": is_truncated, + "NextMarker": next_marker, + } + + async def get_object_tagging(self, bucket: str, key: str) -> dict: + """Get object tags.""" + self.call_history.append(("get_object_tagging", {"bucket": bucket, "key": key})) + obj_key = self._key(bucket, key) + if obj_key not in self.objects: + raise self._not_found_error(key) + + obj = self.objects[obj_key] + return {"TagSet": obj.get("Tags", [])} + + async def put_object_tagging( + self, bucket: str, key: str, tags: list[dict[str, str]] + ) -> dict: + """Set object tags.""" + self.call_history.append(("put_object_tagging", {"bucket": bucket, "key": key, "tags": tags})) + obj_key = self._key(bucket, key) + if obj_key not in self.objects: + raise self._not_found_error(key) + + self.objects[obj_key]["Tags"] = tags + return {} + + async def delete_object_tagging(self, bucket: str, key: str) -> dict: + """Delete object tags.""" + self.call_history.append(("delete_object_tagging", {"bucket": bucket, "key": key})) + obj_key = self._key(bucket, key) + if obj_key not in self.objects: + raise self._not_found_error(key) + + self.objects[obj_key]["Tags"] = [] + return {} + + async def upload_part_copy( + self, + bucket: str, + key: str, + upload_id: str, + part_number: int, + copy_source: str, + copy_source_range: str | None = None, + ) -> dict: + """Copy a part from another object.""" + self.call_history.append(("upload_part_copy", { + "bucket": bucket, "key": key, "upload_id": upload_id, + "part_number": part_number, "copy_source": copy_source, + })) + if upload_id not in self.multipart_uploads: + raise self._not_found_error(f"upload {upload_id}") + + # Parse source + source = copy_source.lstrip("/") + src_bucket, src_key = source.split("/", 1) + src_obj_key = self._key(src_bucket, src_key) + + if src_obj_key not in self.objects: + raise 
self._not_found_error(src_key) + + src_obj = self.objects[src_obj_key] + body = src_obj["Body"] + + # Handle range if specified + if copy_source_range: + range_spec = copy_source_range.replace("bytes=", "") + start, end = range_spec.split("-") + start = int(start) + end = int(end) + body = body[start:end + 1] + + etag = hashlib.md5(body).hexdigest() + self.multipart_uploads[upload_id]["Parts"][part_number] = { + "Body": body, + "ETag": etag, + "Size": len(body), + "LastModified": datetime.now(UTC), + } + return { + "CopyPartResult": { + "ETag": f'"{etag}"', + "LastModified": datetime.now(UTC), + } + } + def _not_found_error(self, key: str): """Create a NoSuchKey error.""" error = Exception(f"NoSuchKey: {key}") @@ -532,6 +674,24 @@ async def list_multipart_uploads(self, *args, **kwargs): async def list_parts(self, *args, **kwargs): return await self._mock.list_parts(*args, **kwargs) + async def list_buckets(self, *args, **kwargs): + return await self._mock.list_buckets(*args, **kwargs) + + async def list_objects_v1(self, *args, **kwargs): + return await self._mock.list_objects_v1(*args, **kwargs) + + async def get_object_tagging(self, *args, **kwargs): + return await self._mock.get_object_tagging(*args, **kwargs) + + async def put_object_tagging(self, *args, **kwargs): + return await self._mock.put_object_tagging(*args, **kwargs) + + async def delete_object_tagging(self, *args, **kwargs): + return await self._mock.delete_object_tagging(*args, **kwargs) + + async def upload_part_copy(self, *args, **kwargs): + return await self._mock.upload_part_copy(*args, **kwargs) + return PatchedS3Client(mock_s3) @@ -543,7 +703,7 @@ async def list_parts(self, *args, **kwargs): @pytest.fixture def multipart_manager(): """Create a multipart state manager.""" - return MultipartStateManager(max_concurrent=10) + return MultipartStateManager() # ============================================================================ diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 
bccc762..f63dfc5 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -108,8 +108,7 @@ def test_s3_endpoint_without_scheme(self): def test_size_calculations(self, settings): """Test size calculations.""" - assert settings.max_single_encrypted_bytes == 16 * 1024 * 1024 - assert settings.auto_multipart_bytes == 16 * 1024 * 1024 + assert settings.max_upload_size_bytes == 45 * 1024 * 1024 class TestRangeParsing: diff --git a/tests/test_integration.py b/tests/test_integration.py index 4985928..4f52837 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -410,3 +410,290 @@ async def test_call_history_params(self, mock_s3): assert len(put_calls) == 1 assert put_calls[0][1]["bucket"] == "my-bucket" assert put_calls[0][1]["key"] == "my-key" + + +class TestListBuckets: + """Test ListBuckets operation.""" + + @pytest.mark.asyncio + async def test_list_buckets_empty(self, mock_s3): + """Test listing when no buckets exist.""" + resp = await mock_s3.list_buckets() + assert "Buckets" in resp + assert resp["Buckets"] == [] + assert "Owner" in resp + + @pytest.mark.asyncio + async def test_list_buckets_with_buckets(self, mock_s3): + """Test listing multiple buckets.""" + await mock_s3.create_bucket("bucket-a") + await mock_s3.create_bucket("bucket-b") + await mock_s3.create_bucket("bucket-c") + + resp = await mock_s3.list_buckets() + assert len(resp["Buckets"]) == 3 + + bucket_names = [b["Name"] for b in resp["Buckets"]] + assert "bucket-a" in bucket_names + assert "bucket-b" in bucket_names + assert "bucket-c" in bucket_names + + @pytest.mark.asyncio + async def test_list_buckets_owner_info(self, mock_s3): + """Test owner information is returned.""" + await mock_s3.create_bucket("test-bucket") + + resp = await mock_s3.list_buckets() + assert "Owner" in resp + assert "ID" in resp["Owner"] + assert "DisplayName" in resp["Owner"] + + +class TestListObjectsV1: + """Test ListObjects V1 API.""" + + @pytest.mark.asyncio + async def 
test_list_objects_v1_basic(self, mock_s3): + """Test basic V1 list objects.""" + await mock_s3.create_bucket("test-bucket") + await mock_s3.put_object("test-bucket", "file1.txt", b"data1") + await mock_s3.put_object("test-bucket", "file2.txt", b"data2") + + resp = await mock_s3.list_objects_v1("test-bucket") + assert len(resp["Contents"]) == 2 + keys = [obj["Key"] for obj in resp["Contents"]] + assert "file1.txt" in keys + assert "file2.txt" in keys + + @pytest.mark.asyncio + async def test_list_objects_v1_with_prefix(self, mock_s3): + """Test V1 list with prefix filter.""" + await mock_s3.create_bucket("test-bucket") + await mock_s3.put_object("test-bucket", "dir/file1.txt", b"data1") + await mock_s3.put_object("test-bucket", "dir/file2.txt", b"data2") + await mock_s3.put_object("test-bucket", "other/file3.txt", b"data3") + + resp = await mock_s3.list_objects_v1("test-bucket", prefix="dir/") + assert len(resp["Contents"]) == 2 + keys = [obj["Key"] for obj in resp["Contents"]] + assert all(k.startswith("dir/") for k in keys) + + @pytest.mark.asyncio + async def test_list_objects_v1_with_delimiter(self, mock_s3): + """Test V1 list with delimiter for grouping.""" + await mock_s3.create_bucket("test-bucket") + await mock_s3.put_object("test-bucket", "root.txt", b"data") + await mock_s3.put_object("test-bucket", "dir1/file1.txt", b"data") + await mock_s3.put_object("test-bucket", "dir1/file2.txt", b"data") + await mock_s3.put_object("test-bucket", "dir2/file3.txt", b"data") + + resp = await mock_s3.list_objects_v1("test-bucket", delimiter="/") + # Should have root.txt in Contents and dir1/, dir2/ in CommonPrefixes + assert len(resp["Contents"]) == 1 + assert resp["Contents"][0]["Key"] == "root.txt" + common_prefixes = [cp["Prefix"] for cp in resp["CommonPrefixes"]] + assert "dir1/" in common_prefixes + assert "dir2/" in common_prefixes + + @pytest.mark.asyncio + async def test_list_objects_v1_with_marker(self, mock_s3): + """Test V1 list with marker for pagination.""" 
+ await mock_s3.create_bucket("test-bucket") + await mock_s3.put_object("test-bucket", "a.txt", b"data") + await mock_s3.put_object("test-bucket", "b.txt", b"data") + await mock_s3.put_object("test-bucket", "c.txt", b"data") + + resp = await mock_s3.list_objects_v1("test-bucket", marker="a.txt") + keys = [obj["Key"] for obj in resp["Contents"]] + assert "a.txt" not in keys + assert "b.txt" in keys + assert "c.txt" in keys + + +class TestInternalPrefixFiltering: + """Test that internal s3proxy objects are hidden from list operations.""" + + @pytest.mark.asyncio + async def test_internal_prefix_hidden(self, mock_s3): + """Test .s3proxy-internal/ prefix objects are hidden.""" + from s3proxy.multipart import INTERNAL_PREFIX + + await mock_s3.create_bucket("test-bucket") + # Add regular objects + await mock_s3.put_object("test-bucket", "file1.txt", b"data1") + await mock_s3.put_object("test-bucket", "file2.txt", b"data2") + # Add internal metadata object + await mock_s3.put_object("test-bucket", f"{INTERNAL_PREFIX}file1.txt.meta", b"meta") + + resp = await mock_s3.list_objects_v2("test-bucket") + keys = [obj["Key"] for obj in resp.get("Contents", [])] + + assert "file1.txt" in keys + assert "file2.txt" in keys + # Mock returns all - filtering is done in the handler layer + assert f"{INTERNAL_PREFIX}file1.txt.meta" in keys + + @pytest.mark.asyncio + async def test_legacy_suffix_hidden(self, mock_s3): + """Test legacy .s3proxy-meta suffix objects are hidden.""" + from s3proxy.multipart import META_SUFFIX_LEGACY + + await mock_s3.create_bucket("test-bucket") + await mock_s3.put_object("test-bucket", "file1.txt", b"data1") + await mock_s3.put_object("test-bucket", f"file1.txt{META_SUFFIX_LEGACY}", b"meta") + + resp = await mock_s3.list_objects_v2("test-bucket") + keys = [obj["Key"] for obj in resp.get("Contents", [])] + + assert "file1.txt" in keys + # Mock returns all - filtering is done in the handler layer + assert f"file1.txt{META_SUFFIX_LEGACY}" in keys + + +class 
TestObjectTagging: + """Test object tagging operations.""" + + @pytest.mark.asyncio + async def test_put_and_get_tags(self, mock_s3): + """Test setting and getting object tags.""" + await mock_s3.create_bucket("test-bucket") + await mock_s3.put_object("test-bucket", "file.txt", b"data") + + tags = [ + {"Key": "Environment", "Value": "Production"}, + {"Key": "Project", "Value": "S3Proxy"}, + ] + await mock_s3.put_object_tagging("test-bucket", "file.txt", tags) + + resp = await mock_s3.get_object_tagging("test-bucket", "file.txt") + assert len(resp["TagSet"]) == 2 + + tag_dict = {t["Key"]: t["Value"] for t in resp["TagSet"]} + assert tag_dict["Environment"] == "Production" + assert tag_dict["Project"] == "S3Proxy" + + @pytest.mark.asyncio + async def test_delete_tags(self, mock_s3): + """Test deleting object tags.""" + await mock_s3.create_bucket("test-bucket") + await mock_s3.put_object("test-bucket", "file.txt", b"data") + + tags = [{"Key": "Temp", "Value": "true"}] + await mock_s3.put_object_tagging("test-bucket", "file.txt", tags) + + # Verify tags exist + resp = await mock_s3.get_object_tagging("test-bucket", "file.txt") + assert len(resp["TagSet"]) == 1 + + # Delete tags + await mock_s3.delete_object_tagging("test-bucket", "file.txt") + + # Verify tags are gone + resp = await mock_s3.get_object_tagging("test-bucket", "file.txt") + assert len(resp["TagSet"]) == 0 + + @pytest.mark.asyncio + async def test_get_tags_nonexistent_object(self, mock_s3): + """Test getting tags from non-existent object.""" + await mock_s3.create_bucket("test-bucket") + + with pytest.raises(Exception) as exc_info: + await mock_s3.get_object_tagging("test-bucket", "nonexistent.txt") + assert "NoSuchKey" in str(exc_info.value) + + @pytest.mark.asyncio + async def test_object_without_tags(self, mock_s3): + """Test getting tags from object that has no tags.""" + await mock_s3.create_bucket("test-bucket") + await mock_s3.put_object("test-bucket", "file.txt", b"data") + + resp = await 
mock_s3.get_object_tagging("test-bucket", "file.txt") + assert resp["TagSet"] == [] + + +class TestUploadPartCopy: + """Test UploadPartCopy operation.""" + + @pytest.mark.asyncio + async def test_upload_part_copy_basic(self, mock_s3): + """Test basic part copy.""" + await mock_s3.create_bucket("test-bucket") + await mock_s3.put_object("test-bucket", "source.txt", b"0123456789ABCDEF") + + # Start multipart upload + resp = await mock_s3.create_multipart_upload("test-bucket", "dest.txt") + upload_id = resp["UploadId"] + + # Copy entire source as part 1 + copy_resp = await mock_s3.upload_part_copy( + "test-bucket", "dest.txt", upload_id, 1, + "test-bucket/source.txt" + ) + assert "CopyPartResult" in copy_resp + assert "ETag" in copy_resp["CopyPartResult"] + + @pytest.mark.asyncio + async def test_upload_part_copy_with_range(self, mock_s3): + """Test part copy with byte range.""" + await mock_s3.create_bucket("test-bucket") + await mock_s3.put_object("test-bucket", "source.txt", b"0123456789ABCDEF") + + resp = await mock_s3.create_multipart_upload("test-bucket", "dest.txt") + upload_id = resp["UploadId"] + + # Copy partial range + await mock_s3.upload_part_copy( + "test-bucket", "dest.txt", upload_id, 1, + "test-bucket/source.txt", + copy_source_range="bytes=0-7" + ) + + # Complete and verify + list_resp = await mock_s3.list_parts("test-bucket", "dest.txt", upload_id) + assert len(list_resp["Parts"]) == 1 + assert list_resp["Parts"][0]["Size"] == 8 # bytes 0-7 inclusive + + @pytest.mark.asyncio + async def test_upload_part_copy_nonexistent_source(self, mock_s3): + """Test copying from non-existent source.""" + await mock_s3.create_bucket("test-bucket") + + resp = await mock_s3.create_multipart_upload("test-bucket", "dest.txt") + upload_id = resp["UploadId"] + + with pytest.raises(Exception) as exc_info: + await mock_s3.upload_part_copy( + "test-bucket", "dest.txt", upload_id, 1, + "test-bucket/nonexistent.txt" + ) + assert "NoSuchKey" in str(exc_info.value) + + 
@pytest.mark.asyncio + async def test_upload_part_copy_complete_multipart(self, mock_s3): + """Test completing multipart with copied parts.""" + await mock_s3.create_bucket("test-bucket") + await mock_s3.put_object("test-bucket", "part1.txt", b"AAAA") + await mock_s3.put_object("test-bucket", "part2.txt", b"BBBB") + + resp = await mock_s3.create_multipart_upload("test-bucket", "combined.txt") + upload_id = resp["UploadId"] + + # Copy parts + resp1 = await mock_s3.upload_part_copy( + "test-bucket", "combined.txt", upload_id, 1, "test-bucket/part1.txt" + ) + resp2 = await mock_s3.upload_part_copy( + "test-bucket", "combined.txt", upload_id, 2, "test-bucket/part2.txt" + ) + + # Complete + parts = [ + {"PartNumber": 1, "ETag": resp1["CopyPartResult"]["ETag"]}, + {"PartNumber": 2, "ETag": resp2["CopyPartResult"]["ETag"]}, + ] + await mock_s3.complete_multipart_upload("test-bucket", "combined.txt", upload_id, parts) + + # Verify combined object + get_resp = await mock_s3.get_object("test-bucket", "combined.txt") + data = await get_resp["Body"].read() + assert data == b"AAAABBBB" diff --git a/tests/test_multipart.py b/tests/test_multipart.py index a98d845..13618a6 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -98,25 +98,6 @@ async def test_abort_upload(self): assert await manager.get_upload("bucket", "key", "upload-123") is None - @pytest.mark.asyncio - async def test_semaphore_limits_concurrent_uploads(self): - """Test semaphore limits concurrent uploads.""" - manager = MultipartStateManager(max_concurrent=2) - - # Acquire two slots - await manager.acquire_slot() - await manager.acquire_slot() - - # Third should timeout - with pytest.raises(asyncio.TimeoutError): - await asyncio.wait_for(manager.acquire_slot(), timeout=0.01) - - # Release one - manager.release_slot() - - # Now we can acquire again - await asyncio.wait_for(manager.acquire_slot(), timeout=0.1) - class TestMetadataEncoding: """Test metadata encoding/decoding.""" diff --git 
a/tests/test_routing.py b/tests/test_routing.py index c84b836..5f38eba 100644 --- a/tests/test_routing.py +++ b/tests/test_routing.py @@ -13,6 +13,7 @@ QUERY_LIST_TYPE, QUERY_LOCATION, QUERY_PART_NUMBER, + QUERY_TAGGING, QUERY_UPLOAD_ID, QUERY_UPLOADS, _handle_bucket_operation, @@ -93,6 +94,10 @@ def test_delete_constant(self): """Test delete query constant.""" assert QUERY_DELETE == "delete" + def test_tagging_constant(self): + """Test tagging query constant.""" + assert QUERY_TAGGING == "tagging" + class TestHeaderConstants: """Test header constants are correct.""" @@ -180,6 +185,37 @@ def test_copy_object_detected(self): method = METHOD_PUT assert HEADER_COPY_SOURCE in headers and method == METHOD_PUT + def test_get_object_tagging_detected(self): + """Test get object tagging is detected.""" + query = "tagging" + method = METHOD_GET + assert QUERY_TAGGING in query and method == METHOD_GET + + def test_put_object_tagging_detected(self): + """Test put object tagging is detected.""" + query = "tagging" + method = METHOD_PUT + assert QUERY_TAGGING in query and method == METHOD_PUT + + def test_delete_object_tagging_detected(self): + """Test delete object tagging is detected.""" + query = "tagging" + method = METHOD_DELETE + assert QUERY_TAGGING in query and method == METHOD_DELETE + + def test_upload_part_copy_detected(self): + """Test upload part copy is detected.""" + query = "uploadId=abc123&partNumber=1" + headers = {"x-amz-copy-source": "source-bucket/source-key"} + method = METHOD_PUT + # UploadPartCopy: PUT with uploadId AND x-amz-copy-source + is_upload_part_copy = ( + QUERY_UPLOAD_ID in query + and method == METHOD_PUT + and HEADER_COPY_SOURCE in headers + ) + assert is_upload_part_copy is True + class TestPathParsing: """Test path parsing for bucket and key extraction.""" @@ -235,11 +271,14 @@ def test_acl_query_forwarded(self): assert QUERY_UPLOADS not in query assert QUERY_UPLOAD_ID not in query - def test_tagging_query_forwarded(self): - """Test ?tagging 
is forwarded to backend.""" + def test_tagging_query_handled(self): + """Test ?tagging is handled by tagging handlers.""" query = "tagging" - # This should not match any special handlers + # Tagging is now handled by our implementation + assert QUERY_TAGGING in query + # Should not match multipart handlers assert QUERY_UPLOADS not in query + assert QUERY_UPLOAD_ID not in query def test_combined_query_params(self): """Test combined query parameters are handled.""" diff --git a/tests/test_xml_responses.py b/tests/test_xml_responses.py index 88d3ccf..05044e1 100644 --- a/tests/test_xml_responses.py +++ b/tests/test_xml_responses.py @@ -366,3 +366,201 @@ def test_key_with_special_chars(self): root = ET.fromstring(xml) ns = "{http://s3.amazonaws.com/doc/2006-03-01/}" assert root.find(f"{ns}Key").text == "path/to/file<>&.txt" + + +class TestListBuckets: + """Test ListAllMyBucketsResult XML.""" + + def test_empty_buckets(self): + """Test listing no buckets.""" + xml = xml_responses.list_buckets( + owner={"ID": "owner-123", "DisplayName": "test-user"}, + buckets=[], + ) + + root = ET.fromstring(xml) + ns = "{http://s3.amazonaws.com/doc/2006-03-01/}" + assert root.find(f"{ns}Owner/{ns}ID").text == "owner-123" + assert root.find(f"{ns}Owner/{ns}DisplayName").text == "test-user" + assert len(root.findall(f"{ns}Buckets/{ns}Bucket")) == 0 + + def test_with_buckets(self): + """Test listing multiple buckets.""" + buckets = [ + {"Name": "bucket-a", "CreationDate": "2024-01-15T10:00:00Z"}, + {"Name": "bucket-b", "CreationDate": "2024-01-16T10:00:00Z"}, + ] + xml = xml_responses.list_buckets( + owner={"ID": "owner-123", "DisplayName": "test-user"}, + buckets=buckets, + ) + + root = ET.fromstring(xml) + ns = "{http://s3.amazonaws.com/doc/2006-03-01/}" + bucket_elements = root.findall(f"{ns}Buckets/{ns}Bucket") + assert len(bucket_elements) == 2 + + names = [b.find(f"{ns}Name").text for b in bucket_elements] + assert "bucket-a" in names + assert "bucket-b" in names + + def 
test_bucket_with_datetime(self): + """Test bucket with datetime object for CreationDate.""" + from datetime import datetime, UTC + buckets = [{"Name": "test", "CreationDate": datetime(2024, 1, 15, 10, 0, 0, tzinfo=UTC)}] + xml = xml_responses.list_buckets( + owner={"ID": "id", "DisplayName": "name"}, + buckets=buckets, + ) + + # Should parse without error + root = ET.fromstring(xml) + ns = "{http://s3.amazonaws.com/doc/2006-03-01/}" + assert root.find(f"{ns}Buckets/{ns}Bucket/{ns}Name").text == "test" + + +class TestListObjectsV1: + """Test ListBucketResult XML for V1 API.""" + + def test_empty_bucket(self): + """Test empty bucket response.""" + xml = xml_responses.list_objects_v1( + bucket="my-bucket", + prefix="", + marker=None, + delimiter=None, + max_keys=1000, + is_truncated=False, + next_marker=None, + objects=[], + ) + + root = ET.fromstring(xml) + ns = "{http://s3.amazonaws.com/doc/2006-03-01/}" + assert root.find(f"{ns}Name").text == "my-bucket" + assert root.find(f"{ns}IsTruncated").text == "false" + assert len(root.findall(f"{ns}Contents")) == 0 + + def test_with_objects(self): + """Test V1 list with objects.""" + objects = [ + {"key": "file1.txt", "last_modified": "2024-01-15T10:00:00Z", "etag": "abc", "size": 100}, + {"key": "file2.txt", "last_modified": "2024-01-15T11:00:00Z", "etag": "def", "size": 200}, + ] + xml = xml_responses.list_objects_v1( + bucket="my-bucket", + prefix="", + marker=None, + delimiter=None, + max_keys=1000, + is_truncated=False, + next_marker=None, + objects=objects, + ) + + root = ET.fromstring(xml) + ns = "{http://s3.amazonaws.com/doc/2006-03-01/}" + contents = root.findall(f"{ns}Contents") + assert len(contents) == 2 + + def test_with_marker(self): + """Test V1 list with marker.""" + xml = xml_responses.list_objects_v1( + bucket="my-bucket", + prefix="", + marker="start-key", + delimiter=None, + max_keys=100, + is_truncated=True, + next_marker="next-key", + objects=[{"key": "file.txt", "last_modified": 
"2024-01-15T10:00:00Z", "etag": "abc", "size": 100}], + ) + + root = ET.fromstring(xml) + ns = "{http://s3.amazonaws.com/doc/2006-03-01/}" + assert root.find(f"{ns}Marker").text == "start-key" + assert root.find(f"{ns}NextMarker").text == "next-key" + assert root.find(f"{ns}IsTruncated").text == "true" + + def test_with_delimiter_and_prefixes(self): + """Test V1 list with delimiter and common prefixes.""" + xml = xml_responses.list_objects_v1( + bucket="my-bucket", + prefix="", + marker=None, + delimiter="/", + max_keys=1000, + is_truncated=False, + next_marker=None, + objects=[{"key": "root.txt", "last_modified": "2024-01-15T10:00:00Z", "etag": "abc", "size": 100}], + common_prefixes=["dir1/", "dir2/"], + ) + + root = ET.fromstring(xml) + ns = "{http://s3.amazonaws.com/doc/2006-03-01/}" + assert root.find(f"{ns}Delimiter").text == "/" + prefixes = root.findall(f"{ns}CommonPrefixes/{ns}Prefix") + assert len(prefixes) == 2 + prefix_values = [p.text for p in prefixes] + assert "dir1/" in prefix_values + assert "dir2/" in prefix_values + + +class TestGetTagging: + """Test GetObjectTaggingResult XML.""" + + def test_empty_tags(self): + """Test empty tag set.""" + xml = xml_responses.get_tagging(tags=[]) + + root = ET.fromstring(xml) + ns = "{http://s3.amazonaws.com/doc/2006-03-01/}" + assert len(root.findall(f"{ns}TagSet/{ns}Tag")) == 0 + + def test_with_tags(self): + """Test with multiple tags.""" + tags = [ + {"Key": "Environment", "Value": "Production"}, + {"Key": "Project", "Value": "S3Proxy"}, + ] + xml = xml_responses.get_tagging(tags=tags) + + root = ET.fromstring(xml) + ns = "{http://s3.amazonaws.com/doc/2006-03-01/}" + tag_elements = root.findall(f"{ns}TagSet/{ns}Tag") + assert len(tag_elements) == 2 + + # Check tag values + tag_dict = {} + for tag in tag_elements: + key = tag.find(f"{ns}Key").text + value = tag.find(f"{ns}Value").text + tag_dict[key] = value + + assert tag_dict["Environment"] == "Production" + assert tag_dict["Project"] == "S3Proxy" + + def 
test_special_characters_escaped(self): + """Test special characters in tags are escaped.""" + tags = [{"Key": "key<>&", "Value": "value<>&"}] + xml = xml_responses.get_tagging(tags=tags) + + # Should parse without error + root = ET.fromstring(xml) + ns = "{http://s3.amazonaws.com/doc/2006-03-01/}" + tag = root.find(f"{ns}TagSet/{ns}Tag") + assert tag.find(f"{ns}Key").text == "key<>&" + assert tag.find(f"{ns}Value").text == "value<>&" + + +class TestUploadPartCopyResult: + """Test CopyPartResult XML.""" + + def test_basic_response(self): + """Test basic copy part result.""" + xml = xml_responses.upload_part_copy_result("abc123", "2024-01-15T10:30:00.000Z") + + root = ET.fromstring(xml) + ns = "{http://s3.amazonaws.com/doc/2006-03-01/}" + assert '"abc123"' in root.find(f"{ns}ETag").text + assert root.find(f"{ns}LastModified").text == "2024-01-15T10:30:00.000Z"