diff --git a/.env.example b/.env.example index 8d91968..3ed36de 100644 --- a/.env.example +++ b/.env.example @@ -32,6 +32,8 @@ API_KEY_CACHE_TTL=300 RATE_LIMIT_ENABLED=true # Redis Configuration +# Deployment mode: standalone (default), cluster, or sentinel +REDIS_MODE=standalone REDIS_HOST=localhost REDIS_PORT=6379 REDIS_PASSWORD= @@ -42,6 +44,33 @@ REDIS_MAX_CONNECTIONS=20 REDIS_SOCKET_TIMEOUT=5 REDIS_SOCKET_CONNECT_TIMEOUT=5 +# Optional key prefix — useful when sharing a Redis instance across environments +# All keys will be stored as "<prefix>:<key>" (e.g. "prod:sessions:abc") +REDIS_KEY_PREFIX= + +# Redis Cluster Mode (REDIS_MODE=cluster) +# Comma-separated list of host:port pairs for cluster startup nodes +# REDIS_CLUSTER_NODES=node1:6379,node2:6379,node3:6379 + +# Redis Sentinel Mode (REDIS_MODE=sentinel) +# Comma-separated list of host:port pairs for Sentinel instances +# REDIS_SENTINEL_NODES=sentinel1:26379,sentinel2:26379,sentinel3:26379 +# REDIS_SENTINEL_MASTER=mymaster +# REDIS_SENTINEL_PASSWORD= + +# Redis TLS/SSL Configuration +# Required for most managed Redis services (GCP Memorystore, AWS ElastiCache, Azure Cache) +REDIS_TLS_ENABLED=false +# REDIS_TLS_CA_CERT_FILE=/path/to/ca.crt +# REDIS_TLS_CERT_FILE=/path/to/client.crt +# REDIS_TLS_KEY_FILE=/path/to/client.key +# REDIS_TLS_INSECURE=false +# Hostname verification is off by default because managed Redis services +# and Redis Cluster mode expose node IPs that don't match cert CN/SAN. +# The CA certificate chain is still fully verified. Enable hostname +# checking when your Redis server hostnames match certificate CN/SAN. 
+# REDIS_TLS_CHECK_HOSTNAME=false + # MinIO/S3 Configuration MINIO_ENDPOINT=localhost:9000 MINIO_ACCESS_KEY=minioadmin @@ -144,6 +173,37 @@ METRICS_ARCHIVE_RETENTION_DAYS=90 ENABLE_NETWORK_ISOLATION=true ENABLE_FILESYSTEM_ISOLATION=true +# Kubernetes Execution Configuration +# Execution mode: 'agent' (default, recommended) or 'nsenter' (legacy) +# agent: Executor-agent binary runs inside the main container. +# No nsenter, no capabilities, no privilege escalation. +# Compatible with GKE Sandbox (gVisor) and restricted Pod Security Standards. +# nsenter: Sidecar uses nsenter to enter the main container's mount namespace. +# Requires shareProcessNamespace, SYS_PTRACE/SYS_ADMIN/SYS_CHROOT caps, +# and allowPrivilegeEscalation: true. NOT compatible with GKE Sandbox. +K8S_EXECUTION_MODE=agent +# K8S_EXECUTOR_PORT=9090 # Port for the executor-agent HTTP server (agent mode only) + +# Sidecar image — must match the execution mode: +# agent mode: aronmuon/kubecoderun-sidecar-agent:latest (default) +# nsenter mode: aronmuon/kubecoderun-sidecar-nsenter:latest +# K8S_SIDECAR_IMAGE=aronmuon/kubecoderun-sidecar-agent:latest + +# Image pull policy for execution pods (Always, IfNotPresent, Never) +# K8S_IMAGE_PULL_POLICY=Always + +# Image pull secrets for private container registries (comma-separated secret names) +# These Kubernetes secrets must already exist in the execution namespace. +# Leave empty or unset if not using private registries. 
+# K8S_IMAGE_PULL_SECRETS=my-registry-secret,another-secret + +# GKE Sandbox (gVisor) Configuration +# Requires K8S_EXECUTION_MODE=agent (nsenter is incompatible with gVisor) +# GKE_SANDBOX_ENABLED=false +# GKE_SANDBOX_RUNTIME_CLASS=gvisor +# GKE_SANDBOX_NODE_SELECTOR={} +# GKE_SANDBOX_CUSTOM_TOLERATIONS=[] + # WAN Network Access Configuration # When enabled, execution containers can access the public internet # but are blocked from accessing host, other containers, and private networks diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 238e30a..0560fb2 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -145,7 +145,7 @@ jobs: uses: ./.github/workflows/docker-build-reusable.yml secrets: inherit with: - image_name: kubecoderun-sidecar + image_name: kubecoderun-sidecar-agent dockerfile: docker/sidecar/Dockerfile context: docker/sidecar image_tag: ${{ needs.changes.outputs.image_tag }} @@ -344,7 +344,7 @@ jobs: uses: ./.github/workflows/docker-retag-reusable.yml secrets: inherit with: - image_name: kubecoderun-sidecar + image_name: kubecoderun-sidecar-agent new_tag: ${{ needs.changes.outputs.image_tag }} previous_tag: ${{ needs.changes.outputs.previous_tag }} diff --git a/.gitignore b/.gitignore index 557bbf9..86d1a16 100644 --- a/.gitignore +++ b/.gitignore @@ -200,3 +200,5 @@ config/local.py # Hatch auto-generated version file _version.py + +.pdm-python diff --git a/docker-compose.redis-cluster-tls.yml b/docker-compose.redis-cluster-tls.yml new file mode 100644 index 0000000..ceff514 --- /dev/null +++ b/docker-compose.redis-cluster-tls.yml @@ -0,0 +1,231 @@ +# Redis Cluster with TLS for integration testing +# +# This mimics a production GCP Memorystore Redis Cluster setup: +# - 6-node cluster (3 masters + 3 replicas) with TLS enabled +# - No authentication (no password) +# - Server-side TLS with CA verification (no mutual TLS / no client certs) +# - Accessible on localhost ports 
6380-6385 (TLS) +# +# Usage: +# docker compose -f docker-compose.redis-cluster-tls.yml up -d +# +# Test with: +# redis-cli -c -p 6380 --tls --cacert tests/tls-certs/ca.crt CLUSTER INFO + +services: + redis-tls-node-0: + image: redis:7-alpine + container_name: redis-tls-cluster-0 + ports: + - "127.0.0.1:6380:6380" + - "127.0.0.1:16380:16380" + volumes: + - redis-tls-cluster-0:/data + - ./tests/tls-certs:/tls:ro + command: > + redis-server + --port 0 + --tls-port 6380 + --tls-cert-file /tls/redis.crt + --tls-key-file /tls/redis.key + --tls-ca-cert-file /tls/ca.crt + --tls-auth-clients no + --tls-replication yes + --cluster-enabled yes + --cluster-config-file nodes.conf + --cluster-node-timeout 5000 + --appendonly yes + --bind 0.0.0.0 + --protected-mode no + healthcheck: + test: ["CMD", "redis-cli", "-p", "6380", "--tls", "--cert", "/tls/redis.crt", "--key", "/tls/redis.key", "--cacert", "/tls/ca.crt", "ping"] + interval: 5s + timeout: 3s + retries: 10 + + redis-tls-node-1: + image: redis:7-alpine + container_name: redis-tls-cluster-1 + ports: + - "127.0.0.1:6381:6381" + - "127.0.0.1:16381:16381" + volumes: + - redis-tls-cluster-1:/data + - ./tests/tls-certs:/tls:ro + command: > + redis-server + --port 0 + --tls-port 6381 + --tls-cert-file /tls/redis.crt + --tls-key-file /tls/redis.key + --tls-ca-cert-file /tls/ca.crt + --tls-auth-clients no + --tls-replication yes + --cluster-enabled yes + --cluster-config-file nodes.conf + --cluster-node-timeout 5000 + --appendonly yes + --bind 0.0.0.0 + --protected-mode no + healthcheck: + test: ["CMD", "redis-cli", "-p", "6381", "--tls", "--cert", "/tls/redis.crt", "--key", "/tls/redis.key", "--cacert", "/tls/ca.crt", "ping"] + interval: 5s + timeout: 3s + retries: 10 + + redis-tls-node-2: + image: redis:7-alpine + container_name: redis-tls-cluster-2 + ports: + - "127.0.0.1:6382:6382" + - "127.0.0.1:16382:16382" + volumes: + - redis-tls-cluster-2:/data + - ./tests/tls-certs:/tls:ro + command: > + redis-server + --port 0 + 
--tls-port 6382 + --tls-cert-file /tls/redis.crt + --tls-key-file /tls/redis.key + --tls-ca-cert-file /tls/ca.crt + --tls-auth-clients no + --tls-replication yes + --cluster-enabled yes + --cluster-config-file nodes.conf + --cluster-node-timeout 5000 + --appendonly yes + --bind 0.0.0.0 + --protected-mode no + healthcheck: + test: ["CMD", "redis-cli", "-p", "6382", "--tls", "--cert", "/tls/redis.crt", "--key", "/tls/redis.key", "--cacert", "/tls/ca.crt", "ping"] + interval: 5s + timeout: 3s + retries: 10 + + redis-tls-node-3: + image: redis:7-alpine + container_name: redis-tls-cluster-3 + ports: + - "127.0.0.1:6383:6383" + - "127.0.0.1:16383:16383" + volumes: + - redis-tls-cluster-3:/data + - ./tests/tls-certs:/tls:ro + command: > + redis-server + --port 0 + --tls-port 6383 + --tls-cert-file /tls/redis.crt + --tls-key-file /tls/redis.key + --tls-ca-cert-file /tls/ca.crt + --tls-auth-clients no + --tls-replication yes + --cluster-enabled yes + --cluster-config-file nodes.conf + --cluster-node-timeout 5000 + --appendonly yes + --bind 0.0.0.0 + --protected-mode no + healthcheck: + test: ["CMD", "redis-cli", "-p", "6383", "--tls", "--cert", "/tls/redis.crt", "--key", "/tls/redis.key", "--cacert", "/tls/ca.crt", "ping"] + interval: 5s + timeout: 3s + retries: 10 + + redis-tls-node-4: + image: redis:7-alpine + container_name: redis-tls-cluster-4 + ports: + - "127.0.0.1:6384:6384" + - "127.0.0.1:16384:16384" + volumes: + - redis-tls-cluster-4:/data + - ./tests/tls-certs:/tls:ro + command: > + redis-server + --port 0 + --tls-port 6384 + --tls-cert-file /tls/redis.crt + --tls-key-file /tls/redis.key + --tls-ca-cert-file /tls/ca.crt + --tls-auth-clients no + --tls-replication yes + --cluster-enabled yes + --cluster-config-file nodes.conf + --cluster-node-timeout 5000 + --appendonly yes + --bind 0.0.0.0 + --protected-mode no + healthcheck: + test: ["CMD", "redis-cli", "-p", "6384", "--tls", "--cert", "/tls/redis.crt", "--key", "/tls/redis.key", "--cacert", "/tls/ca.crt", 
"ping"] + interval: 5s + timeout: 3s + retries: 10 + + redis-tls-node-5: + image: redis:7-alpine + container_name: redis-tls-cluster-5 + ports: + - "127.0.0.1:6385:6385" + - "127.0.0.1:16385:16385" + volumes: + - redis-tls-cluster-5:/data + - ./tests/tls-certs:/tls:ro + command: > + redis-server + --port 0 + --tls-port 6385 + --tls-cert-file /tls/redis.crt + --tls-key-file /tls/redis.key + --tls-ca-cert-file /tls/ca.crt + --tls-auth-clients no + --tls-replication yes + --cluster-enabled yes + --cluster-config-file nodes.conf + --cluster-node-timeout 5000 + --appendonly yes + --bind 0.0.0.0 + --protected-mode no + healthcheck: + test: ["CMD", "redis-cli", "-p", "6385", "--tls", "--cert", "/tls/redis.crt", "--key", "/tls/redis.key", "--cacert", "/tls/ca.crt", "ping"] + interval: 5s + timeout: 3s + retries: 10 + + # Initializer: creates TLS cluster from the 6 nodes + redis-tls-cluster-init: + image: redis:7-alpine + container_name: redis-tls-cluster-init + volumes: + - ./tests/tls-certs:/tls:ro + depends_on: + redis-tls-node-0: + condition: service_healthy + redis-tls-node-1: + condition: service_healthy + redis-tls-node-2: + condition: service_healthy + redis-tls-node-3: + condition: service_healthy + redis-tls-node-4: + condition: service_healthy + redis-tls-node-5: + condition: service_healthy + restart: "no" + entrypoint: + - sh + - -c + - | + echo 'Creating Redis TLS Cluster...' 
&& + redis-cli --cluster create redis-tls-node-0:6380 redis-tls-node-1:6381 redis-tls-node-2:6382 redis-tls-node-3:6383 redis-tls-node-4:6384 redis-tls-node-5:6385 --cluster-replicas 1 --cluster-yes --tls --cert /tls/redis.crt --key /tls/redis.key --cacert /tls/ca.crt && + echo 'Redis TLS Cluster created successfully' && + redis-cli -h redis-tls-node-0 -p 6380 --tls --cert /tls/redis.crt --key /tls/redis.key --cacert /tls/ca.crt CLUSTER INFO + +volumes: + redis-tls-cluster-0: + redis-tls-cluster-1: + redis-tls-cluster-2: + redis-tls-cluster-3: + redis-tls-cluster-4: + redis-tls-cluster-5: diff --git a/docker-compose.redis-cluster.yml b/docker-compose.redis-cluster.yml new file mode 100644 index 0000000..7f62c52 --- /dev/null +++ b/docker-compose.redis-cluster.yml @@ -0,0 +1,182 @@ +# Redis Cluster for integration testing +# +# Usage: +# docker compose -f docker-compose.redis-cluster.yml up -d +# +# This creates a 6-node Redis Cluster (3 masters + 3 replicas) +# accessible on localhost ports 7000-7005. 
+# +# Test with: redis-cli -c -p 7000 CLUSTER INFO + +services: + redis-node-0: + image: redis:7-alpine + container_name: redis-cluster-0 + ports: + - "127.0.0.1:7000:7000" + - "127.0.0.1:17000:17000" + volumes: + - redis-cluster-0:/data + command: > + redis-server + --port 7000 + --cluster-enabled yes + --cluster-config-file nodes.conf + --cluster-node-timeout 5000 + --appendonly yes + --bind 0.0.0.0 + --protected-mode no + healthcheck: + test: ["CMD", "redis-cli", "-p", "7000", "ping"] + interval: 5s + timeout: 3s + retries: 5 + + redis-node-1: + image: redis:7-alpine + container_name: redis-cluster-1 + ports: + - "127.0.0.1:7001:7001" + - "127.0.0.1:17001:17001" + volumes: + - redis-cluster-1:/data + command: > + redis-server + --port 7001 + --cluster-enabled yes + --cluster-config-file nodes.conf + --cluster-node-timeout 5000 + --appendonly yes + --bind 0.0.0.0 + --protected-mode no + healthcheck: + test: ["CMD", "redis-cli", "-p", "7001", "ping"] + interval: 5s + timeout: 3s + retries: 5 + + redis-node-2: + image: redis:7-alpine + container_name: redis-cluster-2 + ports: + - "127.0.0.1:7002:7002" + - "127.0.0.1:17002:17002" + volumes: + - redis-cluster-2:/data + command: > + redis-server + --port 7002 + --cluster-enabled yes + --cluster-config-file nodes.conf + --cluster-node-timeout 5000 + --appendonly yes + --bind 0.0.0.0 + --protected-mode no + healthcheck: + test: ["CMD", "redis-cli", "-p", "7002", "ping"] + interval: 5s + timeout: 3s + retries: 5 + + redis-node-3: + image: redis:7-alpine + container_name: redis-cluster-3 + ports: + - "127.0.0.1:7003:7003" + - "127.0.0.1:17003:17003" + volumes: + - redis-cluster-3:/data + command: > + redis-server + --port 7003 + --cluster-enabled yes + --cluster-config-file nodes.conf + --cluster-node-timeout 5000 + --appendonly yes + --bind 0.0.0.0 + --protected-mode no + healthcheck: + test: ["CMD", "redis-cli", "-p", "7003", "ping"] + interval: 5s + timeout: 3s + retries: 5 + + redis-node-4: + image: redis:7-alpine + 
container_name: redis-cluster-4 + ports: + - "127.0.0.1:7004:7004" + - "127.0.0.1:17004:17004" + volumes: + - redis-cluster-4:/data + command: > + redis-server + --port 7004 + --cluster-enabled yes + --cluster-config-file nodes.conf + --cluster-node-timeout 5000 + --appendonly yes + --bind 0.0.0.0 + --protected-mode no + healthcheck: + test: ["CMD", "redis-cli", "-p", "7004", "ping"] + interval: 5s + timeout: 3s + retries: 5 + + redis-node-5: + image: redis:7-alpine + container_name: redis-cluster-5 + ports: + - "127.0.0.1:7005:7005" + - "127.0.0.1:17005:17005" + volumes: + - redis-cluster-5:/data + command: > + redis-server + --port 7005 + --cluster-enabled yes + --cluster-config-file nodes.conf + --cluster-node-timeout 5000 + --appendonly yes + --bind 0.0.0.0 + --protected-mode no + healthcheck: + test: ["CMD", "redis-cli", "-p", "7005", "ping"] + interval: 5s + timeout: 3s + retries: 5 + + # Initializer: creates cluster from the 6 nodes + redis-cluster-init: + image: redis:7-alpine + container_name: redis-cluster-init + depends_on: + redis-node-0: + condition: service_healthy + redis-node-1: + condition: service_healthy + redis-node-2: + condition: service_healthy + redis-node-3: + condition: service_healthy + redis-node-4: + condition: service_healthy + redis-node-5: + condition: service_healthy + restart: "no" + entrypoint: > + sh -c " + echo 'Creating Redis Cluster...' 
&& + redis-cli --cluster create redis-node-0:7000 redis-node-1:7001 redis-node-2:7002 redis-node-3:7003 redis-node-4:7004 redis-node-5:7005 --cluster-replicas 1 --cluster-yes && + echo 'Redis Cluster created successfully' && + redis-cli -h redis-node-0 -p 7000 CLUSTER INFO + " + +volumes: + redis-cluster-0: + redis-cluster-1: + redis-cluster-2: + redis-cluster-3: + redis-cluster-4: + redis-cluster-5: diff --git a/docker/c-cpp.Dockerfile b/docker/c-cpp.Dockerfile index 38ead4e..5dd26f1 100644 --- a/docker/c-cpp.Dockerfile +++ b/docker/c-cpp.Dockerfile @@ -1,7 +1,8 @@ # syntax=docker/dockerfile:1 # C/C++ execution environment with Docker Hardened Images. +# Uses -dev variant because compilers and dev libraries must be available at runtime. -FROM dhi.io/debian-base:trixie +FROM dhi.io/debian-base:trixie-debian13-dev ARG BUILD_DATE ARG VERSION diff --git a/docker/d.Dockerfile b/docker/d.Dockerfile index da2d92f..b64bf77 100644 --- a/docker/d.Dockerfile +++ b/docker/d.Dockerfile @@ -1,7 +1,8 @@ # syntax=docker/dockerfile:1 # D execution environment with Docker Hardened Images. +# Uses -dev variant because compilers must be available at runtime. -FROM dhi.io/debian-base:trixie +FROM dhi.io/debian-base:trixie-debian13-dev ARG BUILD_DATE ARG VERSION diff --git a/docker/fortran.Dockerfile b/docker/fortran.Dockerfile index 928e1e1..8d20f7a 100644 --- a/docker/fortran.Dockerfile +++ b/docker/fortran.Dockerfile @@ -1,7 +1,8 @@ # syntax=docker/dockerfile:1 # Fortran execution environment with Docker Hardened Images. +# Uses -dev variant because compilers and dev libraries must be available at runtime. 
-FROM dhi.io/debian-base:trixie +FROM dhi.io/debian-base:trixie-debian13-dev ARG BUILD_DATE ARG VERSION diff --git a/docker/go.Dockerfile b/docker/go.Dockerfile index bf37a74..4f86354 100644 --- a/docker/go.Dockerfile +++ b/docker/go.Dockerfile @@ -3,7 +3,7 @@ ################################ # Stage 1: Build and download dependencies -FROM dhi.io/golang:1.25-debian13-dev AS builder +FROM dhi.io/golang:1.26-debian13-dev AS builder SHELL ["/bin/bash", "-o", "pipefail", "-c"] @@ -26,7 +26,7 @@ RUN cd /tmp/gosetup && \ ################################ # Stage 2: Prepare runtime directories -FROM dhi.io/golang:1.25-debian13-dev AS runtime-deps +FROM dhi.io/golang:1.26-debian13-dev AS runtime-deps SHELL ["/bin/bash", "-o", "pipefail", "-c"] @@ -35,7 +35,7 @@ RUN mkdir -p /mnt/data /mnt/data/go-build && chown -R 65532:65532 /mnt/data ################################ # Stage 3: Minimal runtime image -FROM dhi.io/golang:1.25-debian13 AS final +FROM dhi.io/golang:1.26-debian13 AS final ARG BUILD_DATE ARG VERSION diff --git a/docker/php.Dockerfile b/docker/php.Dockerfile index fe28565..1549d80 100644 --- a/docker/php.Dockerfile +++ b/docker/php.Dockerfile @@ -2,8 +2,8 @@ # PHP execution environment with Docker Hardened Images. 
# PHP version configuration - single source of truth -ARG PHP_VERSION=8.4.17 -ARG PHP_MAJOR=8.4 +ARG PHP_VERSION=8.5.3 +ARG PHP_MAJOR=8.5 ARG DEBIAN_VERSION=debian13 ARG BUILD_DATE diff --git a/docker/python.Dockerfile b/docker/python.Dockerfile index 197e6d7..e3b9917 100644 --- a/docker/python.Dockerfile +++ b/docker/python.Dockerfile @@ -83,33 +83,34 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"] RUN mkdir -p /usr/lib/x86_64-linux-gnu /usr/lib/aarch64-linux-gnu && \ apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - # Runtime libraries (counterparts to -dev packages in builder) - libxml2 \ - libxslt1.1 \ - libffi8 \ - libssl3t64 \ + # Core System Utilities + libgomp1 \ + liblz4-1 \ + # Image Processing (Pillow, OpenCV) + ffmpeg \ libjpeg62-turbo \ libpng16-16t64 \ libtiff6 \ + libwebp7 \ libopenjp2-7 \ - libfreetype6 \ liblcms2-2 \ - libwebp7 \ - libportaudio2 \ - libpulse0 \ - # External tools needed at runtime + # XML/HTML Processing (lxml, beautifulsoup4) + libxml2 \ + libxslt1.1 \ + # Cryptography (cryptography, PyOpenSSL) + libffi8 \ + libssl3t64 \ + # Font Support (Matplotlib, WordCloud) + libfreetype6 \ + fontconfig \ + # External Tools (Runtime executables) poppler-utils \ - tesseract-ocr \ - pandoc \ - ffmpeg \ - flac \ - antiword \ - unrtf \ && apt-get autoremove -y \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* \ && mkdir -p /mnt/data && chown 65532:65532 /mnt/data + ################################ # Final stage - minimal runtime image ################################ @@ -129,13 +130,7 @@ LABEL org.opencontainers.image.title="KubeCodeRun Python Environment" \ COPY --from=runtime-deps /usr/lib/x86_64-linux-gnu /usr/lib/x86_64-linux-gnu COPY --from=runtime-deps /usr/lib/aarch64-linux-gnu /usr/lib/aarch64-linux-gnu COPY --from=runtime-deps /usr/bin/pdftotext /usr/bin/pdftoppm /usr/bin/pdfinfo /usr/bin/ -COPY --from=runtime-deps /usr/bin/tesseract /usr/bin/ -COPY --from=runtime-deps /usr/bin/pandoc 
/usr/bin/ COPY --from=runtime-deps /usr/bin/ffmpeg /usr/bin/ffprobe /usr/bin/ -COPY --from=runtime-deps /usr/bin/flac /usr/bin/ -COPY --from=runtime-deps /usr/bin/antiword /usr/bin/ -COPY --from=runtime-deps /usr/bin/unrtf /usr/bin/ -COPY --from=runtime-deps /usr/share/tesseract-ocr /usr/share/tesseract-ocr # Copy installed Python packages from builder # DHI Python is installed in /opt/python, not /usr/local diff --git a/docker/r.Dockerfile b/docker/r.Dockerfile index e8343e4..626a44b 100644 --- a/docker/r.Dockerfile +++ b/docker/r.Dockerfile @@ -9,7 +9,7 @@ ARG VCS_REF ################################ # Builder stage - install R and compile packages ################################ -FROM dhi.io/debian-base:trixie AS builder +FROM dhi.io/debian-base:trixie-debian13-dev AS builder SHELL ["/bin/bash", "-o", "pipefail", "-c"] @@ -40,7 +40,7 @@ RUN R -e "options(repos = c(CRAN = 'https://packagemanager.posit.co/cran/__linux ################################ # Final stage - runtime image ################################ -FROM dhi.io/debian-base:trixie AS final +FROM dhi.io/debian-base:trixie-debian13-dev AS final ARG BUILD_DATE ARG VERSION diff --git a/docker/requirements/go.mod b/docker/requirements/go.mod index bd7d5d6..8b9b86a 100644 --- a/docker/requirements/go.mod +++ b/docker/requirements/go.mod @@ -1,18 +1,22 @@ module preload -go 1.25 +go 1.26 require ( - github.com/gorilla/mux v1.8.1 + github.com/davidbyttow/govips/v2 v2.16.0 + github.com/fatih/color v1.18.0 github.com/gin-gonic/gin v1.11.0 - github.com/sirupsen/logrus v1.9.4 + github.com/gocarina/gocsv v0.0.0-20240520201108-78e41c74b4b1 github.com/google/uuid v1.6.0 - github.com/shopspring/decimal v1.4.0 - gonum.org/v1/gonum v0.17.0 + github.com/gorilla/mux v1.8.1 github.com/montanaflynn/stats v0.7.1 - github.com/xuri/excelize/v2 v2.10.0 + github.com/shopspring/decimal v1.4.0 + github.com/sirupsen/logrus v1.9.4 github.com/spf13/cobra v1.10.2 - github.com/fatih/color v1.18.0 - github.com/gocarina/gocsv 
v0.0.0-20240520201108-78e41c74b4b1 github.com/tidwall/gjson v1.18.0 + github.com/unidoc/unioffice/v2 v2.8.0 + github.com/xuri/excelize/v2 v2.10.0 + github.com/yuin/goldmark v1.7.16 + gonum.org/v1/gonum v0.17.0 + gopkg.in/yaml.v3 v3.0.1 ) diff --git a/docker/requirements/nodejs.txt b/docker/requirements/nodejs.txt index 665e959..e8b5683 100644 --- a/docker/requirements/nodejs.txt +++ b/docker/requirements/nodejs.txt @@ -1,34 +1,58 @@ -# Node.js global packages -# One package per line for npm install -g +# Node.js packages +# One package per line: npm install -g -lodash +# Core utilities +archiver axios -moment -uuid chalk commander -express -fs-extra -csv-parser -json2csv -papaparse -archiver -yauzl -pdf-lib +crypto-js date-fns +fs-extra +lodash +luxon +uuid validator -crypto-js +zod + +# Web scraping & parsing +cheerio + +# Data processing +papaparse mathjs ml-matrix simple-statistics -sharp + +# File formats - Office documents +exceljs +xlsx +pptxgenjs +mammoth + +# File formats - Other +js-yaml +marked +pdf-lib + +# Images jimp +sharp + +# Compression +yauzl + +# TypeScript (recommended local + global) typescript ts-node -xlsx -exceljs -luxon -zod +@types/node + +# Web frameworks +express handlebars -cheerio -marked + +# UI libraries +jquery +jquery-ui-dist +three +p5 diff --git a/docker/requirements/python-analysis.txt b/docker/requirements/python-analysis.txt index ec53ec8..7b2c6ae 100644 --- a/docker/requirements/python-analysis.txt +++ b/docker/requirements/python-analysis.txt @@ -1,19 +1,18 @@ -# Math, science, and analysis packages - -scipy>=1.11 -scikit-learn>=1.3 -statsmodels>=0.14 -sympy>=1.12 -mpmath>=1.3 -numba>=0.58 -llvmlite>=0.41 -numexpr>=2.8 -networkx>=3.2 -lifelines>=0.27 -autograd>=1.6 -autograd-gamma>=0.5 -formulaic>=1.0 -patsy>=0.5 -kiwisolver>=1.4 -joblib>=1.3 -threadpoolctl>=3.2 +# Math, science, and analysis +autograd-gamma>=0.5.0 +autograd>=1.7.0 +formulaic>=1.0.2 +joblib>=1.4.2 +kiwisolver>=1.4.7 +lifelines>=0.30.0 +llvmlite>=0.44.0 
+mpmath>=1.3.0 +networkx>=3.4.2 +numba>=0.61.0 +numexpr>=2.10.2 +patsy>=1.0.1 +scikit-learn>=1.7.2 +scipy>=1.16.1 +statsmodels>=0.14.4 +sympy>=1.13.3 +threadpoolctl>=3.5.0 diff --git a/docker/requirements/python-core.txt b/docker/requirements/python-core.txt index 3fbfb33..a6e78cd 100644 --- a/docker/requirements/python-core.txt +++ b/docker/requirements/python-core.txt @@ -1,19 +1,17 @@ -# Core data processing packages -# These are the most stable and form the foundation - -cloudpickle>=3.0 -lz4>=4.3.0 -numpy>=1.24 -pandas>=2.0 -openpyxl>=3.1 -xlrd>=2.0 -XlsxWriter>=3.1 -pyarrow>=14.0 -tabulate>=0.9 -six>=1.16 -packaging>=23.0 -python-dateutil>=2.8 -pytz>=2024.1 -tzdata>=2024.1 -pendulum>=3.0 -pydantic>=2.5 +# Core data processing +cloudpickle>=3.1.0 +lz4>=4.3.3 +numpy>=2.2.1 +pandas>=3.0.0 +openpyxl>=3.1.5 +xlrd>=2.0.1 +XlsxWriter>=3.2.0 +pyarrow>=18.1.0 +tabulate>=0.9.0 +six>=1.17.0 +packaging>=24.2 +python-dateutil>=2.9.0 +pytz>=2025.1 +tzdata>=2025.2 +pendulum>=3.0.0 +pydantic>=2.10.6 diff --git a/docker/requirements/python-documents.txt b/docker/requirements/python-documents.txt index 8317219..6d861d8 100644 --- a/docker/requirements/python-documents.txt +++ b/docker/requirements/python-documents.txt @@ -1,41 +1,34 @@ -# Document processing packages (PDF, Office, etc.) 
+# Document processing - PDF +pdf2image>=1.17.0 +pdfminer.six>=20231228 +pypdf>=5.1.0 +PyPDF2>=3.0.1 +reportlab>=4.2.5 -# PDF -PyPDF2>=3.0 -pdfminer.six>=20221105 -pdfminer>=20191125 -pdf2image>=1.16 -reportlab>=4.0 - -# Office documents -python-docx>=1.1 -python-pptx>=0.6 -mammoth>=1.6 +# Document processing - Office (Excel, PowerPoint, Word) +docx2python>=3.1.1 docx2txt>=0.8 -docx2python>=2.0 -docxcompose>=1.4 -docxtpl>=0.16 -doc2pdf>=0.2 - -# XML/HTML parsing -beautifulsoup4>=4.12 -lxml>=4.9 -soupsieve>=2.5 -defusedxml>=0.7 -cssselect2>=0.7 -webencodings>=0.5 -tinycss2>=1.2 +docxcompose>=1.4.0 +docxtpl>=0.18.0 +mammoth>=1.8.0 +openpyxl>=3.1.5 +python-docx>=1.1.2 +python-pptx>=1.0.2 -# Text extraction -textract-py3>=1.5 -antiword>=0.1 -pytesseract>=0.3 -pypandoc>=1.12 +# Document processing - XML/HTML +beautifulsoup4>=4.12.3 +cssselect2>=0.7.0 +defusedxml>=0.7.1 +lxml>=5.3.0 +soupsieve>=2.6 +tinycss2>=1.4.0 +webencodings>=0.5.1 -# Other formats -vsdx>=0.5 -compressed-rtf>=1.0 -extract-msg>=0.47 -olefile>=0.47 -ebcdic>=1.1 -ExifRead>=3.0 +# Text and format processing (YAML, Markdown, JSON, CSV) +chardet>=5.2.0 +markdown-it-py>=3.0.0 +markdown>=3.7 +pyparsing>=3.2.1 +python-frontmatter>=1.1.0 +pyyaml>=6.0.2 +toml>=0.10.2 diff --git a/docker/requirements/python-utilities.txt b/docker/requirements/python-utilities.txt index b7a8521..171c571 100644 --- a/docker/requirements/python-utilities.txt +++ b/docker/requirements/python-utilities.txt @@ -1,65 +1,43 @@ -# Utility packages (cryptography, encoding, misc) - -# Cryptography -cryptography>=41.0 -bcrypt>=4.1 -PyNaCl>=1.5 -pycryptodome>=3.19 -passlib>=1.7 - -# Encoding/parsing -chardet>=5.2 -cffi>=1.16 -pycparser>=2.21 -pyparsing>=3.1 -base58>=2.1 -cobble>=0.1 -xxhash>=3.4 - -# Templates and markup -Jinja2>=3.1 -MarkupSafe>=2.1 - -# Barcodes/QR -qrcode>=7.4 -python-barcode>=0.15 - -# Other utilities -argcomplete>=3.2 -babel>=2.14 -deprecation>=2.1 -hachoir>=3.3 -interface-meta>=1.3 -paragraphs>=0.1 
-sortedcontainers>=2.4 -tenacity>=8.2 -typing-extensions>=4.9 +# Cryptography and security +bcrypt>=4.2.1 +cryptography>=44.0.0 +passlib>=1.7.4 +pycryptodome>=3.21.0 +PyNaCl>=1.5.0 + +# Utilities +argcomplete>=3.5.3 +babel>=2.16.0 +base58>=2.1.1 +cffi>=1.17.1 +deprecation>=2.1.0 +hachoir>=3.3.0 +Jinja2>=3.1.5 +MarkupSafe>=3.0.2 +pycparser>=2.22 +python-barcode>=0.15.1 +qrcode>=8.0 +regex>=2024.11.6 +sortedcontainers>=2.4.0 +tenacity>=9.0.0 +typing-extensions>=4.12.2 tzlocal>=5.2 -Whoosh>=2.7 -wordcloud>=1.9 -wrapt>=1.16 -regex>=2023.12 - -# HTTP clients -requests>=2.31 -httpx>=0.25 - -# CLI and output formatting -rich>=13.7 -click>=8.1 -typer>=0.9 - -# Data generation and formatting -faker>=22.0 -humanize>=4.9 +wordcloud>=1.9.4 +wrapt>=1.17.2 +xxhash>=3.5.0 + +# HTTP and networking +certifi>=2025.1.31 +httpx>=0.28.1 +requests>=2.32.3 +urllib3>=2.3.0 + +# CLI and formatting +click>=8.1.8 +faker>=33.3.0 +humanize>=4.11.0 +rich>=13.9.4 +typer>=0.15.1 # Configuration -python-dotenv>=1.0 -toml>=0.10 -pyyaml>=6.0 - -# Audio -SpeechRecognition>=3.10 - -# Email -IMAPClient>=3.0 +python-dotenv>=1.0.1 diff --git a/docker/requirements/python-visualization.txt b/docker/requirements/python-visualization.txt index 2ba55a0..dba6e97 100644 --- a/docker/requirements/python-visualization.txt +++ b/docker/requirements/python-visualization.txt @@ -1,14 +1,13 @@ -# Visualization and graphics packages - -matplotlib>=3.8 -seaborn>=0.13 -plotly>=5.18 -pillow>=10.0 -imageio>=2.33 -scikit-image>=0.22 -opencv-python-headless>=4.8 -contourpy>=1.2 -cycler>=0.12 -fonttools>=4.47 -lazy-loader>=0.3 -tifffile>=2024.1 +# Visualization +contourpy>=1.3.1 +cycler>=0.12.1 +fonttools>=4.55.3 +imageio>=2.36.1 +lazy-loader>=0.4 +matplotlib>=3.10.0 +opencv-python-headless>=4.10.0.84 +pillow>=11.1.0 +plotly>=5.24.1 +scikit-image>=0.25.0 +seaborn>=0.13.2 +tifffile>=2025.2.12 diff --git a/docker/requirements/rust-Cargo.toml b/docker/requirements/rust-Cargo.toml index 36071e6..55c343c 100644 --- 
a/docker/requirements/rust-Cargo.toml +++ b/docker/requirements/rust-Cargo.toml @@ -5,29 +5,38 @@ edition = "2021" [dependencies] # Serialization -serde = { version = "1", features = ["derive"] } -serde_json = "1" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0.149" +serde_yaml = "0.9.34" # Data formats -csv = "1" -zip = "7" -flate2 = "1" -calamine = "0.32" +csv = "1.4.0" +zip = "8.1.0" +flate2 = "1.1.9" + +# Spreadsheets (Excel, ODS) +calamine = { version = "0.33.0", features = ["dates"] } + +# Office documents (DOCX, PPTX, XLSX) +undoc = "0.1.13" + +# Markdown parsing +pulldown-cmark = "0.13.0" # Images -image = "0.25" +image = "0.25.9" # Utilities -uuid = { version = "1", features = ["v4"] } -chrono = { version = "0.4", features = ["serde"] } -regex = "1" -clap = { version = "4", features = ["derive"] } -anyhow = "1" -thiserror = "2" +uuid = { version = "1.21.0", features = ["v4", "serde"] } +chrono = { version = "0.4.43", features = ["serde"] } +regex = "1.12.3" +clap = { version = "4.5.59", features = ["derive"] } +anyhow = "1.0.101" +thiserror = "2.0.18" # Math/Stats -nalgebra = "0.34" -statrs = "0.18" +nalgebra = "0.34.1" +statrs = "0.18.0" # Plotting -plotters = "0.3" +plotters = "0.3.7" diff --git a/docker/rust.Dockerfile b/docker/rust.Dockerfile index 1a3416b..e4197c0 100644 --- a/docker/rust.Dockerfile +++ b/docker/rust.Dockerfile @@ -8,7 +8,7 @@ ################################ # Builder stage - compile crate dependencies ################################ -FROM dhi.io/rust:1.92-debian13-dev AS builder +FROM dhi.io/rust:1.93-debian13-dev AS builder SHELL ["/bin/bash", "-o", "pipefail", "-c"] @@ -45,7 +45,7 @@ RUN rm -rf /tmp/rust-cache/src /tmp/rust-cache/Cargo.toml /tmp/rust-cache/Cargo. 
################################ # Final stage - runtime only ################################ -FROM dhi.io/rust:1.92-debian13-dev AS final +FROM dhi.io/rust:1.93-debian13-dev AS final ARG BUILD_DATE ARG VERSION diff --git a/docker/sidecar/Dockerfile b/docker/sidecar/Dockerfile index 5e79da2..32248a7 100644 --- a/docker/sidecar/Dockerfile +++ b/docker/sidecar/Dockerfile @@ -1,32 +1,75 @@ # syntax=docker/dockerfile:1 -# KubeCodeRun HTTP sidecar with Docker Hardened Images. +# KubeCodeRun HTTP Sidecar — Multi-target Dockerfile. +# +# Produces two distinct container images via Docker build targets: +# +# docker build --target sidecar-agent → kubecoderun-sidecar-agent (default) +# docker build --target sidecar-nsenter → kubecoderun-sidecar-nsenter +# +# sidecar-agent (default): +# - Contains the executor-agent Go binary (copied to main container via init container) +# - No nsenter, no setcap, no capabilities, no privilege escalation +# - Compatible with GKE Sandbox (gVisor) and restricted Pod Security Standards +# +# sidecar-nsenter (legacy): +# - Contains nsenter with file capabilities (setcap) for namespace entry +# - Requires shareProcessNamespace, SYS_PTRACE/SYS_ADMIN/SYS_CHROOT capabilities, +# and allowPrivilegeEscalation: true in the pod spec +# - For clusters that do not support agent mode or need legacy behavior ARG BUILD_DATE ARG VERSION ARG VCS_REF ################################ -# Builder stage - install Python dependencies and runtime tools +# Executor agent build stage — statically compiled Go binary. +# This binary runs in the main (language) container via init container copy, +# providing HTTP-based code execution without nsenter. ################################ -FROM dhi.io/python:3.13-debian13-dev AS builder +FROM golang:1.26-alpine AS agent-builder + +WORKDIR /build +COPY executor-agent/ . +RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -trimpath -o /opt/executor-agent . 
+ +################################ +# Python builder (common) — install Python dependencies and app code. +# Used by both agent and nsenter targets. +################################ +FROM dhi.io/python:3.13-debian13-dev AS builder-common SHELL ["/bin/bash", "-o", "pipefail", "-c"] ENV PIP_DISABLE_PIP_VERSION_CHECK=1 -# Install runtime dependencies and set up nsenter with file capabilities -# - util-linux: provides nsenter for entering container namespaces -# - libcap2-bin: provides setcap for setting file capabilities -# Create both arch lib dirs to ensure COPY works on either architecture +# Create data directory and arch lib dirs (for COPY compatibility) RUN mkdir -p /lib/x86_64-linux-gnu /lib/aarch64-linux-gnu && \ - apt-get update && \ + mkdir -p /mnt/data && chown 65532:65532 /mnt/data + +WORKDIR /app + +# Install Python dependencies +COPY requirements.txt /tmp/requirements.txt +RUN --mount=type=cache,target=/root/.cache/pip \ + pip install -r /tmp/requirements.txt + +# Copy application code +COPY main.py . + +################################ +# nsenter builder — extends common builder with nsenter + file capabilities. +# Only needed for the nsenter sidecar target. 
+################################ +FROM builder-common AS builder-nsenter + +# Install nsenter (util-linux) and setcap (libcap2-bin) +RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ util-linux \ libcap2-bin \ && apt-get autoremove -y \ && apt-get clean \ - && rm -rf /var/lib/apt/lists/* \ - && mkdir -p /mnt/data && chown 65532:65532 /mnt/data + && rm -rf /var/lib/apt/lists/* # Add file capabilities to nsenter binary so non-root users can use it # - cap_sys_ptrace: access /proc//ns/ of other processes @@ -34,52 +77,27 @@ RUN mkdir -p /lib/x86_64-linux-gnu /lib/aarch64-linux-gnu && \ # - cap_sys_chroot: required for mount namespace operations RUN setcap 'cap_sys_ptrace,cap_sys_admin,cap_sys_chroot+eip' /usr/bin/nsenter -WORKDIR /app - -# Install Python dependencies -COPY requirements.txt /tmp/requirements.txt -RUN --mount=type=cache,target=/root/.cache/pip \ - pip install -r /tmp/requirements.txt - -# Copy application code -COPY main.py . ################################ -# Final stage - minimal runtime image +# Common runtime base — shared by both final targets. +# Contains Python runtime, sidecar app, and common configuration. 
################################ -FROM dhi.io/python:3.13-debian13 AS final +FROM dhi.io/python:3.13-debian13 AS runtime-base ARG BUILD_DATE ARG VERSION ARG VCS_REF -LABEL org.opencontainers.image.title="KubeCodeRun Sidecar" \ - org.opencontainers.image.description="HTTP sidecar for executing code in Kubernetes pods via nsenter" \ - org.opencontainers.image.version="${VERSION}" \ - org.opencontainers.image.created="${BUILD_DATE}" \ - org.opencontainers.image.revision="${VCS_REF}" - -# Copy nsenter with file capabilities from builder -COPY --from=builder /usr/bin/nsenter /usr/bin/ - -# Copy shared libraries needed by nsenter (libselinux, libpcre2) for both architectures -# Note: Only one arch directory will have content depending on build platform -COPY --from=builder /lib/x86_64-linux-gnu /lib/x86_64-linux-gnu -COPY --from=builder /lib/aarch64-linux-gnu /lib/aarch64-linux-gnu - # Copy installed Python packages from builder # DHI Python is installed in /opt/python, not /usr/local -COPY --from=builder /opt/python/lib/python3.13/site-packages /opt/python/lib/python3.13/site-packages -COPY --from=builder /opt/python/bin /opt/python/bin - -# Copy /usr/bin/env for execution patterns, sleep for CMD -COPY --from=builder /usr/bin/env /usr/bin/sleep /usr/bin/ +COPY --from=builder-common /opt/python/lib/python3.13/site-packages /opt/python/lib/python3.13/site-packages +COPY --from=builder-common /opt/python/bin /opt/python/bin # Copy data directory with correct ownership (DHI uses UID 65532) -COPY --from=builder /mnt/data /mnt/data +COPY --from=builder-common /mnt/data /mnt/data # Copy application code -COPY --from=builder /app /app +COPY --from=builder-common /app /app WORKDIR /app @@ -102,15 +120,80 @@ ENV VERSION=${VERSION} \ MAX_EXECUTION_TIME=120 \ PYTHONUNBUFFERED=1 -# Kubernetes pod spec still requires: -# - shareProcessNamespace: true (so sidecar can see main container's processes) -# - securityContext.capabilities.add: ["SYS_PTRACE", "SYS_ADMIN", "SYS_CHROOT"] -# (to 
allow the bounding set to include these caps) -# - securityContext.allowPrivilegeEscalation: true -# (required for file capabilities to be honored) - # DHI images run as non-root (UID 65532) by default -# File capabilities on nsenter allow this user to use nsenter with required privileges - -# Run sidecar CMD ["python", "main.py"] + + +################################ +# TARGET: sidecar-agent (default) +# +# Agent mode sidecar — the recommended execution mode. +# Contains the executor-agent Go binary for init-container distribution. +# No nsenter, no capabilities, no privilege escalation needed. +# +# Kubernetes pod spec (agent mode): +# - No shareProcessNamespace needed +# - No capabilities needed (all dropped) +# - allowPrivilegeEscalation: false for all containers +# - Init container copies /opt/executor-agent to shared volume +# - Main container runs executor-agent instead of sleep infinity +# - Compatible with GKE Sandbox (gVisor) and restricted Pod Security Standards +# +# Build: docker build --target sidecar-agent -t kubecoderun-sidecar-agent . +################################ +FROM runtime-base AS sidecar-agent + +ARG BUILD_DATE +ARG VERSION +ARG VCS_REF + +LABEL org.opencontainers.image.title="KubeCodeRun Sidecar (Agent)" \ + org.opencontainers.image.description="HTTP sidecar for executing code in Kubernetes pods via executor agent" \ + org.opencontainers.image.version="${VERSION}" \ + org.opencontainers.image.created="${BUILD_DATE}" \ + org.opencontainers.image.revision="${VCS_REF}" + +# Copy executor agent binary (distributed to main container via init container) +COPY --from=agent-builder /opt/executor-agent /opt/executor-agent + +ENV EXECUTION_MODE=agent + + +################################ +# TARGET: sidecar-nsenter (legacy) +# +# nsenter mode sidecar — for backward compatibility. +# Contains nsenter with file capabilities for namespace entry. 
+# +# Kubernetes pod spec (nsenter mode): +# - shareProcessNamespace: true (so sidecar can see main container's processes) +# - securityContext.capabilities.add: ["SYS_PTRACE", "SYS_ADMIN", "SYS_CHROOT"] +# - securityContext.allowPrivilegeEscalation: true +# (required for file capabilities to be honored) +# +# Build: docker build --target sidecar-nsenter -t kubecoderun-sidecar-nsenter . +################################ +FROM runtime-base AS sidecar-nsenter + +ARG BUILD_DATE +ARG VERSION +ARG VCS_REF + +LABEL org.opencontainers.image.title="KubeCodeRun Sidecar (nsenter)" \ + org.opencontainers.image.description="HTTP sidecar for executing code in Kubernetes pods via nsenter" \ + org.opencontainers.image.version="${VERSION}" \ + org.opencontainers.image.created="${BUILD_DATE}" \ + org.opencontainers.image.revision="${VCS_REF}" + +# Copy nsenter with file capabilities from nsenter builder +COPY --from=builder-nsenter /usr/bin/nsenter /usr/bin/ + +# Copy /usr/bin/env from builder — nsenter mode uses /usr/bin/env -i for clean execution +COPY --from=builder-common /usr/bin/env /usr/bin/env + +# Copy shared libraries needed by nsenter (libselinux, libpcre2) for both architectures +# Note: Only one arch directory will have content depending on build platform +COPY --from=builder-nsenter /lib/x86_64-linux-gnu /lib/x86_64-linux-gnu +COPY --from=builder-nsenter /lib/aarch64-linux-gnu /lib/aarch64-linux-gnu + +ENV EXECUTION_MODE=nsenter diff --git a/docker/sidecar/executor-agent/go.mod b/docker/sidecar/executor-agent/go.mod new file mode 100644 index 0000000..b7f6ec9 --- /dev/null +++ b/docker/sidecar/executor-agent/go.mod @@ -0,0 +1,3 @@ +module executor-agent + +go 1.26 diff --git a/docker/sidecar/executor-agent/main.go b/docker/sidecar/executor-agent/main.go new file mode 100644 index 0000000..cbd6dc5 --- /dev/null +++ b/docker/sidecar/executor-agent/main.go @@ -0,0 +1,253 @@ +// Executor Agent - Lightweight HTTP server for code execution in Kubernetes pods. 
+// +// This binary runs inside the main (language) container and provides an HTTP API +// that the sidecar uses to execute code. It replaces the nsenter-based approach, +// enabling execution without any Linux capabilities or privilege escalation. +// +// Architecture: +// - Listens on localhost (pod-internal only) on a configurable port (default: 9090) +// - Receives execution requests from the sidecar via HTTP +// - Spawns subprocesses using the container's inherited environment (PATH, HOME, etc.) +// - Returns stdout, stderr, exit code, and execution time +// +// The agent inherits its environment from the container's ENTRYPOINT (env -i PATH=... HOME=...), +// ensuring subprocesses run with the exact same sanitized environment as the language runtime. + +package main + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "os/exec" + "os/signal" + "path/filepath" + "strconv" + "strings" + "syscall" + "time" +) + +const ( + defaultPort = 9090 + maxOutputSize = 1048576 // 1MB - matches sidecar's MAX_OUTPUT_SIZE + maxBodySize = 10485760 // 10MB +) + +// ExecuteRequest is the JSON request body for /execute. +type ExecuteRequest struct { + Command []string `json:"command"` + Timeout int `json:"timeout"` + WorkingDir string `json:"working_dir"` + Env map[string]string `json:"env,omitempty"` +} + +// ExecuteResponse is the JSON response body for /execute. 
+type ExecuteResponse struct { + ExitCode int `json:"exit_code"` + Stdout string `json:"stdout"` + Stderr string `json:"stderr"` + ExecutionTimeMs int64 `json:"execution_time_ms"` +} + +func handleExecute(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + body, err := io.ReadAll(io.LimitReader(r.Body, maxBodySize)) + if err != nil { + writeJSON(w, http.StatusBadRequest, ExecuteResponse{ + ExitCode: 1, Stderr: "Failed to read request body", + }) + return + } + + var req ExecuteRequest + if err := json.Unmarshal(body, &req); err != nil { + writeJSON(w, http.StatusBadRequest, ExecuteResponse{ + ExitCode: 1, Stderr: fmt.Sprintf("Invalid JSON: %v", err), + }) + return + } + + if len(req.Command) == 0 { + writeJSON(w, http.StatusBadRequest, ExecuteResponse{ + ExitCode: 1, Stderr: "No command specified", + }) + return + } + + timeout := req.Timeout + if timeout <= 0 { + timeout = 30 + } + + workingDir := req.WorkingDir + if workingDir == "" { + workingDir = "/mnt/data" + } + + // Validate that working directory is within the safe /mnt/data directory. + // Use filepath.Clean + exact-prefix check to prevent traversal to e.g. /mnt/data2. 
+ absDir, err := filepath.Abs(workingDir) + if err != nil { + writeJSON(w, http.StatusBadRequest, ExecuteResponse{ + ExitCode: 1, Stderr: fmt.Sprintf("Invalid working directory: %v", err), + }) + return + } + absDir = filepath.Clean(absDir) + if absDir != "/mnt/data" && !strings.HasPrefix(absDir, "/mnt/data/") { + writeJSON(w, http.StatusBadRequest, ExecuteResponse{ + ExitCode: 1, Stderr: fmt.Sprintf("Invalid working directory: must be within /mnt/data, got %q", workingDir), + }) + return + } + workingDir = absDir + + fmt.Fprintf(os.Stdout, "[executor-agent] cmd=%v timeout=%ds dir=%s\n", + req.Command, timeout, workingDir) + + start := time.Now() + ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second) + defer cancel() + + cmd := exec.CommandContext(ctx, req.Command[0], req.Command[1:]...) + cmd.Dir = workingDir + + // Inherit the current process environment (from container's ENTRYPOINT env -i). + // Merge request-provided env overrides by replacing existing keys (so the + // override actually takes effect regardless of runtime first/last-wins semantics). 
+ if len(req.Env) > 0 { + env := os.Environ() + for k, v := range req.Env { + prefix := k + "=" + found := false + for i, e := range env { + if strings.HasPrefix(e, prefix) { + env[i] = prefix + v + found = true + break + } + } + if !found { + env = append(env, prefix+v) + } + } + cmd.Env = env + } + + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + err = cmd.Run() + elapsed := time.Since(start).Milliseconds() + + exitCode := 0 + if err != nil { + if ctx.Err() == context.DeadlineExceeded { + fmt.Fprintf(os.Stdout, "[executor-agent] TIMEOUT after %ds\n", timeout) + writeJSON(w, http.StatusOK, ExecuteResponse{ + ExitCode: 124, + Stdout: "", + Stderr: fmt.Sprintf("Execution timed out after %d seconds", timeout), + ExecutionTimeMs: elapsed, + }) + return + } + if exitErr, ok := err.(*exec.ExitError); ok { + exitCode = exitErr.ExitCode() + } else { + writeJSON(w, http.StatusOK, ExecuteResponse{ + ExitCode: 1, + Stdout: "", + Stderr: fmt.Sprintf("Failed to execute command: %v", err), + ExecutionTimeMs: elapsed, + }) + return + } + } + + stdoutStr := truncate(stdout.String(), maxOutputSize) + stderrStr := truncate(stderr.String(), maxOutputSize) + + fmt.Fprintf(os.Stdout, "[executor-agent] exit=%d stdout=%d stderr=%d time=%dms\n", + exitCode, len(stdoutStr), len(stderrStr), elapsed) + + writeJSON(w, http.StatusOK, ExecuteResponse{ + ExitCode: exitCode, + Stdout: stdoutStr, + Stderr: stderrStr, + ExecutionTimeMs: elapsed, + }) +} + +func handleHealth(w http.ResponseWriter, _ *http.Request) { + writeJSON(w, http.StatusOK, map[string]string{"status": "healthy"}) +} + +func handleReady(w http.ResponseWriter, _ *http.Request) { + writeJSON(w, http.StatusOK, map[string]string{"status": "ready"}) +} + +func writeJSON(w http.ResponseWriter, status int, data interface{}) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + json.NewEncoder(w).Encode(data) //nolint:errcheck +} + +func truncate(s string, maxLen int) string 
{ + if len(s) > maxLen { + return s[:maxLen] + } + return s +} + +func main() { + port := defaultPort + + // Parse --port flag from CLI args + for i := 1; i < len(os.Args)-1; i++ { + if os.Args[i] == "--port" { + if p, err := strconv.Atoi(os.Args[i+1]); err == nil { + port = p + } + } + } + + mux := http.NewServeMux() + mux.HandleFunc("/execute", handleExecute) + mux.HandleFunc("/health", handleHealth) + mux.HandleFunc("/ready", handleReady) + + server := &http.Server{ + Addr: fmt.Sprintf("127.0.0.1:%d", port), + Handler: mux, + ReadTimeout: 30 * time.Second, + WriteTimeout: 300 * time.Second, + } + + // Graceful shutdown on SIGTERM/SIGINT + go func() { + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGTERM, syscall.SIGINT) + <-sigCh + fmt.Fprintln(os.Stdout, "[executor-agent] Shutting down...") + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + server.Shutdown(ctx) //nolint:errcheck + }() + + fmt.Fprintf(os.Stdout, "[executor-agent] Listening on 127.0.0.1:%d\n", port) + if err := server.ListenAndServe(); err != http.ErrServerClosed { + fmt.Fprintf(os.Stderr, "[executor-agent] Server error: %v\n", err) + os.Exit(1) + } +} diff --git a/docker/sidecar/main.py b/docker/sidecar/main.py index 3d4663e..684d8e2 100644 --- a/docker/sidecar/main.py +++ b/docker/sidecar/main.py @@ -2,10 +2,17 @@ """HTTP Sidecar for Kubernetes Pod Execution. This sidecar runs alongside the main language container and provides -an HTTP API for code execution. It uses nsenter to execute code in -the main container's mount namespace. +an HTTP API for code execution. It supports two execution modes: -Requires: shareProcessNamespace: true in the pod spec. +1. Agent mode (default): Forwards execution requests to an executor agent + HTTP server running inside the main container. No nsenter, no capabilities, + no privilege escalation needed. Compatible with GKE Sandbox (gVisor). + +2. 
nsenter mode (legacy): Uses nsenter to execute code in the main container's + mount namespace. Requires shareProcessNamespace, SYS_PTRACE, SYS_ADMIN, + SYS_CHROOT capabilities, and allowPrivilegeEscalation: true. + +The mode is controlled by the EXECUTION_MODE environment variable. """ import asyncio @@ -19,6 +26,7 @@ from pathlib import Path from typing import Optional +import httpx from fastapi import FastAPI, File, HTTPException, Response, UploadFile from fastapi.responses import FileResponse from pydantic import BaseModel, Field @@ -35,6 +43,11 @@ # Network isolation mode - when true, disables network-dependent features (e.g., Go module proxy) NETWORK_ISOLATED = os.getenv("NETWORK_ISOLATED", "false").lower() in ("true", "1", "yes") +# Execution mode: "agent" (default, no nsenter) or "nsenter" (legacy) +EXECUTION_MODE = os.getenv("EXECUTION_MODE", "agent") +# Executor port (used in agent mode for the executor agent HTTP server) +EXECUTOR_PORT = int(os.getenv("EXECUTOR_PORT", "9090")) + class ExecuteRequest(BaseModel): """Request to execute code.""" code: str @@ -218,85 +231,208 @@ def apply_network_isolation_overrides(env: dict[str, str], language: str) -> dic return env -def get_language_command( - language: str, code: str, working_dir: str, container_env: dict[str, str] -) -> tuple[list[str], Path | None]: - """Get the command to execute code for a given language. +def _write_code_file(language: str, code: str, working_dir: str) -> tuple[list[str], Path | None]: + """Write code to a temp file and return the bare command to execute it. + This is the core (DRY) logic shared by both execution modes. Returns (command_list, temp_file_path_or_none). - - Environment is always read from the container at runtime via /proc//environ. - This eliminates config drift between Dockerfiles and sidecar code. 
- - Two execution modes: - - Direct mode: Uses '/usr/bin/env -i' for single-command execution - - Shell mode: Uses 'sh -c' for multi-step (compile && run) commands - - Both modes use the runtime-detected environment from the container. """ - # Use container env, fall back to minimal defaults if not available - env = container_env if container_env else {"PATH": "/usr/local/bin:/usr/bin:/bin", "HOME": "/tmp"} - - # Single wrapper using /usr/bin/env -i with runtime-detected environment - def wrap(cmd_args: list[str]) -> list[str]: - env_args = [f"{k}={v}" for k, v in env.items()] - return ["/usr/bin/env", "-i"] + env_args + cmd_args - - # Helper for compiled languages needing shell for compile && run safe_wd = shlex.quote(working_dir) if language in ("python", "py"): code_file = Path(working_dir) / "code.py" code_file.write_text(code) - return wrap(["python", str(code_file)]), code_file + return ["python", str(code_file)], code_file elif language in ("javascript", "js"): code_file = Path(working_dir) / "code.js" code_file.write_text(code) - return wrap(["node", str(code_file)]), code_file + return ["node", str(code_file)], code_file elif language in ("typescript", "ts"): code_file = Path(working_dir) / "code.ts" code_file.write_text(code) - return wrap(["node", "/opt/scripts/ts-runner.js", str(code_file)]), code_file + return ["node", "/opt/scripts/ts-runner.js", str(code_file)], code_file elif language in ("go",): code_file = Path(working_dir) / "main.go" code_file.write_text(code) - return wrap(["go", "run", str(code_file)]), code_file + return ["go", "run", str(code_file)], code_file elif language in ("rust", "rs"): code_file = Path(working_dir) / "main.rs" code_file.write_text(code) - return wrap(["sh", "-c", f"cd {safe_wd} && rustc {code_file} -o /tmp/main && /tmp/main"]), code_file + return ["sh", "-c", f"cd {safe_wd} && rustc {code_file} -o /tmp/main && /tmp/main"], code_file elif language in ("java",): code_file = Path(working_dir) / "Code.java" 
code_file.write_text(code) - return wrap(["sh", "-c", f"cd {safe_wd} && javac {code_file} && java -cp {working_dir} Code"]), code_file + return ["sh", "-c", f"cd {safe_wd} && javac {code_file} && java -cp {working_dir} Code"], code_file elif language in ("c",): code_file = Path(working_dir) / "code.c" code_file.write_text(code) - return wrap(["sh", "-c", f"cd {safe_wd} && gcc {code_file} -o /tmp/code && /tmp/code"]), code_file + return ["sh", "-c", f"cd {safe_wd} && gcc {code_file} -o /tmp/code && /tmp/code"], code_file elif language in ("cpp",): code_file = Path(working_dir) / "code.cpp" code_file.write_text(code) - return wrap(["sh", "-c", f"cd {safe_wd} && g++ {code_file} -o /tmp/code && /tmp/code"]), code_file + return ["sh", "-c", f"cd {safe_wd} && g++ {code_file} -o /tmp/code && /tmp/code"], code_file elif language in ("php",): code_file = Path(working_dir) / "code.php" code_file.write_text(code) - return wrap(["php", str(code_file)]), code_file + return ["php", str(code_file)], code_file elif language in ("r",): code_file = Path(working_dir) / "code.r" code_file.write_text(code) - return wrap(["Rscript", str(code_file)]), code_file + return ["Rscript", str(code_file)], code_file elif language in ("fortran", "f90"): code_file = Path(working_dir) / "code.f90" code_file.write_text(code) - return wrap(["sh", "-c", f"cd {safe_wd} && gfortran {code_file} -o /tmp/code && /tmp/code"]), code_file + return ["sh", "-c", f"cd {safe_wd} && gfortran {code_file} -o /tmp/code && /tmp/code"], code_file elif language in ("d", "dlang"): code_file = Path(working_dir) / "code.d" code_file.write_text(code) - return wrap(["sh", "-c", f"cd {safe_wd} && ldc2 {code_file} -of=/tmp/code && /tmp/code"]), code_file + return ["sh", "-c", f"cd {safe_wd} && ldc2 {code_file} -of=/tmp/code && /tmp/code"], code_file else: return [], None +def get_language_command( + language: str, code: str, working_dir: str, container_env: dict[str, str] +) -> tuple[list[str], Path | None]: + """Get the 
command to execute code for a given language (nsenter mode). + + Wraps the bare command with `/usr/bin/env -i` and the container's environment + variables to ensure a clean, reproducible execution context. + + Returns (command_list, temp_file_path_or_none). + + Environment is always read from the container at runtime via /proc/<pid>/environ. + This eliminates config drift between Dockerfiles and sidecar code. + """ + cmd, temp_file = _write_code_file(language, code, working_dir) + if not cmd: + return [], None + + # Use container env, fall back to minimal defaults if not available + env = container_env if container_env else {"PATH": "/usr/local/bin:/usr/bin:/bin", "HOME": "/tmp"} + + # Wrap with /usr/bin/env -i for a clean environment + env_args = [f"{k}={v}" for k, v in env.items()] + return ["/usr/bin/env", "-i"] + env_args + cmd, temp_file + + +def get_language_command_bare( + language: str, code: str, working_dir: str, +) -> tuple[list[str], Path | None]: + """Get the bare command to execute code for a given language (agent mode). + + Used in agent mode where the executor agent already inherits the correct + environment from the container's ENTRYPOINT. No env -i wrapper needed. + + Returns (command_list, temp_file_path_or_none). + """ + return _write_code_file(language, code, working_dir) + + +def get_network_isolation_overrides(language: str) -> dict[str, str]: + """Get environment variable overrides for network-isolated execution. + + Returns a dict of env vars to override in the executor agent's subprocess. + """ + if not NETWORK_ISOLATED: + return {} + + overrides = {} + if language in ("go",): + overrides["GOPROXY"] = "off" + overrides["GOSUMDB"] = "off" + print("[EXECUTE] Network isolation: overriding GOPROXY=off, GOSUMDB=off", flush=True) + return overrides + + +async def execute_via_agent(request: ExecuteRequest) -> ExecuteResponse: + """Execute code via the executor agent running in the main container.
+ + The executor agent is a lightweight HTTP server that runs inside the main + container, receiving commands over localhost (shared pod network namespace). + No nsenter, capabilities, or privilege escalation needed. + """ + start_time = time.perf_counter() + + try: + # Write code to a temp file and get the bare command (no env -i wrapper) + cmd, temp_file = get_language_command_bare( + LANGUAGE, request.code, request.working_dir + ) + if not cmd: + return ExecuteResponse( + exit_code=1, + stdout="", + stderr=f"Unsupported language: {LANGUAGE}", + execution_time_ms=0, + ) + except Exception as e: + return ExecuteResponse( + exit_code=1, + stdout="", + stderr=f"Failed to prepare execution: {str(e)}\n{traceback.format_exc()}", + execution_time_ms=int((time.perf_counter() - start_time) * 1000), + ) + + # Build env overrides for network isolation + env_overrides = get_network_isolation_overrides(LANGUAGE) + + print(f"[EXECUTE] agent mode, cmd={cmd}, timeout={request.timeout}s", flush=True) + + try: + async with httpx.AsyncClient() as client: + resp = await client.post( + f"http://127.0.0.1:{EXECUTOR_PORT}/execute", + json={ + "command": cmd, + "timeout": request.timeout, + "working_dir": request.working_dir, + "env": env_overrides if env_overrides else None, + }, + timeout=request.timeout + 10, # Extra margin for HTTP overhead + ) + + if resp.status_code != 200: + return ExecuteResponse( + exit_code=1, + stdout="", + stderr=f"Executor agent returned HTTP {resp.status_code}: {resp.text}", + execution_time_ms=int((time.perf_counter() - start_time) * 1000), + ) + + data = resp.json() + return ExecuteResponse( + exit_code=data.get("exit_code", 1), + stdout=data.get("stdout", ""), + stderr=data.get("stderr", ""), + execution_time_ms=data.get("execution_time_ms", 0), + ) + + except httpx.TimeoutException: + return ExecuteResponse( + exit_code=124, + stdout="", + stderr=f"Execution timed out after {request.timeout} seconds", + execution_time_ms=int((time.perf_counter() - 
start_time) * 1000), + ) + except httpx.ConnectError: + return ExecuteResponse( + exit_code=1, + stdout="", + stderr=f"Cannot connect to executor agent at 127.0.0.1:{EXECUTOR_PORT}. " + f"Ensure the main container is running the executor agent.", + execution_time_ms=int((time.perf_counter() - start_time) * 1000), + ) + except Exception as e: + print(f"[EXECUTE] AGENT EXCEPTION: {type(e).__name__}: {e}", flush=True) + return ExecuteResponse( + exit_code=1, + stdout="", + stderr=f"Agent execution error: {str(e)}\n{traceback.format_exc()}", + execution_time_ms=int((time.perf_counter() - start_time) * 1000), + ) + + async def execute_via_nsenter(request: ExecuteRequest) -> ExecuteResponse: """Execute code in the main container using nsenter. @@ -474,8 +610,11 @@ async def execute_via_subprocess_direct(request: ExecuteRequest) -> ExecuteRespo @app.post("/execute", response_model=ExecuteResponse) async def execute_code(request: ExecuteRequest) -> ExecuteResponse: - """Execute code and return results via nsenter.""" - return await execute_via_nsenter(request) + """Execute code using the configured execution mode (agent or nsenter).""" + if EXECUTION_MODE == "agent": + return await execute_via_agent(request) + else: + return await execute_via_nsenter(request) @app.post("/files") @@ -586,10 +725,25 @@ async def readiness_check(): if not os.path.isdir(WORKING_DIR): raise HTTPException(status_code=503, detail="Working directory not ready") - # Check if we can find the main container - main_pid = find_main_container_pid() - if not main_pid: - raise HTTPException(status_code=503, detail="Main container not found") + if EXECUTION_MODE == "agent": + # In agent mode, check if the executor agent is reachable + try: + async with httpx.AsyncClient() as client: + resp = await client.get( + f"http://127.0.0.1:{EXECUTOR_PORT}/health", + timeout=2, + ) + if resp.status_code != 200: + raise HTTPException(status_code=503, detail="Executor agent not healthy") + except httpx.ConnectError: + 
raise HTTPException(status_code=503, detail="Executor agent not reachable") + except Exception: + raise HTTPException(status_code=503, detail="Executor agent health check failed") + else: + # In nsenter mode, check if we can find the main container + main_pid = find_main_container_pid() + if not main_pid: + raise HTTPException(status_code=503, detail="Main container not found") return {"status": "ready"} diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index fc0dcd3..f0a81b4 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -48,20 +48,105 @@ The warm pool approach achieves ~85% reduction in P99 latency compared to cold-s ## Pod Design: Two-Container Sidecar Pattern -Each execution pod contains two containers that share process namespaces, enabling the sidecar to execute code using the main container's runtime environment. +Each execution pod contains two containers that communicate over the shared pod network (`localhost`). KubeCodeRun supports two execution modes controlled by `K8S_EXECUTION_MODE`. -### 1. Main Container (Language Runtime) +### Execution Modes + +#### Agent Mode (Default) — `K8S_EXECUTION_MODE=agent` + +In agent mode, a statically compiled Go binary (**executor agent**) runs inside the main (language) container. The sidecar forwards execution requests to the agent over `localhost:9090`. 
+ +``` +┌─────────────────────────────────────────────────────────────┐ +│ Execution Pod │ +│ shareProcessNamespace: false (not needed) │ +│ │ +│ ┌────────────────┐ │ +│ │ Init Container │ Copies /opt/executor-agent │ +│ │ (agent-init) │ → /mnt/data/.executor-agent │ +│ └────────┬───────┘ │ +│ │ │ +│ ┌────────▼────────────┐ ┌─────────────────────────────┐ │ +│ │ Main Container │ │ Sidecar Container │ │ +│ │ │ │ │ │ +│ │ • Language runtime │◄───│ • Receives HTTP request │ │ +│ │ • Executor agent │ │ • Forwards to agent via │ │ +│ │ on 127.0.0.1:9090│ │ POST localhost:9090 │ │ +│ │ • Spawns code │ │ • Returns stdout/stderr │ │ +│ │ subprocesses │ │ │ │ +│ └─────────────────────┘ └─────────────────────────────┘ │ +│ │ │ │ +│ └────────────────────────────┘ │ +│ Shared /mnt/data volume │ +└─────────────────────────────────────────────────────────────┘ +``` + +**Security properties:** +- No `shareProcessNamespace` — containers cannot see each other's processes +- No capabilities — all capabilities dropped for all containers +- `allowPrivilegeEscalation: false` — no binary can gain elevated privileges +- Compatible with **GKE Sandbox (gVisor)** and restricted Pod Security Standards +- Communication via `localhost` only (pod-internal, not network-accessible) + +**Container images:** +- **Sidecar:** Built with `--target sidecar-agent` from `docker/sidecar/Dockerfile` +- **Image name:** `kubecoderun-sidecar-agent` (contains executor-agent binary + Python sidecar) + +#### nsenter Mode (Legacy) — `K8S_EXECUTION_MODE=nsenter` + +In nsenter mode, the sidecar uses Linux `nsenter` to execute code in the main container's mount namespace. This requires elevated privileges and is preserved for backward compatibility. 
+ +``` +┌─────────────────────────────────────────────────────────────┐ +│ Execution Pod │ +│ shareProcessNamespace: true │ +│ │ +│ ┌─────────────────────┐ ┌─────────────────────────────┐ │ +│ │ Main Container │ │ Sidecar Container │ │ +│ │ │ │ │ │ +│ │ • Python/Node/Go │◄───│ • Receives HTTP request │ │ +│ │ • sleep infinity │ │ • Writes code to /mnt/data │ │ +│ │ • PID 1 visible │ │ • nsenter -m -t │ │ +│ │ to sidecar │ │ --wdns=/mnt/data sh │ │ +│ │ │ │ • Returns stdout/stderr │ │ +│ └─────────────────────┘ └─────────────────────────────┘ │ +│ │ │ │ +│ └────────────────────────────┘ │ +│ Shared /mnt/data volume │ +└─────────────────────────────────────────────────────────────┘ +``` + +**Required pod settings:** +- `shareProcessNamespace: true` +- Sidecar capabilities: `SYS_PTRACE`, `SYS_ADMIN`, `SYS_CHROOT` +- `allowPrivilegeEscalation: true` (for file capabilities on nsenter binary) +- **Not compatible** with GKE Sandbox (gVisor) + +**Container images:** +- **Sidecar:** Built with `--target sidecar-nsenter` from `docker/sidecar/Dockerfile` +- **Image name:** `kubecoderun-sidecar-nsenter` (contains nsenter with setcap + Python sidecar) + +### Container Details + +#### 1. Main Container (Language Runtime) - Runs the language runtime (Python, Node.js, Go, etc.) - Provides the execution environment (compilers, interpreters, libraries) - Shares `/mnt/data` volume with sidecar -- Runs a sleep loop to keep the container alive +- **Agent mode:** Runs the executor agent binary (copied by init container) +- **nsenter mode:** Runs `sleep infinity` to keep the container alive -### 2. HTTP Sidecar (Executor) +#### 2. 
HTTP Sidecar (Executor) - Lightweight FastAPI server (~50MB) - Exposes REST API for code execution -- Uses `nsenter` to execute code in the main container's namespace +- **Agent mode:** Forwards requests to the executor agent via HTTP on `localhost` +- **nsenter mode:** Uses `nsenter` to execute code in the main container's namespace - Handles file transfers and state management +#### 3. Init Container (Agent Mode Only) +- Uses the sidecar-agent image +- Copies `/opt/executor-agent` binary to `/mnt/data/.executor-agent` +- Runs once at pod startup, then exits + **Sidecar API Endpoints:** ``` POST /execute - Execute code with optional state @@ -71,9 +156,9 @@ GET /files/{name} - Download file content GET /health - Health check ``` -### Namespace Sharing with nsenter +### Namespace Sharing with nsenter (Legacy Mode) -The pod uses `shareProcessNamespace: true`, allowing containers to see each other's processes. The sidecar uses Linux `nsenter` to execute code in the main container's mount namespace: +In nsenter mode, the pod uses `shareProcessNamespace: true`, allowing containers to see each other's processes. The sidecar uses Linux `nsenter` to execute code in the main container's mount namespace: ``` ┌─────────────────────────────────────────────────────────────┐ @@ -100,18 +185,18 @@ The pod uses `shareProcessNamespace: true`, allowing containers to see each othe 3. Sets the working directory to `/mnt/data` so relative paths write to the shared volume 4. Captures stdout/stderr and returns via HTTP -**nsenter Privilege Model:** +**nsenter Privilege Model (nsenter mode only):** The sidecar runs as non-root (UID 65532) but requires Linux capabilities to use `nsenter`. 
Since capabilities for non-root users only populate the *bounding set* (not effective/permitted), we use **file capabilities** via `setcap` on the nsenter binary: ```dockerfile -# In sidecar Dockerfile +# In sidecar Dockerfile (sidecar-nsenter target only) RUN setcap 'cap_sys_ptrace,cap_sys_admin,cap_sys_chroot+eip' /usr/bin/nsenter ``` This allows the non-root user to gain the required capabilities when executing nsenter, without running as root. The pod spec still requires `allowPrivilegeEscalation: true` for file capabilities to be honored. See [SECURITY.md](SECURITY.md) for full details. -**Per-Language Environment Setup:** +**Per-Language Environment Setup (nsenter mode only):** Since `nsenter -m` only enters the mount namespace (not the environment), the sidecar explicitly sets up PATH and environment variables for each language: @@ -186,7 +271,8 @@ Since `nsenter -m` only enters the mount namespace (not the environment), the si ▼ 5. HTTP Sidecar ├── POST /execute - ├── Run code in main container + ├── Agent mode: Forward to executor agent → subprocess in main container + ├── nsenter mode: nsenter into main container's mount namespace → subprocess └── Return stdout/stderr/files │ ▼ @@ -253,7 +339,7 @@ POD_POOL_EXHAUSTION_TRIGGER=true # Trigger immediate replenishment when exhaus ```python K8S_NAMESPACE=kubecoderun -K8S_SIDECAR_IMAGE=aronmuon/kubecoderun-sidecar:latest +K8S_SIDECAR_IMAGE=aronmuon/kubecoderun-sidecar-agent:latest K8S_IMAGE_REGISTRY=aronmuon/kubecoderun K8S_IMAGE_TAG=latest K8S_CPU_LIMIT=1 diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 30049f5..ee57b72 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -135,18 +135,23 @@ Manages API key authentication and security. ### Redis Configuration -Redis is used for session management and caching. 
- -| Variable | Default | Description | -| ------------------------------ | ----------- | -------------------------------------------------- | -| `REDIS_HOST` | `localhost` | Redis server hostname | -| `REDIS_PORT` | `6379` | Redis server port | -| `REDIS_PASSWORD` | - | Redis password (if required) | -| `REDIS_DB` | `0` | Redis database number | -| `REDIS_URL` | - | Complete Redis URL (overrides individual settings) | -| `REDIS_MAX_CONNECTIONS` | `20` | Maximum connections in pool | -| `REDIS_SOCKET_TIMEOUT` | `5` | Socket timeout (seconds) | -| `REDIS_SOCKET_CONNECT_TIMEOUT` | `5` | Connection timeout (seconds) | +Redis is used for session management and caching. Three deployment modes are supported: +**standalone** (default), **cluster**, and **sentinel** — all with optional TLS/SSL. + +#### Connection Settings + +| Variable | Default | Description | +| ------------------------------ | ------------- | -------------------------------------------------------- | +| `REDIS_MODE` | `standalone` | Deployment mode: `standalone`, `cluster`, or `sentinel` | +| `REDIS_HOST` | `localhost` | Redis server hostname | +| `REDIS_PORT` | `6379` | Redis server port | +| `REDIS_PASSWORD` | - | Redis password (if required) | +| `REDIS_DB` | `0` | Redis database number (standalone/sentinel only) | +| `REDIS_URL` | - | Complete Redis URL (overrides individual settings) | +| `REDIS_MAX_CONNECTIONS` | `20` | Maximum connections in pool | +| `REDIS_SOCKET_TIMEOUT` | `5` | Socket timeout (seconds) | +| `REDIS_SOCKET_CONNECT_TIMEOUT` | `5` | Connection timeout (seconds) | +| `REDIS_KEY_PREFIX` | - | Optional prefix prepended to every Redis key (e.g. `prod:`) | **Example Redis URL:** @@ -154,6 +159,83 @@ Redis is used for session management and caching. REDIS_URL=redis://password@localhost:6379/0 ``` +#### Redis Cluster Mode + +Use `REDIS_MODE=cluster` when running against a Redis Cluster deployment (e.g. GCP Memorystore Cluster, AWS ElastiCache Cluster Mode). 
+ +| Variable | Default | Description | +| ---------------------- | ------- | --------------------------------------------------------------------------- | +| `REDIS_CLUSTER_NODES` | - | Comma-separated `host:port` pairs for cluster startup nodes | + +> **Note:** `REDIS_DB` is ignored in cluster mode (Redis Cluster only supports database 0). + +**Example:** + +```bash +REDIS_MODE=cluster +REDIS_CLUSTER_NODES=node1:6379,node2:6379,node3:6379 +REDIS_PASSWORD=your-cluster-password +``` + +#### Redis Sentinel Mode + +Use `REDIS_MODE=sentinel` for high-availability setups with Redis Sentinel. + +| Variable | Default | Description | +| -------------------------- | ---------- | ------------------------------------------------------------ | +| `REDIS_SENTINEL_NODES` | - | Comma-separated `host:port` pairs for Sentinel instances | +| `REDIS_SENTINEL_MASTER` | `mymaster` | Name of the Sentinel-monitored master | +| `REDIS_SENTINEL_PASSWORD` | - | Password for authenticating to Sentinel instances | + +**Example:** + +```bash +REDIS_MODE=sentinel +REDIS_SENTINEL_NODES=sentinel1:26379,sentinel2:26379,sentinel3:26379 +REDIS_SENTINEL_MASTER=mymaster +REDIS_PASSWORD=your-redis-password +REDIS_SENTINEL_PASSWORD=your-sentinel-password +``` + +#### Redis TLS/SSL + +Enable TLS for encrypted connections. Required by most managed Redis services (GCP Memorystore, AWS ElastiCache, Azure Cache for Redis). 
+ +| Variable | Default | Description | +| ------------------------------ | ------- | ---------------------------------------------------------------- | +| `REDIS_TLS_ENABLED` | `false` | Enable TLS/SSL for Redis connections | +| `REDIS_TLS_CA_CERT_FILE` | - | Path to CA certificate for verifying the server | +| `REDIS_TLS_CERT_FILE` | - | Path to client TLS certificate (mutual TLS) | +| `REDIS_TLS_KEY_FILE` | - | Path to client TLS private key (mutual TLS) | +| `REDIS_TLS_INSECURE` | `false` | Skip TLS certificate verification (NOT recommended) | +| `REDIS_TLS_CHECK_HOSTNAME` | `false` | Verify server hostname against certificate CN/SAN | + +> When `REDIS_TLS_ENABLED=true` the generated URL uses the `rediss://` scheme automatically. +> +> **Security note:** `REDIS_TLS_CHECK_HOSTNAME` is `false` by default because managed Redis services +> (GCP Memorystore, AWS ElastiCache) and Redis Cluster node discovery expose IP addresses +> that do not match certificate CN/SAN entries. The CA certificate chain is still fully +> validated. For environments where Redis hostnames match their certificates, set +> `REDIS_TLS_CHECK_HOSTNAME=true` for stronger TLS authentication. + +**Example — GCP Memorystore with TLS:** + +```bash +REDIS_HOST=10.0.0.3 +REDIS_PORT=6378 +REDIS_TLS_ENABLED=true +REDIS_TLS_CA_CERT_FILE=/etc/ssl/redis/server-ca.pem +``` + +**Example — GCP Memorystore Cluster:** + +```bash +REDIS_MODE=cluster +REDIS_CLUSTER_NODES=10.0.0.3:6379,10.0.0.4:6379,10.0.0.5:6379 +REDIS_TLS_ENABLED=true +REDIS_TLS_CA_CERT_FILE=/etc/ssl/redis/server-ca.pem +``` + ### MinIO/S3 Configuration MinIO provides S3-compatible object storage for files. @@ -175,22 +257,128 @@ Kubernetes is used for secure code execution in isolated pods. 
| Variable | Default | Description |
| ---------------------- | -------------------------------------------- | ---------------------------------------- |
| `K8S_NAMESPACE` | `""` (uses API's namespace) | Namespace for execution pods |
-| `K8S_SIDECAR_IMAGE` | `aronmuon/kubecoderun-sidecar:latest` | HTTP sidecar image for pod communication |
+| `K8S_SIDECAR_IMAGE` | `aronmuon/kubecoderun-sidecar-agent:latest` | HTTP sidecar image for pod communication |
| `K8S_IMAGE_REGISTRY` | `aronmuon/kubecoderun` | Registry prefix for language images |
| `K8S_IMAGE_TAG` | `latest` | Image tag for language images |
| `K8S_CPU_LIMIT` | `1` | CPU limit per execution pod |
| `K8S_MEMORY_LIMIT` | `512Mi` | Memory limit per execution pod |
| `K8S_CPU_REQUEST` | `100m` | CPU request per execution pod |
| `K8S_MEMORY_REQUEST` | `128Mi` | Memory request per execution pod |
+| `K8S_EXECUTION_MODE` | `agent` | Execution mode: `agent` (default) or `nsenter` |
+| `K8S_EXECUTOR_PORT` | `9090` | Port for the executor HTTP server inside the main container |
+| `K8S_IMAGE_PULL_POLICY`| `Always` | Image pull policy for execution pods (`Always`, `IfNotPresent`, `Never`) |
+| `K8S_IMAGE_PULL_SECRETS`| `""` | Comma-separated list of Kubernetes secret names for pulling images from private registries |
+
+**Image Pull Secrets:**
+
+When using private container registries, create Kubernetes secrets in the execution namespace and reference them via `K8S_IMAGE_PULL_SECRETS`:
+
+```bash
+# Create the secret
+kubectl create secret docker-registry my-registry-secret \
+  --docker-server=ghcr.io \
+  --docker-username=<username> \
+  --docker-password=<password> \
+  -n <namespace>
+
+# Configure the API
+K8S_IMAGE_PULL_SECRETS=my-registry-secret
+# Multiple secrets: K8S_IMAGE_PULL_SECRETS=secret1,secret2
+```
+
+The secrets are applied to all dynamically created execution pods (both warm pool pods and on-demand Job pods).
+
+**Execution Modes:**
+
+- **`agent` (default):** A lightweight Go HTTP server runs inside the main container. 
The sidecar forwards execution requests via localhost. No `nsenter`, no capabilities, no privilege escalation. Compatible with GKE Sandbox (gVisor) and restricted Pod Security Standards. +- **`nsenter` (legacy):** The sidecar uses `nsenter` to enter the main container's mount namespace. Requires `shareProcessNamespace`, `SYS_PTRACE`/`SYS_ADMIN`/`SYS_CHROOT` capabilities, and `allowPrivilegeEscalation: true`. Use only on clusters that allow privilege escalation. **Security Notes:** - Both containers run with `runAsNonRoot: true` and `runAsUser: 65532` -- The sidecar uses file capabilities (`setcap`) on the `nsenter` binary to allow non-root users to enter namespaces -- Required pod capabilities (SYS_PTRACE, SYS_ADMIN, SYS_CHROOT) must be in the bounding set with `allowPrivilegeEscalation: true` +- In agent mode: all capabilities are dropped, `allowPrivilegeEscalation: false` for all containers +- In nsenter mode: the sidecar uses file capabilities (`setcap`) on the `nsenter` binary to allow non-root namespace entry - Network policies deny all egress by default - Pods are destroyed immediately after execution -- See [SECURITY.md](SECURITY.md) for detailed explanation of the nsenter privilege model +- See [SECURITY.md](SECURITY.md) for detailed explanation of the security model + +#### Sidecar Container Images + +The sidecar Dockerfile produces two distinct images via Docker build targets. 
Use the image that matches your configured `K8S_EXECUTION_MODE`: + +| Build Target | Image Name | Execution Mode | Description | +|-------------|------------|---------------|-------------| +| `sidecar-agent` (default) | `kubecoderun-sidecar-agent` | `agent` | Contains executor-agent binary; no nsenter, no capabilities | +| `sidecar-nsenter` | `kubecoderun-sidecar-nsenter` | `nsenter` | Contains nsenter with file capabilities (setcap) | + +**Building the images:** + +```bash +# Agent mode sidecar (default, recommended): +docker build --target sidecar-agent \ + -t kubecoderun-sidecar-agent:latest \ + -f docker/sidecar/Dockerfile docker/sidecar/ + +# nsenter mode sidecar (legacy): +docker build --target sidecar-nsenter \ + -t kubecoderun-sidecar-nsenter:latest \ + -f docker/sidecar/Dockerfile docker/sidecar/ + +# Or use the build script (builds both automatically): +./scripts/build-images.sh sidecar-agent # agent mode sidecar +./scripts/build-images.sh sidecar-nsenter # nsenter mode sidecar +./scripts/build-images.sh # all images (both sidecars) +``` + +**Helm chart configuration:** + +Update `values.yaml` to use the correct sidecar image for your execution mode: + +```yaml +execution: + executionMode: "agent" # or "nsenter" + sidecar: + # For agent mode (default): + repository: ghcr.io/your-org/kubecoderun-sidecar-agent + # For nsenter mode: + # repository: ghcr.io/your-org/kubecoderun-sidecar-nsenter +``` + +### GKE Sandbox (gVisor) Configuration + +[GKE Sandbox](https://docs.cloud.google.com/kubernetes-engine/docs/concepts/sandbox-pods) provides kernel-level isolation using gVisor to protect the host kernel from untrusted code. It is **only compatible with agent execution mode**. 
+ +| Variable | Default | Description | +| ----------------------------------- | --------- | -------------------------------------------------- | +| `GKE_SANDBOX_ENABLED` | `false` | Enable GKE Sandbox (gVisor) for execution pods | +| `GKE_SANDBOX_RUNTIME_CLASS` | `gvisor` | RuntimeClass name for sandboxed pods | +| `GKE_SANDBOX_NODE_SELECTOR` | `{}` | JSON node selector for sandbox nodes | +| `GKE_SANDBOX_CUSTOM_TOLERATIONS` | `[]` | JSON array of custom tolerations for sandbox nodes | + +**Requirements:** + +- `K8S_EXECUTION_MODE=agent` (nsenter is **incompatible** with gVisor) +- GKE cluster with a sandbox-enabled node pool (`--sandbox type=gvisor`) +- At least two node pools — one with GKE Sandbox enabled, one without +- Container-Optimized OS with containerd (`cos_containerd`) node image + +**Example configuration:** + +```bash +K8S_EXECUTION_MODE=agent +GKE_SANDBOX_ENABLED=true +GKE_SANDBOX_RUNTIME_CLASS=gvisor +# Schedule on specific sandbox node pool: +GKE_SANDBOX_NODE_SELECTOR={"pool":"sandbox"} +GKE_SANDBOX_CUSTOM_TOLERATIONS=[{"key":"pool","value":"sandbox","operator":"Equal","effect":"NoSchedule"}] +``` + +**Key limitations of GKE Sandbox** (see [GKE docs](https://docs.cloud.google.com/kubernetes-engine/docs/concepts/sandbox-pods#limitations)): + +- Incompatible with `nsenter` execution mode, privileged containers, and `shareProcessNamespace` (all avoided in agent mode) +- Seccomp, AppArmor, and SELinux not applicable inside the sandbox +- HostPath volumes and port-forwarding not supported +- Container-level memory metrics not available (pod-level metrics are) ### Resource Limits @@ -395,6 +583,10 @@ if validate_configuration(): - [ ] Deploy Kubernetes NetworkPolicy to deny egress - [ ] Configure pod security context (non-root user) - [ ] Review and adjust resource limits +- [ ] Choose execution mode (`K8S_EXECUTION_MODE=agent` recommended) +- [ ] Ensure sidecar image matches execution mode (`sidecar-agent` for agent, `sidecar-nsenter` for nsenter) +- [ 
] Configure `K8S_IMAGE_PULL_SECRETS` if using private registries +- [ ] Enable GKE Sandbox for additional kernel isolation if running on GKE (`GKE_SANDBOX_ENABLED=true`) ### Performance diff --git a/docs/SECURITY.md b/docs/SECURITY.md index 0415712..716666f 100644 --- a/docs/SECURITY.md +++ b/docs/SECURITY.md @@ -113,7 +113,75 @@ Code is analyzed for potentially dangerous patterns: - **Security context**: Pods run as non-root (`runAsUser: 65532`) - **Ephemeral execution**: Pods destroyed immediately after execution -#### Namespace Sharing Security (nsenter) +#### Execution Modes + +KubeCodeRun supports two execution modes, controlled by the `K8S_EXECUTION_MODE` environment variable: + +##### Agent Mode (Default) — `K8S_EXECUTION_MODE=agent` + +In agent mode, a lightweight Go HTTP server (the **executor agent**) runs inside the main language container. The sidecar forwards execution requests to it over `localhost` (pod-internal network). This eliminates the need for `nsenter`, Linux capabilities, privilege escalation, and `shareProcessNamespace`. + +**How it works:** + +1. An **init container** (using the `sidecar-agent` image) copies the executor agent binary from `/opt/executor-agent` to the shared volume at `/mnt/data/.executor-agent` +2. The main container's CMD is overridden to run `/mnt/data/.executor-agent` instead of `sleep infinity` +3. The executor agent starts an HTTP server on `127.0.0.1:9090` (configurable via `K8S_EXECUTOR_PORT`) +4. The sidecar sends execution requests to the agent via HTTP POST to `/execute` +5. 
The agent spawns subprocesses (e.g., `python code.py`) inheriting the container's sanitized environment
+
+**Pod Settings (agent mode):**
+```yaml
+spec:
+  # No shareProcessNamespace needed
+  initContainers:
+  - name: agent-init
+    image: <sidecar-agent-image>
+    command: ["python", "-c", "import shutil,os; shutil.copy2('/opt/executor-agent','/mnt/data/.executor-agent'); os.chmod('/mnt/data/.executor-agent',0o755)"]
+    securityContext:
+      runAsUser: 65532
+      runAsNonRoot: true
+      allowPrivilegeEscalation: false
+      capabilities:
+        drop: ["ALL"]
+  containers:
+  - name: main
+    args: ["/mnt/data/.executor-agent"]  # Runs via existing ENTRYPOINT
+    securityContext:
+      runAsUser: 65532
+      runAsNonRoot: true
+      allowPrivilegeEscalation: false
+      capabilities:
+        drop: ["ALL"]
+  - name: sidecar
+    env:
+    - name: EXECUTION_MODE
+      value: "agent"
+    - name: EXECUTOR_PORT
+      value: "9090"
+    securityContext:
+      runAsUser: 65532
+      runAsNonRoot: true
+      allowPrivilegeEscalation: false
+      capabilities:
+        drop: ["ALL"]
+```
+
+**Security advantages of agent mode:**
+
+| Feature | Benefit |
+|---------|---------|
+| No `shareProcessNamespace` | Containers cannot see each other's processes |
+| No capabilities | All capabilities dropped for all containers |
+| No `allowPrivilegeEscalation` | No binary can gain elevated privileges |
+| No `nsenter` | No namespace entry, no mount namespace sharing |
+| GKE Sandbox (gVisor) compatible | Works with the most restrictive Pod Security Standards |
+| Communication via localhost | Pod-internal only, not network-accessible |
+
+##### nsenter Mode (Legacy) — `K8S_EXECUTION_MODE=nsenter`
+
+In nsenter mode, the sidecar uses Linux `nsenter` to execute code in the main container's mount namespace. This requires elevated privileges and is preserved for backward compatibility with clusters that allow privilege escalation.
+
+**Namespace Sharing Security (nsenter)**

The sidecar container uses Linux `nsenter` to execute code in the main container's mount namespace. 
This requires specific pod and image configuration. @@ -267,6 +335,58 @@ execution: 3. **No Inter-Pod Communication**: NetworkPolicy denies all ingress from other pods. +### GKE Sandbox (gVisor) Support + +For clusters requiring additional kernel-level isolation, KubeCodeRun supports [GKE Sandbox](https://docs.cloud.google.com/kubernetes-engine/docs/concepts/sandbox-pods), which uses [gVisor](https://gvisor.dev/) to intercept system calls before they reach the host kernel. + +**GKE Sandbox requires agent mode** (`K8S_EXECUTION_MODE=agent`). nsenter mode is incompatible with gVisor because: +- gVisor does not support `shareProcessNamespace` the same way as a standard Linux kernel +- `nsenter` relies on host kernel namespace operations that gVisor intentionally intercepts +- Agent mode eliminates the need for `SYS_PTRACE`, `SYS_ADMIN`, and `SYS_CHROOT` capabilities, which are restricted in sandboxed pods + +#### Configuration + +```yaml +# In helm values.yaml +execution: + executionMode: "agent" # Required for GKE Sandbox + + gkeSandbox: + enabled: true + runtimeClassName: "gvisor" + nodeSelector: + sandbox.gke.io/runtime: gvisor + customTolerations: + - key: sandbox.gke.io/runtime + operator: Equal + value: gvisor + effect: NoSchedule +``` + +#### Security Benefits + +| Feature | Without GKE Sandbox | With GKE Sandbox | +|---------|-------------------|-----------------| +| System call isolation | Seccomp profile only | gVisor userspace kernel intercepts all syscalls | +| Kernel exposure | Container shares host kernel | gVisor provides an independent kernel API | +| Escape risk | Kernel vulnerability could escape | Two boundaries: gVisor + container | +| Side-channel attacks | Possible via shared kernel | Mitigated by kernel-level isolation | + +#### Requirements + +- GKE cluster with at least two node pools (one standard, one sandbox-enabled) +- Sandbox node pool with `--sandbox type=gvisor` +- Agent execution mode (`executionMode: "agent"`) +- Sidecar image built 
with `--target sidecar-agent` (default) + +#### Limitations (from GKE documentation) + +- No `hostPath` storage +- No privileged containers +- Seccomp, AppArmor, SELinux are not supported (gVisor provides its own isolation) +- Container-level memory metrics are not available (pod-level metrics work) +- See [GKE Sandbox limitations](https://docs.cloud.google.com/kubernetes-engine/docs/concepts/sandbox-pods#limitations) for the full list + ### State Persistence Security Python state persistence introduces additional security considerations: @@ -280,7 +400,7 @@ Python state persistence introduces additional security considerations: #### Storage Security -- **Redis encryption**: Consider enabling Redis TLS in production for encrypted state storage +- **Redis encryption**: Enable Redis TLS in production for encrypted state storage (`REDIS_TLS_ENABLED=true`). Required for managed services like GCP Memorystore, AWS ElastiCache, and Azure Cache for Redis. See the [Configuration Guide](CONFIGURATION.md#redis-tlsssl) for details. 
- **MinIO encryption**: Enable server-side encryption for archived states - **TTL-based cleanup**: States automatically expire (2 hours in Redis, 7 days in MinIO archives) - **Size limits**: `STATE_MAX_SIZE_MB` prevents denial-of-service via large states diff --git a/helm-deployments/kubecoderun/templates/_helpers.tpl b/helm-deployments/kubecoderun/templates/_helpers.tpl index 6e6d32d..91d06d9 100644 --- a/helm-deployments/kubecoderun/templates/_helpers.tpl +++ b/helm-deployments/kubecoderun/templates/_helpers.tpl @@ -78,19 +78,21 @@ Execution namespace {{- end }} {{/* -Redis URL +Redis URL — honours TLS setting to switch between redis:// and rediss:// */}} {{- define "kubecoderun.redisUrl" -}} {{- if .Values.redis.url }} {{- .Values.redis.url }} {{- else if .Values.redis.host }} +{{- $scheme := ternary "rediss" "redis" .Values.redis.tls.enabled }} {{- if .Values.redis.password }} -{{- printf "redis://:%s@%s:%d/%d" .Values.redis.password .Values.redis.host (int .Values.redis.port) (int .Values.redis.db) }} +{{- printf "%s://:%s@%s:%d/%d" $scheme .Values.redis.password .Values.redis.host (int .Values.redis.port) (int .Values.redis.db) }} {{- else }} -{{- printf "redis://%s:%d/%d" .Values.redis.host (int .Values.redis.port) (int .Values.redis.db) }} +{{- printf "%s://%s:%d/%d" $scheme .Values.redis.host (int .Values.redis.port) (int .Values.redis.db) }} {{- end }} {{- else }} -{{- "redis://redis:6379/0" }} +{{- $scheme := ternary "rediss" "redis" .Values.redis.tls.enabled }} +{{- printf "%s://redis:6379/0" $scheme }} {{- end }} {{- end }} diff --git a/helm-deployments/kubecoderun/templates/configmap.yaml b/helm-deployments/kubecoderun/templates/configmap.yaml index a20df80..3c0fd29 100644 --- a/helm-deployments/kubecoderun/templates/configmap.yaml +++ b/helm-deployments/kubecoderun/templates/configmap.yaml @@ -36,15 +36,32 @@ data: K8S_IMAGE_REGISTRY: {{ .Values.execution.imageRegistry | quote }} K8S_IMAGE_TAG: {{ $imageTag | quote }} K8S_IMAGE_PULL_POLICY: {{ 
.Values.execution.imagePullPolicy | quote }} + {{- if .Values.execution.imagePullSecrets }} + K8S_IMAGE_PULL_SECRETS: {{ join "," (pluck "name" .Values.execution.imagePullSecrets) | quote }} + {{- else }} + K8S_IMAGE_PULL_SECRETS: "" + {{- end }} K8S_CPU_LIMIT: {{ .Values.execution.resources.limits.cpu | quote }} K8S_MEMORY_LIMIT: {{ .Values.execution.resources.limits.memory | quote }} K8S_CPU_REQUEST: {{ .Values.execution.resources.requests.cpu | quote }} K8S_MEMORY_REQUEST: {{ .Values.execution.resources.requests.memory | quote }} K8S_RUN_AS_USER: {{ .Values.execution.securityContext.runAsUser | quote }} + K8S_EXECUTION_MODE: {{ .Values.execution.executionMode | quote }} + K8S_EXECUTOR_PORT: {{ .Values.execution.executorPort | quote }} K8S_SECCOMP_PROFILE_TYPE: {{ .Values.execution.securityContext.seccompProfile.type | quote }} K8S_JOB_TTL_SECONDS: {{ .Values.execution.jobs.ttlSecondsAfterFinished | quote }} K8S_JOB_DEADLINE_SECONDS: {{ .Values.execution.jobs.activeDeadlineSeconds | quote }} + # GKE Sandbox Configuration + GKE_SANDBOX_ENABLED: {{ .Values.execution.gkeSandbox.enabled | quote }} + GKE_SANDBOX_RUNTIME_CLASS: {{ .Values.execution.gkeSandbox.runtimeClassName | quote }} + {{- if .Values.execution.gkeSandbox.nodeSelector }} + GKE_SANDBOX_NODE_SELECTOR: {{ .Values.execution.gkeSandbox.nodeSelector | toJson | quote }} + {{- end }} + {{- if .Values.execution.gkeSandbox.customTolerations }} + GKE_SANDBOX_CUSTOM_TOLERATIONS: {{ .Values.execution.gkeSandbox.customTolerations | toJson | quote }} + {{- end }} + # Pod Lifecycle POD_TTL_MINUTES: {{ .Values.execution.podTtlMinutes | quote }} POD_CLEANUP_INTERVAL_MINUTES: {{ .Values.execution.podCleanupIntervalMinutes | quote }} @@ -302,10 +319,41 @@ data: WAN_NETWORK_NAME: {{ .Values.network.wan.networkName | quote }} WAN_DNS_SERVERS: {{ .Values.network.wan.dnsServers | toJson | quote }} - # Redis Advanced Configuration + # Redis Configuration + REDIS_MODE: {{ .Values.redis.mode | quote }} + {{- if 
.Values.redis.host }} + REDIS_HOST: {{ .Values.redis.host | quote }} + {{- end }} + REDIS_PORT: {{ .Values.redis.port | quote }} + REDIS_DB: {{ .Values.redis.db | quote }} REDIS_MAX_CONNECTIONS: {{ .Values.redis.maxConnections | quote }} REDIS_SOCKET_TIMEOUT: {{ .Values.redis.socketTimeout | quote }} REDIS_SOCKET_CONNECT_TIMEOUT: {{ .Values.redis.socketConnectTimeout | quote }} + {{- if .Values.redis.keyPrefix }} + REDIS_KEY_PREFIX: {{ .Values.redis.keyPrefix | quote }} + {{- end }} + {{- if .Values.redis.clusterNodes }} + REDIS_CLUSTER_NODES: {{ .Values.redis.clusterNodes | quote }} + {{- end }} + {{- if .Values.redis.sentinelNodes }} + REDIS_SENTINEL_NODES: {{ .Values.redis.sentinelNodes | quote }} + {{- end }} + REDIS_SENTINEL_MASTER: {{ .Values.redis.sentinelMaster | quote }} + {{- if .Values.redis.sentinelPassword }} + REDIS_SENTINEL_PASSWORD: {{ .Values.redis.sentinelPassword | quote }} + {{- end }} + REDIS_TLS_ENABLED: {{ .Values.redis.tls.enabled | quote }} + {{- if .Values.redis.tls.caCertFile }} + REDIS_TLS_CA_CERT_FILE: {{ .Values.redis.tls.caCertFile | quote }} + {{- end }} + {{- if .Values.redis.tls.certFile }} + REDIS_TLS_CERT_FILE: {{ .Values.redis.tls.certFile | quote }} + {{- end }} + {{- if .Values.redis.tls.keyFile }} + REDIS_TLS_KEY_FILE: {{ .Values.redis.tls.keyFile | quote }} + {{- end }} + REDIS_TLS_INSECURE: {{ .Values.redis.tls.insecure | quote }} + REDIS_TLS_CHECK_HOSTNAME: {{ .Values.redis.tls.checkHostname | quote }} # MinIO/S3 Configuration {{- if not .Values.secretsStore.enabled }} diff --git a/helm-deployments/kubecoderun/templates/secret.yaml b/helm-deployments/kubecoderun/templates/secret.yaml index 2e22a17..8003502 100644 --- a/helm-deployments/kubecoderun/templates/secret.yaml +++ b/helm-deployments/kubecoderun/templates/secret.yaml @@ -20,8 +20,11 @@ stringData: {{- end }} {{- end }} {{- if not .Values.redis.existingSecret }} - # Redis URL + # Redis URL (standalone mode) and password (all modes) REDIS_URL: {{ include 
"kubecoderun.redisUrl" . | quote }} + {{- if .Values.redis.password }} + REDIS_PASSWORD: {{ .Values.redis.password | quote }} + {{- end }} {{- end }} {{- if and (not .Values.minio.existingSecret) (not .Values.minio.useIAM) }} # S3-Compatible Storage Credentials (Garage/MinIO/S3) diff --git a/helm-deployments/kubecoderun/values.yaml b/helm-deployments/kubecoderun/values.yaml index e06bbd6..956aa4a 100644 --- a/helm-deployments/kubecoderun/values.yaml +++ b/helm-deployments/kubecoderun/values.yaml @@ -99,6 +99,10 @@ redis: # When set, the url/host/port/password/db fields below are ignored # Expected secret key: REDIS_URL (full connection string) existingSecret: "" + + # Deployment mode: standalone (default), cluster, or sentinel + mode: "standalone" + # External Redis URL (required unless existingSecret is set) url: "redis://redis:6379/0" # Or specify individual fields @@ -111,6 +115,35 @@ redis: socketTimeout: 5 socketConnectTimeout: 5 + # Optional key prefix prepended to every Redis key. + # Useful when sharing a Redis instance across environments. + keyPrefix: "" + + # Redis Cluster mode (mode: cluster) + # Comma-separated host:port pairs for cluster startup nodes + clusterNodes: "" + + # Redis Sentinel mode (mode: sentinel) + # Comma-separated host:port pairs for sentinel instances + sentinelNodes: "" + sentinelMaster: "mymaster" + sentinelPassword: "" + + # TLS/SSL settings (all modes) + tls: + enabled: false + # Path to CA certificate inside the container + caCertFile: "" + # Client certificate and key for mutual TLS + certFile: "" + keyFile: "" + # Skip server certificate verification (NOT recommended for production) + insecure: false + # Verify server hostname against certificate CN/SAN. + # Off by default because managed Redis services and cluster mode + # expose node IPs that typically don't match certificate names. 
+ checkHostname: false + minio: # Reference an existing Kubernetes Secret containing S3 credentials # When set, the accessKey/secretKey fields below are ignored @@ -172,17 +205,36 @@ execution: # Image pull policy for execution pods (IfNotPresent, Always, Never) imagePullPolicy: "IfNotPresent" + # Image pull secrets for private registries (applies to execution pods) + # Example: + # imagePullSecrets: + # - name: secret-for-registry + # - name: another-secret + imagePullSecrets: [] + # Service account for execution pods (with pod/job create permissions) serviceAccount: create: true name: "kubecoderun-executor" annotations: {} + # Execution mode: "agent" (default) or "nsenter" (legacy) + # - agent: Executor agent runs inside main container. No nsenter, no capabilities, + # no privilege escalation. Compatible with GKE Sandbox (gVisor). Requires the + # sidecar-agent image (default build target). + # - nsenter: Sidecar uses nsenter to enter the main container's namespace. Requires + # the sidecar-nsenter image, shareProcessNamespace, SYS_PTRACE/SYS_ADMIN/SYS_CHROOT + # capabilities, and allowPrivilegeEscalation: true. 
+ executionMode: "agent" + + # Port for the executor HTTP server inside the main container + executorPort: 9090 + # Sidecar container configuration - # CRITICAL: User code runs in sidecar's cgroup via nsenter (Issue #32) - # These resource limits apply to user code execution, not the main container + # In nsenter mode: user code runs in sidecar's cgroup via nsenter + # In agent mode: sidecar only proxies requests, user code runs in main container's cgroup sidecar: - repository: ghcr.io/aron-muon/kubecoderun-sidecar + repository: ghcr.io/aron-muon/kubecoderun-sidecar-agent # tag defaults to Chart.AppVersion if not specified tag: "" port: 8080 @@ -285,6 +337,40 @@ execution: enabled: true denyEgress: true + # GKE Sandbox (gVisor) Configuration + # Provides additional kernel isolation for untrusted workloads using gVisor + # See: https://docs.cloud.google.com/kubernetes-engine/docs/concepts/sandbox-pods + gkeSandbox: + # Enable GKE Sandbox for execution pods. + # WARNING: When enabled, pods require nodes with the gVisor runtime class. + # Pods will stay Pending on clusters without sandbox-enabled node pools. 
+ enabled: false + + # Runtime class name (default: gvisor for GKE) + runtimeClassName: "gvisor" + + # Node selector for sandbox-enabled nodes + # GKE automatically adds sandbox.gke.io/runtime=gvisor to sandbox nodes + # Add additional selectors here if needed (e.g., for specific node pools) + nodeSelector: {} + # Example: + # sandbox.gke.io/runtime: gvisor + # cloud.google.com/gke-nodepool: sandbox-pool + + # Custom tolerations for node pool taints + # GKE automatically adds toleration for sandbox.gke.io/runtime=gvisor + # Use this for additional custom taints (e.g., dedicated sandbox node pools) + customTolerations: [] + # Example: + # - key: pool + # operator: Equal + # value: sandbox + # effect: NoSchedule + # - key: sandbox.gke.io/runtime + # operator: Equal + # value: gvisor + # effect: NoSchedule + # Resource Limits Configuration resourceLimits: # Execution limits diff --git a/scripts/build-images.sh b/scripts/build-images.sh index 13acdb0..0a94ef7 100755 --- a/scripts/build-images.sh +++ b/scripts/build-images.sh @@ -1,29 +1,6 @@ #!/usr/bin/env bash # shellcheck disable=SC2153 # Variables are intentionally sourced from result files # Build all KubeCodeRun Docker images in parallel -# -# Usage: ./scripts/build-images.sh [OPTIONS] [IMAGE] -# -# Arguments: -# IMAGE Build a single image with full output (e.g., go, python, sidecar) -# -# Options: -# -t, --tag TAG Image tag (default: latest) -# -r, --registry REG Registry prefix (e.g., aronmuon/kubecoderun) -# -p, --push Push images after building -# --no-cache Build without cache -# --sequential Build sequentially instead of in parallel -# -h, --help Show this help message -# -# Environment: -# DHI_USERNAME Username for dhi.io registry login -# DHI_PASSWORD Password for dhi.io registry login -# -# Examples: -# ./scripts/build-images.sh # Build all images in parallel -# ./scripts/build-images.sh go # Build only the go image with full output -# ./scripts/build-images.sh --no-cache rust # Build rust image without 
cache - set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" @@ -61,13 +38,77 @@ LANGUAGE_IMAGES=( # Infrastructure images with custom contexts # sidecar: context is docker/sidecar/ (contains requirements.txt, main.py) # api: context is repo root (needs uv.lock, pyproject.toml, src/) +# +# Format: dockerfile_path:image_name:context_dir:docker_target (target is optional) +# The sidecar Dockerfile has two targets: +# sidecar-agent → kubecoderun-sidecar-agent (default, no nsenter) +# sidecar-nsenter → kubecoderun-sidecar-nsenter (legacy, with nsenter+setcap) INFRA_IMAGES=( - "sidecar/Dockerfile:sidecar:docker/sidecar" + "sidecar/Dockerfile:sidecar-agent:docker/sidecar:sidecar-agent" + "sidecar/Dockerfile:sidecar-nsenter:docker/sidecar:sidecar-nsenter" "api/Dockerfile:api:." ) usage() { - head -n 25 "$0" | tail -n 23 | sed 's/^# //' + cat <<'EOF' +Usage: ./scripts/build-images.sh [OPTIONS] [IMAGE] + +Build Docker images for the KubeCodeRun platform. + +When called without arguments, builds ALL images (language runtimes, +sidecar variants, and API) in parallel. Specify IMAGE to build a +single image with full terminal output (useful for debugging). 
+
+Arguments:
+  IMAGE               Name of a single image to build (see --list)
+
+Options:
+  -t, --tag TAG       Image tag (default: latest)
+  -r, --registry REG  Registry prefix (e.g., ghcr.io/org/kubecoderun)
+  -p, --push          Push images to the registry after building
+  --no-cache          Build without Docker layer cache
+  --sequential        Build images one at a time instead of in parallel
+  -l, --list          List all available image names and exit
+  -h, -?, --help      Show this help message and exit
+
+Environment Variables:
+  DHI_USERNAME        Username for dhi.io registry authentication
+  DHI_PASSWORD        Password for dhi.io registry authentication
+
+Examples:
+  # Build all images in parallel (default)
+  ./scripts/build-images.sh
+
+  # Build only the Go language image with full output
+  ./scripts/build-images.sh go
+
+  # Build the agent-mode sidecar without cache
+  ./scripts/build-images.sh --no-cache sidecar-agent
+
+  # Build and push all images to a private registry
+  ./scripts/build-images.sh -r ghcr.io/myorg/kubecoderun -t v2.0.0 --push
+
+  # Build the nsenter-mode sidecar
+  ./scripts/build-images.sh sidecar-nsenter
+
+  # List all available image names
+  ./scripts/build-images.sh --list
+EOF
+}
+
+list_images() {
+    local all_images=("${LANGUAGE_IMAGES[@]}" "${INFRA_IMAGES[@]}")
+    echo "Available images:"
+    echo ""
+    printf "  %-20s %-35s %-15s\n" "NAME" "DOCKERFILE" "TARGET"
+    printf "  %-20s %-35s %-15s\n" "────────────────────" "───────────────────────────────────" "───────────────"
+    for entry in "${all_images[@]}"; do
+        IFS=':' read -r dockerfile image_name context_dir docker_target <<< "$entry"
+        printf "  %-20s %-35s %-15s\n" "$image_name" "$dockerfile" "${docker_target:--}"
+    done
+    echo ""
+    echo "Build a single image: ./scripts/build-images.sh IMAGE"
+    echo "Build all images: ./scripts/build-images.sh"
+}

 dhi_login() {
@@ -112,13 +153,18 @@ parse_args() {
                SEQUENTIAL=true
                shift
                ;;
-            -h|--help)
+            -h|-\?|--help)
                usage
                exit 0
                ;;
+            -l|--list)
+                list_images
+                exit 0
+                ;;
            -*)
-                echo "Unknown option: $1"
-                usage
+                echo 
"Error: Unknown option '$1'" + echo "" + echo "Run './scripts/build-images.sh --help' for usage information." exit 1 ;; *) @@ -169,6 +215,7 @@ build_image() { local image_name="$2" local result_file="$3" local context_dir="$4" + local docker_target="$5" local full_name full_name=$(get_full_image_name "$image_name") @@ -186,9 +233,17 @@ build_image() { local build_output local exit_code=0 + # Build with optional --target + local target_flag="" + if [[ -n "$docker_target" ]]; then + target_flag="--target $docker_target" + fi + # shellcheck disable=SC2086 build_output=$(docker build \ $NO_CACHE \ + $target_flag \ + --build-arg VERSION="$TAG" \ -t "$full_name" \ -f "$DOCKER_DIR/$dockerfile" \ "$context_path" 2>&1) || exit_code=$? @@ -228,9 +283,10 @@ build_image_wrapper() { local dockerfile="$1" local image_name="$2" local context_dir="$3" + local docker_target="$4" local result_file="$RESULTS_DIR/${image_name}.result" - if build_image "$dockerfile" "$image_name" "$result_file" "$context_dir"; then + if build_image "$dockerfile" "$image_name" "$result_file" "$context_dir" "$docker_target"; then echo "Completed: $image_name" else echo "Failed: $image_name" @@ -244,7 +300,7 @@ build_single_image() { local found=false for entry in "${all_images[@]}"; do - IFS=':' read -r dockerfile image_name context_dir <<< "$entry" + IFS=':' read -r dockerfile image_name context_dir docker_target <<< "$entry" if [[ "$image_name" == "$target_image" ]]; then found=true @@ -268,12 +324,22 @@ build_single_image() { echo "Building $image_name -> $full_name" echo " Dockerfile: $DOCKER_DIR/$dockerfile" echo " Context: $context_path" + if [[ -n "$docker_target" ]]; then + echo " Target: $docker_target" + fi echo "" - # Build with output directly to terminal + # Build with output directly to terminal (optional --target) + local target_flag="" + if [[ -n "$docker_target" ]]; then + target_flag="--target $docker_target" + fi + # shellcheck disable=SC2086 docker build \ $NO_CACHE \ + $target_flag \ 
+ --build-arg VERSION="$TAG" \ -t "$full_name" \ -f "$DOCKER_DIR/$dockerfile" \ "$context_path" @@ -297,7 +363,7 @@ build_single_image() { echo "" echo "Available images:" for entry in "${all_images[@]}"; do - IFS=':' read -r _ image_name _ <<< "$entry" + IFS=':' read -r _ image_name _ _ <<< "$entry" echo " - $image_name" done exit 1 @@ -342,8 +408,8 @@ main() { echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" for entry in "${all_images[@]}"; do - # Parse entry: dockerfile:image_name:context_dir - IFS=':' read -r dockerfile image_name context_dir <<< "$entry" + # Parse entry: dockerfile:image_name:context_dir:docker_target (target is optional) + IFS=':' read -r dockerfile image_name context_dir docker_target <<< "$entry" if [[ ! -f "$DOCKER_DIR/$dockerfile" ]]; then echo "Warning: Dockerfile not found: $dockerfile" @@ -352,9 +418,9 @@ main() { echo "Starting: $image_name" if [[ "$SEQUENTIAL" == true ]]; then - build_image_wrapper "$dockerfile" "$image_name" "$context_dir" + build_image_wrapper "$dockerfile" "$image_name" "$context_dir" "$docker_target" else - build_image_wrapper "$dockerfile" "$image_name" "$context_dir" & + build_image_wrapper "$dockerfile" "$image_name" "$context_dir" "$docker_target" & pids+=($!) 
fi done @@ -386,7 +452,7 @@ main() { printf "%-15s %-10s %-12s %-8s\n" "─────────────" "────────" "──────────" "──────" for entry in "${all_images[@]}"; do - IFS=':' read -r _ image_name _ <<< "$entry" + IFS=':' read -r _ image_name _ _ <<< "$entry" result_file="$RESULTS_DIR/${image_name}.result" if [[ -f "$result_file" ]]; then diff --git a/src/config/__init__.py b/src/config/__init__.py index aee6bfa..9926ff4 100644 --- a/src/config/__init__.py +++ b/src/config/__init__.py @@ -88,15 +88,71 @@ class Settings(BaseSettings): rate_limit_enabled: bool = Field(default=True, description="Enable per-key rate limiting for Redis-managed keys") # Redis Configuration + redis_mode: Literal["standalone", "cluster", "sentinel"] = Field( + default="standalone", + description="Redis deployment mode: standalone, cluster, or sentinel", + ) redis_host: str = Field(default="localhost") redis_port: int = Field(default=6379, ge=1, le=65535) - redis_password: str | None = Field(default=None) + redis_password: str | None = Field(default=None, description="Redis password (empty string treated as no password)") redis_db: int = Field(default=0, ge=0, le=15) redis_url: str | None = Field(default=None) redis_max_connections: int = Field(default=20, ge=1) redis_socket_timeout: int = Field(default=5, ge=1) redis_socket_connect_timeout: int = Field(default=5, ge=1) + # Redis Cluster + redis_cluster_nodes: str | None = Field( + default=None, + description="Comma-separated host:port pairs for Redis Cluster startup nodes", + ) + + # Redis Sentinel + redis_sentinel_nodes: str | None = Field( + default=None, + description="Comma-separated host:port pairs for Sentinel instances", + ) + redis_sentinel_master: str = Field( + default="mymaster", + description="Name of the Sentinel-monitored master", + ) + redis_sentinel_password: str | None = Field( + default=None, + description="Password for authenticating to Sentinel instances", + ) + + # Redis Key Prefix + redis_key_prefix: str = Field( + default="", 
+ description="Optional prefix prepended to every Redis key (e.g. 'prod:', 'kubecoderun:')", + ) + + # Redis TLS/SSL + redis_tls_enabled: bool = Field( + default=False, + description="Enable TLS/SSL for Redis connections", + ) + redis_tls_cert_file: str | None = Field( + default=None, + description="Path to client TLS certificate (mutual TLS)", + ) + redis_tls_key_file: str | None = Field( + default=None, + description="Path to client TLS private key (mutual TLS)", + ) + redis_tls_ca_cert_file: str | None = Field( + default=None, + description="Path to CA certificate for verifying the server", + ) + redis_tls_insecure: bool = Field( + default=False, + description="Skip TLS certificate verification (NOT recommended for production)", + ) + redis_tls_check_hostname: bool = Field( + default=False, + description="Enable TLS hostname verification (off by default for managed Redis / cluster)", + ) + # MinIO/S3 Configuration minio_endpoint: str = Field(default="localhost:9000") minio_access_key: str | None = Field(default=None) @@ -119,7 +175,7 @@ class Settings(BaseSettings): description="Service account for execution pods", ) k8s_sidecar_image: str = Field( - default="aronmuon/kubecoderun-sidecar:latest", + default="aronmuon/kubecoderun-sidecar-agent:latest", description="Sidecar container image for pod communication", ) k8s_sidecar_port: int = Field(default=8080, ge=1, le=65535, description="Sidecar HTTP API port") @@ -132,6 +188,16 @@ class Settings(BaseSettings): k8s_cpu_request: str = Field(default="100m", description="CPU request for execution pods") k8s_memory_request: str = Field(default="128Mi", description="Memory request for execution pods") k8s_run_as_user: int = Field(default=65532, ge=1, description="UID to run containers as") + k8s_execution_mode: Literal["agent", "nsenter"] = Field( + default="agent", + description="Execution mode: 'agent' (no nsenter/capabilities, gVisor-safe) or 'nsenter' (legacy)", + ) + k8s_executor_port: int = Field( + default=9090, + 
ge=1, + le=65535, + description="Port for the executor HTTP server inside the main container", + ) k8s_seccomp_profile_type: Literal["RuntimeDefault", "Unconfined"] = Field( default="RuntimeDefault", description="Seccomp profile type for execution pods", @@ -157,6 +223,28 @@ class Settings(BaseSettings): default="Always", description="Image pull policy for execution pods (Always, IfNotPresent, Never)", ) + k8s_image_pull_secrets: str = Field( + default="", + description="Comma-separated list of secret names for pulling images from private registries", + ) + + # GKE Sandbox (gVisor) Configuration + gke_sandbox_enabled: bool = Field( + default=False, + description="Enable GKE Sandbox (gVisor) for additional kernel isolation", + ) + gke_sandbox_runtime_class: str = Field( + default="gvisor", + description="Runtime class name for sandboxed pods", + ) + gke_sandbox_node_selector: str | None = Field( + default=None, + description="JSON string of node selector for sandbox-enabled nodes", + ) + gke_sandbox_custom_tolerations: str | None = Field( + default=None, + description="JSON string of custom tolerations for node pool taints", + ) # Resource Limits - Execution max_execution_time: int = Field(default=30, ge=1, le=600) @@ -407,6 +495,12 @@ def _set_supported_languages(cls, data): } return data + # Service Version Override (set at deploy time to override build-time version) + service_version: str | None = Field( + default=None, + description="Runtime version override (e.g. '2.1.4'). 
Falls back to build-time version from _version.py.", + ) + # Logging Configuration log_level: str = Field(default="INFO") log_format: str = Field(default="json") @@ -435,6 +529,38 @@ def parse_api_keys(cls, v): """Parse comma-separated API keys into a list.""" return [key.strip() for key in v.split(",") if key.strip()] if v else None + @field_validator("redis_host", mode="before") + @classmethod + def sanitize_redis_host(cls, v): + """Strip accidental URL scheme from Redis host.""" + if isinstance(v, str): + for scheme in ("rediss://", "redis://"): + if v.lower().startswith(scheme): + v = v[len(scheme) :].rstrip("/") + break + return v + + @field_validator("redis_password", "redis_sentinel_password", mode="before") + @classmethod + def sanitize_redis_password(cls, v): + """Convert empty password strings to None. + + Kubernetes / Helm often set REDIS_PASSWORD="" which pydantic reads + as empty string. Passing an empty password to redis-py sends + AUTH "" which fails when the server has no auth configured. 
+ """ + if isinstance(v, str) and v.strip() == "": + return None + return v + + @field_validator("redis_cluster_nodes", "redis_sentinel_nodes", mode="before") + @classmethod + def sanitize_redis_nodes(cls, v): + """Convert empty node lists to None so code falls back to host:port.""" + if isinstance(v, str) and v.strip() == "": + return None + return v + @field_validator("minio_endpoint") @classmethod def validate_minio_endpoint(cls, v): @@ -470,6 +596,7 @@ def api(self) -> APIConfig: def redis(self) -> RedisConfig: """Access Redis configuration group.""" return RedisConfig( + redis_mode=self.redis_mode, redis_host=self.redis_host, redis_port=self.redis_port, redis_password=self.redis_password, @@ -478,6 +605,17 @@ def redis(self) -> RedisConfig: redis_max_connections=self.redis_max_connections, redis_socket_timeout=self.redis_socket_timeout, redis_socket_connect_timeout=self.redis_socket_connect_timeout, + redis_cluster_nodes=self.redis_cluster_nodes, + redis_sentinel_nodes=self.redis_sentinel_nodes, + redis_sentinel_master=self.redis_sentinel_master, + redis_sentinel_password=self.redis_sentinel_password, + redis_key_prefix=self.redis_key_prefix, + redis_tls_enabled=self.redis_tls_enabled, + redis_tls_cert_file=self.redis_tls_cert_file, + redis_tls_key_file=self.redis_tls_key_file, + redis_tls_ca_cert_file=self.redis_tls_ca_cert_file, + redis_tls_insecure=self.redis_tls_insecure, + redis_tls_check_hostname=self.redis_tls_check_hostname, ) @property @@ -548,6 +686,33 @@ def logging(self) -> LoggingConfig: @property def kubernetes(self) -> KubernetesConfig: """Access Kubernetes configuration group.""" + import json + + # Parse JSON strings for node selector and tolerations + sandbox_node_selector = None + if self.gke_sandbox_node_selector: + try: + sandbox_node_selector = json.loads(self.gke_sandbox_node_selector) + except json.JSONDecodeError: + import logging + + logging.getLogger(__name__).warning( + "Invalid JSON in GKE_SANDBOX_NODE_SELECTOR, ignoring: %s", + 
self.gke_sandbox_node_selector, + ) + + custom_tolerations = None + if self.gke_sandbox_custom_tolerations: + try: + custom_tolerations = json.loads(self.gke_sandbox_custom_tolerations) + except json.JSONDecodeError: + import logging + + logging.getLogger(__name__).warning( + "Invalid JSON in GKE_SANDBOX_CUSTOM_TOLERATIONS, ignoring: %s", + self.gke_sandbox_custom_tolerations, + ) + return KubernetesConfig( namespace=self.k8s_namespace, service_account=self.k8s_service_account, @@ -562,11 +727,19 @@ def kubernetes(self) -> KubernetesConfig: cpu_request=self.k8s_cpu_request, memory_request=self.k8s_memory_request, run_as_user=self.k8s_run_as_user, + execution_mode=self.k8s_execution_mode, + executor_port=self.k8s_executor_port, seccomp_profile_type=self.k8s_seccomp_profile_type, job_ttl_seconds_after_finished=self.k8s_job_ttl_seconds, job_active_deadline_seconds=self.k8s_job_deadline_seconds, image_registry=self.k8s_image_registry, image_tag=self.k8s_image_tag, + image_pull_policy=self.k8s_image_pull_policy, + image_pull_secrets=self.k8s_image_pull_secrets, + gke_sandbox_enabled=self.gke_sandbox_enabled, + runtime_class_name=self.gke_sandbox_runtime_class, + sandbox_node_selector=sandbox_node_selector, + custom_tolerations=custom_tolerations, ) def get_pool_configs(self): @@ -574,10 +747,26 @@ def get_pool_configs(self): Returns list of PoolConfig for all configured languages. 
""" + import json import os from ..services.kubernetes.models import PoolConfig + # Parse GKE Sandbox configuration once + sandbox_node_selector = None + if self.gke_sandbox_node_selector: + try: + sandbox_node_selector = json.loads(self.gke_sandbox_node_selector) + except json.JSONDecodeError: + pass + + custom_tolerations = None + if self.gke_sandbox_custom_tolerations: + try: + custom_tolerations = json.loads(self.gke_sandbox_custom_tolerations) + except json.JSONDecodeError: + pass + configs = [] languages = ["py", "js", "ts", "go", "java", "c", "cpp", "php", "rs", "r", "f90", "d"] @@ -610,6 +799,11 @@ def get_pool_configs(self): sidecar_cpu_request = os.getenv(f"LANG_CPU_REQUEST_{lang_upper}") or self.k8s_sidecar_cpu_request sidecar_memory_request = os.getenv(f"LANG_MEMORY_REQUEST_{lang_upper}") or self.k8s_sidecar_memory_request + # Parse image pull secrets (comma-separated string -> list) + pull_secrets = None + if self.k8s_image_pull_secrets: + pull_secrets = [s.strip() for s in self.k8s_image_pull_secrets.split(",") if s.strip()] + configs.append( PoolConfig( language=lang, @@ -623,8 +817,15 @@ def get_pool_configs(self): sidecar_cpu_request=sidecar_cpu_request, sidecar_memory_request=sidecar_memory_request, image_pull_policy=self.k8s_image_pull_policy, + image_pull_secrets=pull_secrets, + execution_mode=self.k8s_execution_mode, + executor_port=self.k8s_executor_port, seccomp_profile_type=self.k8s_seccomp_profile_type, network_isolated=self.enable_network_isolation, + gke_sandbox_enabled=self.gke_sandbox_enabled, + runtime_class_name=self.gke_sandbox_runtime_class, + sandbox_node_selector=sandbox_node_selector, + custom_tolerations=custom_tolerations, ) ) @@ -643,11 +844,15 @@ def validate_ssl_files(self) -> bool: return Path(self.ssl_cert_file).exists() and Path(self.ssl_key_file).exists() def get_redis_url(self) -> str: - """Get Redis connection URL.""" + """Get Redis connection URL. + + Automatically uses ``rediss://`` when TLS is enabled. 
+ """ if self.redis_url: return self.redis_url + scheme = "rediss" if self.redis_tls_enabled else "redis" password_part = f":{self.redis_password}@" if self.redis_password else "" - return f"redis://{password_part}{self.redis_host}:{self.redis_port}/{self.redis_db}" + return f"{scheme}://{password_part}{self.redis_host}:{self.redis_port}/{self.redis_db}" def get_valid_api_keys(self) -> list[str]: """Get all valid API keys including the primary key.""" diff --git a/src/config/kubernetes.py b/src/config/kubernetes.py index fe99a64..64d07f1 100644 --- a/src/config/kubernetes.py +++ b/src/config/kubernetes.py @@ -18,7 +18,7 @@ class KubernetesConfig: service_account: str = "kubecoderun-executor" # Sidecar configuration - sidecar_image: str = "aronmuon/kubecoderun-sidecar:latest" + sidecar_image: str = "aronmuon/kubecoderun-sidecar-agent:latest" sidecar_port: int = 8080 # Resource limits for execution pods @@ -27,7 +27,9 @@ class KubernetesConfig: cpu_request: str = "100m" memory_request: str = "128Mi" - # Sidecar resource limits (CRITICAL: user code inherits these via nsenter) + # Sidecar resource limits + # In nsenter mode: user code runs in sidecar's cgroup via nsenter + # In agent mode: user code runs in main container's cgroup sidecar_cpu_limit: str = "500m" sidecar_memory_limit: str = "512Mi" sidecar_cpu_request: str = "100m" @@ -39,6 +41,14 @@ class KubernetesConfig: run_as_non_root: bool = True seccomp_profile_type: str = "RuntimeDefault" + # Execution mode: "agent" (default, no nsenter/capabilities needed) or "nsenter" (legacy) + # agent: Executor agent runs in main container, no privilege escalation or capabilities needed + # nsenter: Sidecar uses nsenter to enter main container namespace (requires capabilities) + execution_mode: str = "agent" + + # Executor port (main container listens on this port for execution requests) + executor_port: int = 9090 + # Job settings (for languages with pool_size=0) job_ttl_seconds_after_finished: int = 60 
job_active_deadline_seconds: int = 300 @@ -52,6 +62,27 @@ class KubernetesConfig: # e.g., aronmuon/kubecoderun-python:latest image_registry: str = "aronmuon/kubecoderun" image_tag: str = "latest" + image_pull_policy: str = "Always" + + # Image pull secrets for private registries + # Format: comma-separated list of secret names, e.g., "secret-for-registry,another-secret" + image_pull_secrets: str = "" + + # GKE Sandbox (gVisor) configuration + # When enabled, pods run with additional kernel isolation via gVisor + gke_sandbox_enabled: bool = False + + # Runtime class name for sandboxed pods (default: gvisor for GKE) + runtime_class_name: str = "gvisor" + + # Node selector for sandbox nodes + # GKE automatically adds: sandbox.gke.io/runtime=gvisor + sandbox_node_selector: dict[str, str] | None = None + + # Custom tolerations for execution pods + # GKE Sandbox automatically adds toleration for sandbox.gke.io/runtime=gvisor + # Use this for additional custom node pool taints (e.g., pool=sandbox) + custom_tolerations: list[dict[str, str]] | None = None def get_image_for_language(self, language: str) -> str: """Get the container image for a language. diff --git a/src/config/redis.py b/src/config/redis.py index 153f11e..f668528 100644 --- a/src/config/redis.py +++ b/src/config/redis.py @@ -1,11 +1,25 @@ -"""Redis configuration.""" +"""Redis configuration. -from pydantic import Field +Supports three deployment modes: +- **standalone** (default): Single Redis instance. +- **cluster**: Redis Cluster with automatic slot routing. +- **sentinel**: Redis Sentinel for high-availability failover. + +TLS/SSL is supported in all modes and is required for most managed Redis +services such as GCP Memorystore, AWS ElastiCache, and Azure Cache for Redis. 
+""" + +from typing import Literal + +from pydantic import Field, field_validator from pydantic_settings import BaseSettings, SettingsConfigDict class RedisConfig(BaseSettings): - """Redis connection settings.""" + """Redis connection settings. + + Supports standalone, cluster, and sentinel modes with optional TLS. + """ model_config = SettingsConfigDict( env_prefix="", @@ -13,6 +27,14 @@ class RedisConfig(BaseSettings): populate_by_name=True, ) + # -- Connection mode ------------------------------------------------------- + mode: Literal["standalone", "cluster", "sentinel"] = Field( + default="standalone", + alias="redis_mode", + description="Redis deployment mode: standalone, cluster, or sentinel", + ) + + # -- Basic connection (standalone / single-entry for cluster & sentinel) --- host: str = Field(default="localhost", alias="redis_host") port: int = Field(default=6379, ge=1, le=65535, alias="redis_port") password: str | None = Field(default=None, alias="redis_password") @@ -22,9 +44,193 @@ class RedisConfig(BaseSettings): socket_timeout: int = Field(default=5, ge=1, alias="redis_socket_timeout") socket_connect_timeout: int = Field(default=5, ge=1, alias="redis_socket_connect_timeout") + # -- Cluster mode ---------------------------------------------------------- + cluster_nodes: str | None = Field( + default=None, + alias="redis_cluster_nodes", + description=( + "Comma-separated list of host:port pairs for Redis Cluster startup nodes. " + "Example: 'node1:6379,node2:6379,node3:6379'" + ), + ) + + # -- Sentinel mode --------------------------------------------------------- + sentinel_nodes: str | None = Field( + default=None, + alias="redis_sentinel_nodes", + description=( + "Comma-separated list of host:port pairs for Sentinel instances. 
" + "Example: 'sentinel1:26379,sentinel2:26379,sentinel3:26379'" + ), + ) + sentinel_master: str = Field( + default="mymaster", + alias="redis_sentinel_master", + description="Name of the Sentinel-monitored master.", + ) + sentinel_password: str | None = Field( + default=None, + alias="redis_sentinel_password", + description="Password for authenticating to Sentinel instances (if different from Redis password).", + ) + + # -- Key prefix ------------------------------------------------------------ + key_prefix: str = Field( + default="", + alias="redis_key_prefix", + description=( + "Optional prefix prepended to every Redis key. " + "Useful for sharing a single Redis instance across multiple environments " + "or applications (e.g. 'prod:', 'staging:', 'kubecoderun:'). " + "Must end with a separator like ':' if you want one." + ), + ) + + # -- TLS / SSL ------------------------------------------------------------- + tls_enabled: bool = Field( + default=False, + alias="redis_tls_enabled", + description="Enable TLS/SSL for Redis connections.", + ) + tls_cert_file: str | None = Field( + default=None, + alias="redis_tls_cert_file", + description="Path to client TLS certificate file (mutual TLS).", + ) + tls_key_file: str | None = Field( + default=None, + alias="redis_tls_key_file", + description="Path to client TLS private key file (mutual TLS).", + ) + tls_ca_cert_file: str | None = Field( + default=None, + alias="redis_tls_ca_cert_file", + description="Path to CA certificate file for verifying the server.", + ) + tls_insecure: bool = Field( + default=False, + alias="redis_tls_insecure", + description="Skip TLS certificate verification (NOT recommended for production).", + ) + tls_check_hostname: bool = Field( + default=False, + alias="redis_tls_check_hostname", + description=( + "Enable TLS hostname verification. 
Disabled by default because " + "managed Redis services (GCP Memorystore, AWS ElastiCache) and " + "Redis Cluster mode expose node IPs that typically do not match " + "the certificate CN/SAN entries. The certificate chain is still " + "verified against the CA when tls_insecure is False." + ), + ) + + # -- Validators ------------------------------------------------------------ + + @field_validator("host", mode="before") + @classmethod + def _sanitize_host(cls, v: str) -> str: + """Strip an accidental URL scheme from the host value. + + Users sometimes set ``REDIS_HOST=rediss://hostname`` instead of just + ``REDIS_HOST=hostname``. This validator normalises the value so that + downstream code always receives a plain hostname or IP. + """ + if isinstance(v, str): + for scheme in ("rediss://", "redis://"): + if v.lower().startswith(scheme): + v = v[len(scheme) :] + # Drop any trailing slash left over + v = v.rstrip("/") + break + return v + + @field_validator("password", "sentinel_password", mode="before") + @classmethod + def _empty_string_to_none(cls, v: str | None) -> str | None: + """Convert empty strings to ``None``. + + Kubernetes ConfigMaps and Helm values often set ``REDIS_PASSWORD: ""`` + which pydantic-settings reads as ``""`` rather than ``None``. Passing + an empty password to redis-py causes it to send ``AUTH ""`` which + fails when the server has no authentication configured. + """ + if isinstance(v, str) and v.strip() == "": + return None + return v + + @field_validator("cluster_nodes", "sentinel_nodes", mode="before") + @classmethod + def _empty_nodes_to_none(cls, v: str | None) -> str | None: + """Convert empty/whitespace-only node lists to ``None``. + + Helm values default to ``clusterNodes: ""`` which renders in the + ConfigMap as an empty string. This validator treats it the same + as "not set" so the code falls back to ``host:port``. 
+ """ + if isinstance(v, str) and v.strip() == "": + return None + return v + + # -- Helpers --------------------------------------------------------------- + def get_url(self) -> str: - """Get Redis connection URL.""" + """Get Redis connection URL (standalone mode only). + + For cluster/sentinel modes the URL is not used; startup nodes are + provided separately. This method honours an explicit ``url`` and + automatically switches between the ``redis://`` and ``rediss://`` + scheme based on the ``tls_enabled`` flag. + """ if self.url: return self.url + scheme = "rediss" if self.tls_enabled else "redis" password_part = f":{self.password}@" if self.password else "" - return f"redis://{password_part}{self.host}:{self.port}/{self.db}" + return f"{scheme}://{password_part}{self.host}:{self.port}/{self.db}" + + def get_tls_kwargs(self) -> dict: + """Build keyword arguments for redis-py SSL/TLS configuration. + + Returns an empty dict when TLS is disabled so callers can safely + unpack the result: ``redis.Redis(**config.get_tls_kwargs())``. + """ + if not self.tls_enabled: + return {} + + import ssl + + kwargs: dict = {"ssl": True} + + if self.tls_insecure: + kwargs["ssl_cert_reqs"] = ssl.CERT_NONE + kwargs["ssl_check_hostname"] = False + else: + kwargs["ssl_cert_reqs"] = ssl.CERT_REQUIRED + # Hostname checking is off by default because managed Redis + # services (GCP Memorystore, AWS ElastiCache) and Redis + # Cluster node discovery return IPs that do not match the + # certificate CN/SAN. The certificate chain is still fully + # validated against the CA. 
+ kwargs["ssl_check_hostname"] = self.tls_check_hostname + + if self.tls_ca_cert_file: + kwargs["ssl_ca_certs"] = self.tls_ca_cert_file + if self.tls_cert_file: + kwargs["ssl_certfile"] = self.tls_cert_file + if self.tls_key_file: + kwargs["ssl_keyfile"] = self.tls_key_file + + return kwargs + + def parse_nodes(self, raw: str) -> list[tuple[str, int]]: + """Parse a comma-separated ``host:port`` string into a list of tuples.""" + nodes: list[tuple[str, int]] = [] + for entry in raw.split(","): + entry = entry.strip() + if not entry: + continue + if ":" in entry: + h, p = entry.rsplit(":", 1) + nodes.append((h.strip(), int(p.strip()))) + else: + nodes.append((entry, self.port)) + return nodes diff --git a/src/core/pool.py b/src/core/pool.py index 21e9baa..26629cc 100644 --- a/src/core/pool.py +++ b/src/core/pool.py @@ -2,15 +2,33 @@ This module provides centralized connection pools for external services, allowing efficient resource sharing across the application. + +Supported Redis deployment modes: +- **standalone** (default): Single Redis server with ``ConnectionPool``. +- **cluster**: Redis Cluster via ``RedisCluster``. +- **sentinel**: Redis Sentinel via ``Sentinel`` for HA failover. + +All modes support optional TLS/SSL for managed services such as +GCP Memorystore, AWS ElastiCache, and Azure Cache for Redis. """ -from typing import Optional +from __future__ import annotations + +from typing import TYPE_CHECKING import redis.asyncio as redis import structlog +from redis.asyncio.cluster import RedisCluster +from redis.asyncio.sentinel import Sentinel +from redis.backoff import ExponentialBackoff +from redis.exceptions import ConnectionError, TimeoutError +from redis.retry import Retry from ..config import settings +if TYPE_CHECKING: + from ..config.redis import RedisConfig + logger = structlog.get_logger(__name__) @@ -18,76 +36,209 @@ class RedisPool: """Centralized async Redis connection pool. 
Provides a shared connection pool for all services that need Redis, - avoiding the overhead of multiple separate pools. + avoiding the overhead of multiple separate pools. Supports standalone, + cluster, and sentinel modes with optional TLS. Usage: client = redis_pool.get_client() await client.set("key", "value") """ - def __init__(self): + def __init__(self) -> None: self._pool: redis.ConnectionPool | None = None - self._client: redis.Redis | None = None - self._initialized = False + self._client: redis.Redis | RedisCluster | None = None + self._sentinel: Sentinel | None = None + self._initialized: bool = False + self._mode: str = "standalone" + self._key_prefix: str = "" def _initialize(self) -> None: - """Initialize the connection pool lazily.""" + """Initialize the connection pool lazily based on the configured mode.""" if self._initialized: return try: - redis_url = settings.get_redis_url() - self._pool = redis.ConnectionPool.from_url( - redis_url, - max_connections=20, # Shared across all services - decode_responses=True, - socket_timeout=5.0, - socket_connect_timeout=5.0, - retry_on_timeout=True, - ) - self._client = redis.Redis(connection_pool=self._pool) + redis_cfg = settings.redis + self._mode = redis_cfg.mode + self._key_prefix = redis_cfg.key_prefix + tls_kwargs = redis_cfg.get_tls_kwargs() + max_conns = redis_cfg.max_connections + socket_timeout = float(redis_cfg.socket_timeout) + socket_connect_timeout = float(redis_cfg.socket_connect_timeout) + + if self._mode == "cluster": + self._init_cluster(redis_cfg, tls_kwargs, max_conns, socket_timeout, socket_connect_timeout) + elif self._mode == "sentinel": + self._init_sentinel(redis_cfg, tls_kwargs, max_conns, socket_timeout, socket_connect_timeout) + else: + self._init_standalone(redis_cfg, tls_kwargs, max_conns, socket_timeout, socket_connect_timeout) + self._initialized = True - logger.info( - "Redis connection pool initialized", - max_connections=20, - url=redis_url.split("@")[-1], # Don't log 
password - ) except Exception as e: - logger.error("Failed to initialize Redis pool", error=str(e)) - # Create a fallback client - self._client = redis.from_url("redis://localhost:6379/0", decode_responses=True) - self._initialized = True - - def get_client(self) -> redis.Redis: + logger.error( + "Failed to initialize Redis pool", + error=str(e), + mode=self._mode, + ) + raise + + # -- Mode-specific initialisers ------------------------------------------- + + def _init_standalone( + self, + cfg: RedisConfig, + tls_kwargs: dict, + max_conns: int, + socket_timeout: float, + socket_connect_timeout: float, + ) -> None: + redis_url = cfg.get_url() + self._pool = redis.ConnectionPool.from_url( + redis_url, + max_connections=max_conns, + decode_responses=True, + socket_timeout=socket_timeout, + socket_connect_timeout=socket_connect_timeout, + retry_on_timeout=True, + **tls_kwargs, + ) + self._client = redis.Redis(connection_pool=self._pool) + logger.info( + "Redis standalone connection pool initialized", + max_connections=max_conns, + tls=cfg.tls_enabled, + url=redis_url.split("@")[-1], + ) + + def _init_cluster( + self, + cfg: RedisConfig, + tls_kwargs: dict, + max_conns: int, + socket_timeout: float, + socket_connect_timeout: float, + ) -> None: + if cfg.cluster_nodes: + startup_nodes = [redis.cluster.ClusterNode(host=h, port=p) for h, p in cfg.parse_nodes(cfg.cluster_nodes)] + else: + startup_nodes = [redis.cluster.ClusterNode(host=cfg.host, port=cfg.port)] + + self._client = RedisCluster( + startup_nodes=startup_nodes, + password=cfg.password, + decode_responses=True, + max_connections=max_conns, + socket_timeout=socket_timeout, + socket_connect_timeout=socket_connect_timeout, + retry=Retry(ExponentialBackoff(), retries=3), + retry_on_error=[ConnectionError, TimeoutError], + **tls_kwargs, + ) + logger.info( + "Redis cluster connection initialized", + startup_nodes=[ + f"{h}:{p}" + for h, p in (cfg.parse_nodes(cfg.cluster_nodes) if cfg.cluster_nodes else [(cfg.host, 
cfg.port)]) + ], + tls=cfg.tls_enabled, + ) + + def _init_sentinel( + self, + cfg: RedisConfig, + tls_kwargs: dict, + max_conns: int, + socket_timeout: float, + socket_connect_timeout: float, + ) -> None: + if cfg.sentinel_nodes: + sentinel_hosts = cfg.parse_nodes(cfg.sentinel_nodes) + else: + sentinel_hosts = [(cfg.host, 26379)] + + self._sentinel = Sentinel( + sentinels=sentinel_hosts, + password=cfg.sentinel_password, + socket_timeout=socket_timeout, + socket_connect_timeout=socket_connect_timeout, + **tls_kwargs, + ) + self._client = self._sentinel.master_for( + service_name=cfg.sentinel_master, + password=cfg.password, + decode_responses=True, + socket_timeout=socket_timeout, + socket_connect_timeout=socket_connect_timeout, + max_connections=max_conns, + retry_on_timeout=True, + **tls_kwargs, + ) + logger.info( + "Redis sentinel connection initialized", + sentinel_nodes=[f"{h}:{p}" for h, p in sentinel_hosts], + master=cfg.sentinel_master, + tls=cfg.tls_enabled, + ) + + # -- Public API ----------------------------------------------------------- + + def get_client(self) -> redis.Redis | RedisCluster: """Get an async Redis client from the shared pool. Returns: - Async Redis client instance connected to the shared pool + Async Redis client instance connected to the shared pool. + For cluster mode this is a ``RedisCluster`` instance which + exposes the same command interface. """ if not self._initialized: self._initialize() assert self._client is not None, "Redis client not initialized" return self._client + @property + def key_prefix(self) -> str: + """Return the configured Redis key prefix (may be empty).""" + if not self._initialized: + self._initialize() + return self._key_prefix + + def make_key(self, key: str) -> str: + """Prepend the configured key prefix to *key*. + + Returns *key* unchanged when no prefix is configured. 
+ """ + prefix = self.key_prefix + if prefix: + return f"{prefix}{key}" + return key + @property def pool_stats(self) -> dict: """Get connection pool statistics.""" - if not self._pool: - return {"initialized": False} + if not self._pool and self._mode == "standalone": + return {"initialized": self._initialized, "mode": self._mode} + + stats: dict = {"initialized": self._initialized, "mode": self._mode} + + if self._key_prefix: + stats["key_prefix"] = self._key_prefix + + if self._pool: + stats["max_connections"] = self._pool.max_connections - return { - "initialized": True, - "max_connections": self._pool.max_connections, - } + return stats async def close(self) -> None: """Close the connection pool and release all connections.""" if self._client: await self._client.close() - logger.info("Redis connection pool closed") + logger.info("Redis connection pool closed", mode=self._mode) self._pool = None self._client = None + self._sentinel = None self._initialized = False + self._mode = "standalone" + self._key_prefix = "" # Global Redis pool instance diff --git a/src/main.py b/src/main.py index d1d46dd..3914c1b 100644 --- a/src/main.py +++ b/src/main.py @@ -33,6 +33,9 @@ from .utils.logging import setup_logging from .utils.shutdown import setup_graceful_shutdown, shutdown_handler +# Resolve effective version: runtime SERVICE_VERSION overrides build-time _version.py +effective_version: str = settings.service_version or __version__ + # Setup logging setup_logging() logger = structlog.get_logger() @@ -42,7 +45,7 @@ async def lifespan(app: FastAPI): """Application lifespan manager.""" # Startup - logger.info("Starting Code Interpreter API", version=__version__) + logger.info("Starting Code Interpreter API", version=effective_version) # Setup graceful shutdown callbacks (uvicorn handles signals) setup_graceful_shutdown() @@ -143,6 +146,39 @@ async def lifespan(app: FastAPI): # Build pool configs from settings pool_configs = settings.get_pool_configs() + # Parse image pull 
secrets (comma-separated string -> list) + pull_secrets = None + if settings.k8s_image_pull_secrets: + pull_secrets = [s.strip() for s in settings.k8s_image_pull_secrets.split(",") if s.strip()] + + # Validate execution mode / sidecar image consistency + sidecar_img = settings.k8s_sidecar_image.lower() + exec_mode = settings.k8s_execution_mode + if exec_mode == "agent" and "nsenter" in sidecar_img: + logger.warning( + "Execution mode is 'agent' but sidecar image appears to be nsenter-based. " + "Consider using a sidecar-agent image for agent mode.", + sidecar_image=settings.k8s_sidecar_image, + execution_mode=exec_mode, + ) + elif exec_mode == "nsenter" and "agent" in sidecar_img and "nsenter" not in sidecar_img: + logger.warning( + "Execution mode is 'nsenter' but sidecar image appears to be agent-based. " + "Consider using a sidecar-nsenter image for nsenter mode.", + sidecar_image=settings.k8s_sidecar_image, + execution_mode=exec_mode, + ) + + # Validate GKE Sandbox / execution mode compatibility + if settings.gke_sandbox_enabled and exec_mode == "nsenter": + logger.warning( + "GKE Sandbox (gVisor) is enabled but execution mode is 'nsenter'. " + "nsenter requires SYS_PTRACE/SYS_ADMIN/SYS_CHROOT capabilities which are " + "incompatible with gVisor. 
Switch to 'agent' execution mode for GKE Sandbox.", + execution_mode=exec_mode, + gke_sandbox_enabled=True, + ) + kubernetes_manager = KubernetesManager( namespace=settings.k8s_namespace or None, pool_configs=pool_configs, @@ -151,8 +187,16 @@ async def lifespan(app: FastAPI): default_memory_limit=settings.k8s_memory_limit, default_cpu_request=settings.k8s_cpu_request, default_memory_request=settings.k8s_memory_request, + execution_mode=settings.k8s_execution_mode, + executor_port=settings.k8s_executor_port, seccomp_profile_type=settings.k8s_seccomp_profile_type, network_isolated=settings.enable_network_isolation, + image_pull_policy=settings.k8s_image_pull_policy, + gke_sandbox_enabled=settings.gke_sandbox_enabled, + runtime_class_name=settings.gke_sandbox_runtime_class, + sandbox_node_selector=settings.kubernetes.sandbox_node_selector, + custom_tolerations=settings.kubernetes.custom_tolerations, + image_pull_secrets=pull_secrets, ) await kubernetes_manager.start() @@ -249,7 +293,7 @@ async def lifespan(app: FastAPI): app = FastAPI( title="Code Interpreter API", description="A secure API for executing code in isolated Kubernetes pods", - version=__version__, + version=effective_version, docs_url="/docs" if settings.enable_docs else None, redoc_url="/redoc" if settings.enable_docs else None, debug=settings.api_debug, @@ -287,7 +331,7 @@ async def health_check(): """Health check endpoint for liveness probe.""" return { "status": "healthy", - "version": __version__, + "version": effective_version, "config": { "debug": settings.api_debug, "docs_enabled": settings.enable_docs, diff --git a/src/services/api_key_manager.py b/src/services/api_key_manager.py index 3fb0249..463c873 100644 --- a/src/services/api_key_manager.py +++ b/src/services/api_key_manager.py @@ -31,12 +31,12 @@ class ApiKeyManagerService: """Manages API keys stored in Redis.""" - # Redis key prefixes - RECORD_PREFIX = "api_keys:records:" - VALID_CACHE_PREFIX = "api_keys:valid:" - USAGE_PREFIX = 
"api_keys:usage:" - INDEX_KEY = "api_keys:index" - ENV_KEYS_INDEX = "api_keys:env_index" # Separate index for env keys + # Base Redis key prefixes (before application-level prefix) + _RECORD_PREFIX = "api_keys:records:" + _VALID_CACHE_PREFIX = "api_keys:valid:" + _USAGE_PREFIX = "api_keys:usage:" + _INDEX_KEY = "api_keys:index" + _ENV_KEYS_INDEX = "api_keys:env_index" # Separate index for env keys # Cache TTL VALIDATION_CACHE_TTL = 300 # 5 minutes @@ -49,6 +49,14 @@ def __init__(self, redis_client: redis.Redis | None = None): """ self._redis = redis_client + # Compute prefixed keys once so every method uses the prefix + mk = redis_pool.make_key + self.RECORD_PREFIX = mk(self._RECORD_PREFIX) + self.VALID_CACHE_PREFIX = mk(self._VALID_CACHE_PREFIX) + self.USAGE_PREFIX = mk(self._USAGE_PREFIX) + self.INDEX_KEY = mk(self._INDEX_KEY) + self.ENV_KEYS_INDEX = mk(self._ENV_KEYS_INDEX) + @property def redis(self) -> redis.Redis: """Get Redis client, initializing if needed.""" @@ -130,8 +138,9 @@ async def _ensure_single_env_key_record(self, api_key: str, name: str) -> ApiKey source="environment", ) - # Store in Redis - pipe = self.redis.pipeline(transaction=True) + # Store in Redis (transaction=False for Redis Cluster compatibility + # — record key and index key hash to different slots) + pipe = self.redis.pipeline(transaction=False) pipe.hset(record_key, mapping=record.to_redis_hash()) pipe.sadd(self.ENV_KEYS_INDEX, key_hash) await pipe.execute() @@ -237,9 +246,10 @@ async def create_key( metadata=metadata or {}, ) - # Store in Redis + # Store in Redis (transaction=False for Redis Cluster compatibility + # — record key and index key hash to different slots) record_key = f"{self.RECORD_PREFIX}{key_hash}" - pipe = self.redis.pipeline(transaction=True) + pipe = self.redis.pipeline(transaction=False) pipe.hset(record_key, mapping=record.to_redis_hash()) pipe.sadd(self.INDEX_KEY, key_hash) await pipe.execute() @@ -358,8 +368,9 @@ async def revoke_key(self, key_hash: str) -> 
bool: if not exists: return False - # Delete from Redis - pipe = self.redis.pipeline(transaction=True) + # Delete from Redis (transaction=False for Redis Cluster compatibility + # — keys hash to different slots) + pipe = self.redis.pipeline(transaction=False) pipe.delete(record_key) pipe.srem(self.INDEX_KEY, key_hash) pipe.delete(f"{self.VALID_CACHE_PREFIX}{self._short_hash(key_hash)}") diff --git a/src/services/detailed_metrics.py b/src/services/detailed_metrics.py index e3cb82b..31b578e 100644 --- a/src/services/detailed_metrics.py +++ b/src/services/detailed_metrics.py @@ -31,12 +31,12 @@ class DetailedMetricsService: """Service for collecting and querying detailed execution metrics.""" - # Redis key prefixes - BUFFER_KEY = "metrics:detailed:buffer" - HOURLY_PREFIX = "metrics:detailed:hourly:" - DAILY_PREFIX = "metrics:detailed:daily:" - POOL_STATS_KEY = "metrics:pool:stats" - API_KEY_HOURLY_PREFIX = "metrics:api_key:" + # Base Redis key prefixes (before application-level prefix) + _BUFFER_KEY = "metrics:detailed:buffer" + _HOURLY_PREFIX = "metrics:detailed:hourly:" + _DAILY_PREFIX = "metrics:detailed:daily:" + _POOL_STATS_KEY = "metrics:pool:stats" + _API_KEY_HOURLY_PREFIX = "metrics:api_key:" # Buffer and retention settings MAX_BUFFER_SIZE = 10000 @@ -52,6 +52,16 @@ def __init__(self, redis_client: redis.Redis | None = None): self._redis = redis_client self._in_memory_buffer: list[DetailedExecutionMetrics] = [] + # Compute prefixed keys once + from ..core.pool import redis_pool + + mk = redis_pool.make_key + self.BUFFER_KEY = mk(self._BUFFER_KEY) + self.HOURLY_PREFIX = mk(self._HOURLY_PREFIX) + self.DAILY_PREFIX = mk(self._DAILY_PREFIX) + self.POOL_STATS_KEY = mk(self._POOL_STATS_KEY) + self.API_KEY_HOURLY_PREFIX = mk(self._API_KEY_HOURLY_PREFIX) + def register_event_handlers(self) -> None: """Register event handlers for pool metrics.""" from ..core.events import ( diff --git a/src/services/file.py b/src/services/file.py index 7c9d7cf..1554398 100644 --- 
a/src/services/file.py +++ b/src/services/file.py @@ -29,8 +29,11 @@ def __init__(self): # which handles IAM vs static credentials automatically self.minio_client = settings.minio.create_client() - # Initialize Redis client - self.redis_client = redis.from_url(settings.get_redis_url(), decode_responses=True) + # Initialize Redis client via the shared connection pool so that + # cluster, sentinel, and TLS modes are handled automatically. + from ..core.pool import redis_pool + + self.redis_client = redis_pool.get_client() self.bucket_name = settings.minio_bucket @@ -55,11 +58,15 @@ def _get_file_key(self, session_id: str, file_id: str, file_type: str = "uploads def _get_file_metadata_key(self, session_id: str, file_id: str) -> str: """Generate Redis key for file metadata.""" - return f"files:{session_id}:{file_id}" + from ..core.pool import redis_pool + + return redis_pool.make_key(f"files:{session_id}:{file_id}") def _get_session_files_key(self, session_id: str) -> str: """Generate Redis key for session file list.""" - return f"session_files:{session_id}" + from ..core.pool import redis_pool + + return redis_pool.make_key(f"session_files:{session_id}") async def _store_file_metadata(self, session_id: str, file_id: str, metadata: dict[str, Any]) -> None: """Store file metadata in Redis.""" diff --git a/src/services/health.py b/src/services/health.py index 083d04d..af90608 100644 --- a/src/services/health.py +++ b/src/services/health.py @@ -142,16 +142,16 @@ async def check_redis(self) -> HealthCheckResult: try: # Use shared connection pool - if not self._redis_client: - from ..core.pool import redis_pool + from ..core.pool import redis_pool + if not self._redis_client: self._redis_client = redis_pool.get_client() # Test basic connectivity await self._redis_client.ping() # Test read/write operations - test_key = "health_check:test" + test_key = redis_pool.make_key("health_check:test") test_value = f"test_{int(time.time())}" await self._redis_client.set(test_key, 
test_value, ex=60) diff --git a/src/services/kubernetes/client.py b/src/services/kubernetes/client.py index a313dc9..3e81ecf 100644 --- a/src/services/kubernetes/client.py +++ b/src/services/kubernetes/client.py @@ -193,9 +193,27 @@ def create_pod_manifest( sidecar_memory_request: str = "256Mi", seccomp_profile_type: str = "RuntimeDefault", network_isolated: bool = False, + execution_mode: str = "agent", + executor_port: int = 9090, + gke_sandbox_enabled: bool = False, + runtime_class_name: str = "gvisor", + sandbox_node_selector: dict[str, str] | None = None, + custom_tolerations: list[dict[str, str]] | None = None, + image_pull_secrets: list[str] | None = None, ) -> client.V1Pod: """Create a Pod manifest for code execution. + Supports two execution modes: + + - agent (default): An executor agent runs in the main container, providing + HTTP-based code execution. No nsenter, no capabilities, no privilege + escalation needed. Compatible with GKE Sandbox (gVisor) and restricted + Pod Security Standards. + + - nsenter (legacy): The sidecar uses nsenter to enter the main container's + mount namespace. Requires SYS_PTRACE, SYS_ADMIN, SYS_CHROOT capabilities, + shareProcessNamespace, and allowPrivilegeEscalation: true. 
+ Args: name: Pod name namespace: Kubernetes namespace @@ -211,10 +229,30 @@ def create_pod_manifest( run_as_user: UID to run containers as sidecar_port: Port for sidecar HTTP API seccomp_profile_type: Seccomp profile type (RuntimeDefault or Unconfined) + network_isolated: Whether network isolation is enabled + execution_mode: Execution mode - "agent" (default) or "nsenter" + executor_port: Port for the executor HTTP server inside the main container + gke_sandbox_enabled: Enable GKE Sandbox (gVisor) for additional kernel isolation + runtime_class_name: Runtime class name for sandboxed pods (default: gvisor) + sandbox_node_selector: Node selector for sandbox-enabled nodes + custom_tolerations: Additional tolerations for custom node pool taints + image_pull_secrets: List of secret names for pulling images from private registries Returns: V1Pod manifest ready for creation. """ + use_agent = execution_mode == "agent" + + # Warn if GKE Sandbox is enabled with nsenter mode (incompatible with gVisor) + if gke_sandbox_enabled and not use_agent: + logger.warning( + "GKE Sandbox (gVisor) is enabled but execution mode is 'nsenter'. " + "nsenter requires capabilities incompatible with gVisor. 
" + "Consider switching to 'agent' execution mode.", + execution_mode=execution_mode, + gke_sandbox_enabled=gke_sandbox_enabled, + ) + # Shared volume for code and data shared_volume = client.V1Volume( name="shared-data", @@ -229,8 +267,8 @@ def create_pod_manifest( mount_path="/mnt/data", ) - # Security context for main container - security_context = client.V1SecurityContext( + # Security context for main container - minimal privileges in both modes + main_security_context = client.V1SecurityContext( run_as_user=run_as_user, run_as_group=run_as_user, run_as_non_root=True, @@ -238,34 +276,35 @@ def create_pod_manifest( capabilities=client.V1Capabilities(drop=["ALL"]), ) - # Security context for sidecar - needs elevated privileges for nsenter - # - # The sidecar uses nsenter to execute code in the main container's mount namespace. - # nsenter requires these capabilities: - # - SYS_PTRACE: access /proc//ns/ of other processes - # - SYS_ADMIN: call setns() to enter namespaces - # - SYS_CHROOT: required for mount namespace operations - # - # For non-root users, Linux capabilities only populate the bounding set, not - # effective/permitted sets. To make capabilities usable, the sidecar Docker image - # uses setcap on the nsenter binary: - # setcap 'cap_sys_ptrace,cap_sys_admin,cap_sys_chroot+eip' /usr/bin/nsenter - # - # The pod spec must still: - # - Add capabilities to the bounding set (capabilities.add) - # - Allow privilege escalation (for file capabilities to be honored) - # - # This approach allows running as non-root while still having nsenter work. 
- sidecar_security_context = client.V1SecurityContext( - run_as_user=run_as_user, - run_as_group=run_as_user, - run_as_non_root=True, - allow_privilege_escalation=True, # Required for file capabilities - capabilities=client.V1Capabilities( - add=["SYS_PTRACE", "SYS_ADMIN", "SYS_CHROOT"], - drop=["ALL"], - ), - ) + if use_agent: + # Agent mode: sidecar also has minimal privileges (no nsenter needed) + sidecar_security_context = client.V1SecurityContext( + run_as_user=run_as_user, + run_as_group=run_as_user, + run_as_non_root=True, + allow_privilege_escalation=False, + capabilities=client.V1Capabilities(drop=["ALL"]), + ) + else: + # nsenter mode: sidecar needs elevated privileges for nsenter + # + # The sidecar uses nsenter to execute code in the main container's mount namespace. + # nsenter requires these capabilities: + # - SYS_PTRACE: access /proc//ns/ of other processes + # - SYS_ADMIN: call setns() to enter namespaces + # - SYS_CHROOT: required for mount namespace operations + # + # File capabilities (setcap on nsenter) require allowPrivilegeEscalation: true. 
+ sidecar_security_context = client.V1SecurityContext( + run_as_user=run_as_user, + run_as_group=run_as_user, + run_as_non_root=True, + allow_privilege_escalation=True, + capabilities=client.V1Capabilities( + add=["SYS_PTRACE", "SYS_ADMIN", "SYS_CHROOT"], + drop=["ALL"], + ), + ) # Resource requirements resources = client.V1ResourceRequirements( @@ -279,7 +318,7 @@ def create_pod_manifest( image=main_image, image_pull_policy=image_pull_policy, volume_mounts=[shared_mount], - security_context=security_context, + security_context=main_security_context, resources=resources, env=[ client.V1EnvVar(name="PYTHONUNBUFFERED", value="1"), @@ -287,6 +326,21 @@ def create_pod_manifest( ], ) + # In agent mode, override CMD to run the executor agent from the shared volume + # (copied there by the init container) + if use_agent: + main_container.args = ["/mnt/data/.executor-agent", "--port", str(executor_port)] + + # Sidecar environment variables + sidecar_env = [ + client.V1EnvVar(name="LANGUAGE", value=language), + client.V1EnvVar(name="WORKING_DIR", value="/mnt/data"), + client.V1EnvVar(name="SIDECAR_PORT", value=str(sidecar_port)), + client.V1EnvVar(name="NETWORK_ISOLATED", value=str(network_isolated).lower()), + client.V1EnvVar(name="EXECUTION_MODE", value=execution_mode), + client.V1EnvVar(name="EXECUTOR_PORT", value=str(executor_port)), + ] + # Sidecar container (HTTP API) sidecar_container = client.V1Container( name="sidecar", @@ -296,17 +350,12 @@ def create_pod_manifest( volume_mounts=[shared_mount], security_context=sidecar_security_context, resources=client.V1ResourceRequirements( - # CRITICAL: User code runs in the sidecar's cgroup via nsenter (Issue #32) - # These limits apply to user code execution, not just the sidecar process + # In nsenter mode: user code runs in the sidecar's cgroup via nsenter + # In agent mode: sidecar only proxies requests, user code runs in main container limits={"cpu": sidecar_cpu_limit, "memory": sidecar_memory_limit}, requests={"cpu": 
sidecar_cpu_request, "memory": sidecar_memory_request}, ), - env=[ - client.V1EnvVar(name="LANGUAGE", value=language), - client.V1EnvVar(name="WORKING_DIR", value="/mnt/data"), - client.V1EnvVar(name="SIDECAR_PORT", value=str(sidecar_port)), - client.V1EnvVar(name="NETWORK_ISOLATED", value=str(network_isolated).lower()), - ], + env=sidecar_env, readiness_probe=client.V1Probe( http_get=client.V1HTTPGetAction(path="/ready", port=sidecar_port), initial_delay_seconds=5, @@ -323,33 +372,111 @@ def create_pod_manifest( ), ) + # Init containers (agent mode only) + # Copy the executor agent binary from the sidecar image to the shared volume + init_containers = None + if use_agent: + init_containers = [ + client.V1Container( + name="agent-init", + image=sidecar_image, + image_pull_policy=image_pull_policy, + command=[ + "python", + "-c", + "import shutil, os; shutil.copy2('/opt/executor-agent', '/mnt/data/.executor-agent'); os.chmod('/mnt/data/.executor-agent', 0o755)", + ], + volume_mounts=[shared_mount], + security_context=client.V1SecurityContext( + run_as_user=run_as_user, + run_as_group=run_as_user, + run_as_non_root=True, + allow_privilege_escalation=False, + capabilities=client.V1Capabilities(drop=["ALL"]), + ), + resources=client.V1ResourceRequirements( + limits={"cpu": "100m", "memory": "64Mi"}, + requests={"cpu": "50m", "memory": "32Mi"}, + ), + ) + ] + + # GKE Sandbox configuration + # When enabled, adds gVisor runtime, node selector, and tolerations + runtime_class = runtime_class_name if gke_sandbox_enabled else None + + # Build node selector + node_selector = {} + if gke_sandbox_enabled: + # GKE automatically adds this label to sandbox-enabled nodes + node_selector["sandbox.gke.io/runtime"] = "gvisor" + if sandbox_node_selector: + node_selector.update(sandbox_node_selector) + + # Build tolerations list + tolerations = [] + if gke_sandbox_enabled: + # GKE Sandbox standard taint + tolerations.append( + client.V1Toleration( + key="sandbox.gke.io/runtime", + 
operator="Equal", + value="gvisor", + effect="NoSchedule", + ) + ) + if custom_tolerations: + # Add custom node pool taints (e.g., pool=sandbox) + for tol in custom_tolerations: + tol_key = tol.get("key") + if not tol_key: + logger.warning("Skipping custom toleration with missing 'key' field", toleration=tol) + continue + tolerations.append( + client.V1Toleration( + key=tol_key, + operator=tol.get("operator", "Equal"), + value=tol.get("value"), + effect=tol.get("effect", "NoSchedule"), + ) + ) + + # Build image pull secrets list + pull_secrets = None + if image_pull_secrets: + pull_secrets = [client.V1LocalObjectReference(name=secret_name) for secret_name in image_pull_secrets] + # Pod spec pod_spec = client.V1PodSpec( + init_containers=init_containers, containers=[main_container, sidecar_container], volumes=[shared_volume], restart_policy="Never", termination_grace_period_seconds=10, - # Share process namespace so sidecar can use nsenter to execute in main container - share_process_namespace=True, + # Share process namespace only needed for nsenter mode + share_process_namespace=not use_agent, + runtime_class_name=runtime_class, + node_selector=node_selector if node_selector else None, + tolerations=tolerations if tolerations else None, + image_pull_secrets=pull_secrets, security_context=client.V1PodSecurityContext( - # Note: We don't set run_as_user at pod level; each container - # sets its own security context. Both run as non-root UID 65532. - # The sidecar uses file capabilities (setcap) on nsenter for privileges. 
fs_group=run_as_user, - # Apply seccomp profile to block dangerous syscalls - # while preserving nsenter functionality for the sidecar seccomp_profile=client.V1SeccompProfile(type=seccomp_profile_type), ), - # Prevent scheduling on same node as other execution pods - # (optional, can be configured via affinity) ) # Pod metadata + # Add GKE Sandbox annotation if enabled + pod_annotations = dict(annotations) if annotations else {} + if gke_sandbox_enabled: + # GKE Sandbox annotation for gVisor runtime + pod_annotations["sandbox.gke.io/runtime"] = "gvisor" + metadata = client.V1ObjectMeta( name=name, namespace=namespace, labels=labels, - annotations=annotations or {}, + annotations=pod_annotations, ) return client.V1Pod( diff --git a/src/services/kubernetes/job_executor.py b/src/services/kubernetes/job_executor.py index d69d700..d301210 100644 --- a/src/services/kubernetes/job_executor.py +++ b/src/services/kubernetes/job_executor.py @@ -5,8 +5,6 @@ """ import asyncio -from datetime import datetime -from typing import Any, Dict, List, Optional from uuid import uuid4 import httpx @@ -41,7 +39,7 @@ def __init__( namespace: str | None = None, ttl_seconds_after_finished: int = 60, active_deadline_seconds: int = 300, - sidecar_image: str = "aronmuon/kubecoderun-sidecar:latest", + sidecar_image: str = "aronmuon/kubecoderun-sidecar-agent:latest", ): """Initialize the Job executor. 
@@ -123,8 +121,16 @@ async def create_job( sidecar_memory_limit=spec.sidecar_memory_limit, sidecar_cpu_request=spec.sidecar_cpu_request, sidecar_memory_request=spec.sidecar_memory_request, + execution_mode=spec.execution_mode, + executor_port=spec.executor_port, seccomp_profile_type=spec.seccomp_profile_type, network_isolated=spec.network_isolated, + gke_sandbox_enabled=spec.gke_sandbox_enabled, + runtime_class_name=spec.runtime_class_name, + sandbox_node_selector=spec.sandbox_node_selector, + custom_tolerations=spec.custom_tolerations, + image_pull_policy=spec.image_pull_policy, + image_pull_secrets=spec.image_pull_secrets, ttl_seconds_after_finished=self.ttl_seconds_after_finished, active_deadline_seconds=self.active_deadline_seconds, ) diff --git a/src/services/kubernetes/manager.py b/src/services/kubernetes/manager.py index 3690b66..6b645a6 100644 --- a/src/services/kubernetes/manager.py +++ b/src/services/kubernetes/manager.py @@ -41,13 +41,21 @@ def __init__( self, namespace: str | None = None, pool_configs: list[PoolConfig] | None = None, - sidecar_image: str = "aronmuon/kubecoderun-sidecar:latest", + sidecar_image: str = "aronmuon/kubecoderun-sidecar-agent:latest", default_cpu_limit: str = "1", default_memory_limit: str = "512Mi", default_cpu_request: str = "100m", default_memory_request: str = "128Mi", + execution_mode: str = "agent", + executor_port: int = 9090, seccomp_profile_type: str = "RuntimeDefault", network_isolated: bool = False, + image_pull_policy: str = "Always", + gke_sandbox_enabled: bool = False, + runtime_class_name: str = "gvisor", + sandbox_node_selector: dict[str, str] | None = None, + custom_tolerations: list[dict[str, str]] | None = None, + image_pull_secrets: list[str] | None = None, ): """Initialize the Kubernetes manager. 
@@ -59,8 +67,16 @@ def __init__( default_memory_limit: Default memory limit for pods default_cpu_request: Default CPU request for pods default_memory_request: Default memory request for pods + execution_mode: Execution mode - "agent" (default) or "nsenter" + executor_port: Port for executor HTTP server in the main container seccomp_profile_type: Seccomp profile type (RuntimeDefault, Unconfined, Localhost) network_isolated: Whether network isolation is enabled (disables network-dependent features) + image_pull_policy: Image pull policy for execution pods (Always, IfNotPresent, Never) + gke_sandbox_enabled: Enable GKE Sandbox (gVisor) for additional kernel isolation + runtime_class_name: Runtime class name for sandboxed pods + sandbox_node_selector: Node selector for sandbox-enabled nodes + custom_tolerations: Custom tolerations for node pool taints + image_pull_secrets: List of secret names for pulling images from private registries """ self.namespace = namespace or get_current_namespace() self.sidecar_image = sidecar_image @@ -68,13 +84,22 @@ def __init__( self.default_memory_limit = default_memory_limit self.default_cpu_request = default_cpu_request self.default_memory_request = default_memory_request + self.execution_mode = execution_mode + self.executor_port = executor_port self.seccomp_profile_type = seccomp_profile_type self.network_isolated = network_isolated + self.image_pull_policy = image_pull_policy + self.gke_sandbox_enabled = gke_sandbox_enabled + self.runtime_class_name = runtime_class_name + self.sandbox_node_selector = sandbox_node_selector + self.custom_tolerations = custom_tolerations + self.image_pull_secrets = image_pull_secrets + self._pool_configs = pool_configs or [] # Pool manager for warm pods self._pool_manager = PodPoolManager( namespace=self.namespace, - configs=pool_configs or [], + configs=self._pool_configs, ) # Job executor for cold languages @@ -268,6 +293,15 @@ async def execute_code( return result, handle, source else: # Use Job 
execution + # Get image_pull_secrets from pool config for this language + pull_secrets = self.image_pull_secrets + pull_policy = self.image_pull_policy + for config in self._pool_configs: + if config.language.lower() == language.lower(): + pull_secrets = config.image_pull_secrets or self.image_pull_secrets + pull_policy = config.image_pull_policy or self.image_pull_policy + break + spec = PodSpec( language=language, image=self.get_image_for_language(language), @@ -278,8 +312,16 @@ async def execute_code( memory_limit=self.default_memory_limit, cpu_request=self.default_cpu_request, memory_request=self.default_memory_request, + execution_mode=self.execution_mode, + executor_port=self.executor_port, seccomp_profile_type=self.seccomp_profile_type, network_isolated=self.network_isolated, + image_pull_policy=pull_policy, + gke_sandbox_enabled=self.gke_sandbox_enabled, + runtime_class_name=self.runtime_class_name, + sandbox_node_selector=self.sandbox_node_selector, + custom_tolerations=self.custom_tolerations, + image_pull_secrets=pull_secrets, ) result = await self._job_executor.execute_with_job( diff --git a/src/services/kubernetes/models.py b/src/services/kubernetes/models.py index f9db8c4..6e06be0 100644 --- a/src/services/kubernetes/models.py +++ b/src/services/kubernetes/models.py @@ -106,7 +106,9 @@ class PodSpec: cpu_request: str = "100m" memory_request: str = "128Mi" - # Sidecar resource limits (CRITICAL: user code runs in sidecar's cgroup via nsenter) + # Sidecar resource limits + # In nsenter mode: user code runs in sidecar's cgroup via nsenter + # In agent mode: user code runs in main container's cgroup sidecar_cpu_limit: str = "500m" sidecar_memory_limit: str = "512Mi" sidecar_cpu_request: str = "100m" @@ -116,15 +118,27 @@ class PodSpec: run_as_user: int = 65532 run_as_group: int = 65532 run_as_non_root: bool = True + execution_mode: str = "agent" # "agent" or "nsenter" + executor_port: int = 9090 seccomp_profile_type: str = "RuntimeDefault" # Sidecar 
configuration - sidecar_image: str = "aronmuon/kubecoderun-sidecar:latest" + sidecar_image: str = "aronmuon/kubecoderun-sidecar-agent:latest" sidecar_port: int = 8080 + # Image pull policy and secrets + image_pull_policy: str = "Always" + image_pull_secrets: list[str] | None = None + # Network isolation mode - disables network-dependent features (e.g., Go module proxy) network_isolated: bool = False + # GKE Sandbox (gVisor) configuration + gke_sandbox_enabled: bool = False + runtime_class_name: str = "gvisor" + sandbox_node_selector: dict[str, str] | None = None + custom_tolerations: list[dict[str, str]] | None = None + @dataclass class PoolConfig: @@ -133,13 +147,15 @@ class PoolConfig: language: str image: str pool_size: int = 0 # 0 = use Jobs instead of pool - sidecar_image: str = "aronmuon/kubecoderun-sidecar:latest" + sidecar_image: str = "aronmuon/kubecoderun-sidecar-agent:latest" # Resource limits (can override defaults) cpu_limit: str | None = None memory_limit: str | None = None - # Sidecar resource limits (CRITICAL: user code runs in sidecar's cgroup via nsenter) + # Sidecar resource limits + # In nsenter mode: user code runs in sidecar's cgroup via nsenter + # In agent mode: user code runs in main container's cgroup sidecar_cpu_limit: str = "500m" sidecar_memory_limit: str = "512Mi" sidecar_cpu_request: str = "100m" @@ -148,12 +164,23 @@ class PoolConfig: # Image pull policy (Always, IfNotPresent, Never) image_pull_policy: str = "Always" - # Seccomp profile type (RuntimeDefault, Unconfined, Localhost) + # Image pull secrets (list of secret names) + image_pull_secrets: list[str] | None = None + + # Execution mode and security settings + execution_mode: str = "agent" # "agent" or "nsenter" + executor_port: int = 9090 seccomp_profile_type: str = "RuntimeDefault" # Network isolation mode - disables network-dependent features (e.g., Go module proxy) network_isolated: bool = False + # GKE Sandbox (gVisor) configuration + gke_sandbox_enabled: bool = False + 
runtime_class_name: str = "gvisor" + sandbox_node_selector: dict[str, str] | None = None + custom_tolerations: list[dict[str, str]] | None = None + @property def uses_pool(self) -> bool: """Whether this language uses a warm pod pool.""" diff --git a/src/services/kubernetes/pool.py b/src/services/kubernetes/pool.py index 98fb572..04eb73a 100644 --- a/src/services/kubernetes/pool.py +++ b/src/services/kubernetes/pool.py @@ -185,8 +185,15 @@ async def _create_warm_pod(self) -> PooledPod | None: sidecar_memory_limit=self.config.sidecar_memory_limit, sidecar_cpu_request=self.config.sidecar_cpu_request, sidecar_memory_request=self.config.sidecar_memory_request, + execution_mode=self.config.execution_mode, + executor_port=self.config.executor_port, seccomp_profile_type=self.config.seccomp_profile_type, network_isolated=self.config.network_isolated, + gke_sandbox_enabled=self.config.gke_sandbox_enabled, + runtime_class_name=self.config.runtime_class_name, + sandbox_node_selector=self.config.sandbox_node_selector, + custom_tolerations=self.config.custom_tolerations, + image_pull_secrets=self.config.image_pull_secrets, ) try: diff --git a/src/services/metrics.py b/src/services/metrics.py index cb5281e..7a1f40a 100644 --- a/src/services/metrics.py +++ b/src/services/metrics.py @@ -391,8 +391,10 @@ async def _persist_metrics_to_redis(self) -> None: } # Store in Redis with TTL + from ..core.pool import redis_pool + await self._redis_client.setex( - "metrics:current", + redis_pool.make_key("metrics:current"), 86400, str(metrics_data), # 24 hours TTL ) @@ -400,7 +402,7 @@ async def _persist_metrics_to_redis(self) -> None: # Store historical data (keep last 24 hours) hour_key = datetime.now(UTC).strftime("%Y-%m-%d-%H") await self._redis_client.setex( - f"metrics:hourly:{hour_key}", + redis_pool.make_key(f"metrics:hourly:{hour_key}"), 86400 * 7, # 7 days TTL for hourly data str(metrics_data), ) @@ -417,7 +419,9 @@ async def _load_metrics_from_redis(self) -> None: try: # Load 
current metrics - current_data = await self._redis_client.get("metrics:current") + from ..core.pool import redis_pool + + current_data = await self._redis_client.get(redis_pool.make_key("metrics:current")) if current_data: # In a full implementation, we would parse and restore the metrics # For now, just log that we found existing data diff --git a/src/services/session.py b/src/services/session.py index 4dbbd9b..0288d20 100644 --- a/src/services/session.py +++ b/src/services/session.py @@ -122,15 +122,15 @@ def _generate_session_id(self) -> str: def _session_key(self, session_id: str) -> str: """Generate Redis key for session data.""" - return f"sessions:{session_id}" + return redis_pool.make_key(f"sessions:{session_id}") def _session_index_key(self) -> str: """Generate Redis key for session index.""" - return "sessions:index" + return redis_pool.make_key("sessions:index") def _entity_sessions_key(self, entity_id: str) -> str: """Generate Redis key for entity-based session grouping.""" - return f"entity_sessions:{entity_id}" + return redis_pool.make_key(f"entity_sessions:{entity_id}") async def create_session(self, request: SessionCreate) -> Session: """Create a new code execution session.""" @@ -169,8 +169,9 @@ async def create_session(self, request: SessionCreate) -> Session: # Extract entity_id from metadata if provided entity_id = request.metadata.get("entity_id") if request.metadata else None - # Use Redis transaction to ensure atomicity - pipe = await self.redis.pipeline(transaction=True) + # Use pipeline for batching (transaction=False for Redis Cluster + # compatibility — keys span different hash slots) + pipe = self.redis.pipeline(transaction=False) try: # Store session data pipe.hset(session_key, mapping=session_data) @@ -307,8 +308,9 @@ async def delete_session(self, session_id: str) -> bool: ) # Continue with session deletion even if file cleanup fails - # Use transaction to ensure atomicity - pipe = await self.redis.pipeline(transaction=True) + # Use 
pipeline for batching (transaction=False for Redis Cluster + # compatibility — keys span different hash slots) + pipe = self.redis.pipeline(transaction=False) try: # Remove session data pipe.delete(session_key) diff --git a/src/services/state.py b/src/services/state.py index 29d83c9..654c34c 100644 --- a/src/services/state.py +++ b/src/services/state.py @@ -55,19 +55,19 @@ def __init__(self, redis_client: redis.Redis | None = None): def _state_key(self, session_id: str) -> str: """Generate Redis key for session state.""" - return f"{self.KEY_PREFIX}{session_id}" + return redis_pool.make_key(f"{self.KEY_PREFIX}{session_id}") def _hash_key(self, session_id: str) -> str: """Generate Redis key for state hash.""" - return f"{self.HASH_KEY_PREFIX}{session_id}" + return redis_pool.make_key(f"{self.HASH_KEY_PREFIX}{session_id}") def _meta_key(self, session_id: str) -> str: """Generate Redis key for state metadata.""" - return f"{self.META_KEY_PREFIX}{session_id}" + return redis_pool.make_key(f"{self.META_KEY_PREFIX}{session_id}") def _upload_marker_key(self, session_id: str) -> str: """Generate Redis key for upload marker.""" - return f"{self.UPLOAD_MARKER_PREFIX}{session_id}" + return redis_pool.make_key(f"{self.UPLOAD_MARKER_PREFIX}{session_id}") @staticmethod def compute_hash(raw_bytes: bytes) -> str: @@ -133,8 +133,9 @@ async def save_state( state_hash = self.compute_hash(raw_bytes) now = datetime.now(UTC) - # Use pipeline for atomic operations - pipe = self.redis.pipeline(transaction=True) + # Use pipeline for batching (transaction=False for Redis Cluster + # compatibility — state/hash/meta keys hash to different slots) + pipe = self.redis.pipeline(transaction=False) # Save state pipe.setex(self._state_key(session_id), ttl_seconds, state_b64) diff --git a/src/utils/config_validator.py b/src/utils/config_validator.py index 53328aa..82153a0 100644 --- a/src/utils/config_validator.py +++ b/src/utils/config_validator.py @@ -5,6 +5,8 @@ import redis from minio.error import 
S3Error +from redis.cluster import ClusterNode, RedisCluster +from redis.sentinel import Sentinel from ..config import settings @@ -94,18 +96,71 @@ def _validate_file_config(self): self.errors.append(f"File extension must start with dot: {ext}") def _validate_redis_connection(self): - """Validate Redis connection.""" + """Validate Redis connection. + + Uses the correct client type depending on REDIS_MODE (standalone, + cluster, or sentinel) and forwards TLS kwargs so that managed + services with custom CA certificates are validated correctly. + """ try: - # Use Redis URL from settings - client = redis.from_url( - settings.get_redis_url(), - socket_timeout=settings.redis_socket_timeout, - socket_connect_timeout=settings.redis_socket_connect_timeout, - max_connections=settings.redis_max_connections, - ) - - # Test connection - client.ping() + redis_cfg = settings.redis + tls_kwargs = redis_cfg.get_tls_kwargs() + # ``ssl`` is implied by the ``rediss://`` scheme for standalone; + # for cluster/sentinel it's passed directly. 
+ tls_standalone = {k: v for k, v in tls_kwargs.items() if k != "ssl"} + + if redis_cfg.mode == "cluster": + # --- Cluster mode --- + if redis_cfg.cluster_nodes: + startup_nodes = [ + ClusterNode(host=h, port=p) for h, p in redis_cfg.parse_nodes(redis_cfg.cluster_nodes) + ] + else: + startup_nodes = [ClusterNode(host=redis_cfg.host, port=redis_cfg.port)] + + client = RedisCluster( + startup_nodes=startup_nodes, + password=redis_cfg.password, + socket_timeout=redis_cfg.socket_timeout, + socket_connect_timeout=redis_cfg.socket_connect_timeout, + **tls_kwargs, + ) + client.ping() + client.close() + + elif redis_cfg.mode == "sentinel": + # --- Sentinel mode --- + if redis_cfg.sentinel_nodes: + sentinel_hosts = redis_cfg.parse_nodes(redis_cfg.sentinel_nodes) + else: + sentinel_hosts = [(redis_cfg.host, 26379)] + + sentinel = Sentinel( + sentinels=sentinel_hosts, + password=redis_cfg.sentinel_password, + socket_timeout=redis_cfg.socket_timeout, + socket_connect_timeout=redis_cfg.socket_connect_timeout, + **tls_kwargs, + ) + master = sentinel.master_for( + service_name=redis_cfg.sentinel_master, + password=redis_cfg.password, + socket_timeout=redis_cfg.socket_timeout, + socket_connect_timeout=redis_cfg.socket_connect_timeout, + **tls_kwargs, + ) + master.ping() + + else: + # --- Standalone mode --- + client = redis.from_url( + settings.get_redis_url(), + socket_timeout=settings.redis_socket_timeout, + socket_connect_timeout=settings.redis_socket_connect_timeout, + max_connections=settings.redis_max_connections, + **tls_standalone, + ) + client.ping() except redis.ConnectionError as e: # Treat as warning in development mode to allow startup without Redis diff --git a/src/utils/logging.py b/src/utils/logging.py index 37c6347..54ceaee 100644 --- a/src/utils/logging.py +++ b/src/utils/logging.py @@ -110,7 +110,7 @@ def configure_third_party_loggers() -> None: def add_service_context(logger, method_name, event_dict): """Add service context information to log entries.""" 
event_dict["service"] = "kubecoderun-api" - event_dict["version"] = __version__ + event_dict["version"] = settings.service_version or __version__ return event_dict diff --git a/tests/integration/test_redis_cluster.py b/tests/integration/test_redis_cluster.py new file mode 100644 index 0000000..679cc4f --- /dev/null +++ b/tests/integration/test_redis_cluster.py @@ -0,0 +1,295 @@ +"""Integration test for Redis Cluster connectivity. + +Requires a running Redis Cluster on localhost:7000-7005. +Start with: docker compose -f docker-compose.redis-cluster.yml up -d + +Usage: + uv run python -m pytest tests/integration/test_redis_cluster.py -v +""" + +import asyncio +import os + +import pytest +import redis as sync_redis +import redis.asyncio as async_redis +from redis.asyncio.cluster import RedisCluster as AsyncRedisCluster +from redis.cluster import ClusterNode, RedisCluster + +# Only run when cluster is available +CLUSTER_HOST = os.environ.get("REDIS_CLUSTER_HOST", "127.0.0.1") +CLUSTER_PORT = int(os.environ.get("REDIS_CLUSTER_PORT", "7000")) + +pytestmark = pytest.mark.integration + + +def _cluster_available() -> bool: + """Check if a Redis Cluster is reachable.""" + try: + rc = RedisCluster( + startup_nodes=[ClusterNode(host=CLUSTER_HOST, port=CLUSTER_PORT)], + decode_responses=True, + socket_timeout=2, + socket_connect_timeout=2, + ) + rc.ping() + rc.close() + return True + except Exception: + return False + + +skip_no_cluster = pytest.mark.skipif( + not _cluster_available(), + reason=f"Redis Cluster not available at {CLUSTER_HOST}:{CLUSTER_PORT}", +) + + +# ── Synchronous (validator path) ────────────────────────────────────────── + + +@skip_no_cluster +class TestSyncRedisCluster: + """Tests using synchronous redis-py RedisCluster (same as config_validator).""" + + def test_connect_with_single_startup_node(self): + """Cluster discovery works from a single startup node.""" + rc = RedisCluster( + startup_nodes=[ClusterNode(host=CLUSTER_HOST, port=CLUSTER_PORT)], + 
decode_responses=True, + socket_timeout=5, + socket_connect_timeout=5, + ) + assert rc.ping() is True + # Verify the cluster is operational via a targeted node + node_info = rc.cluster_info(target_nodes=RedisCluster.RANDOM) + assert node_info.get("cluster_state") == "ok" + rc.close() + + def test_connect_with_multiple_startup_nodes(self): + """Cluster discovery works from multiple startup nodes.""" + nodes = [ + ClusterNode(host=CLUSTER_HOST, port=CLUSTER_PORT), + ClusterNode(host=CLUSTER_HOST, port=CLUSTER_PORT + 1), + ] + rc = RedisCluster( + startup_nodes=nodes, + decode_responses=True, + socket_timeout=5, + socket_connect_timeout=5, + ) + assert rc.ping() is True + rc.close() + + def test_connect_with_no_password(self): + """Cluster connects with password=None (no AUTH).""" + rc = RedisCluster( + startup_nodes=[ClusterNode(host=CLUSTER_HOST, port=CLUSTER_PORT)], + password=None, + decode_responses=True, + socket_timeout=5, + ) + assert rc.ping() is True + rc.close() + + def test_empty_password_converted_to_none(self): + """Our validator converts empty password to None to avoid spurious AUTH. + + Redis servers without requirepass accept AUTH with any string, + so we can't observe the bug via an error. Instead, verify that + our Settings validator normalises empty password to None. 
+ """ + from src.config import Settings + + s = Settings(redis_password="") + assert s.redis_password is None + + s2 = Settings(redis_password=" ") + assert s2.redis_password is None + + s3 = Settings(redis_password="real-password") + assert s3.redis_password == "real-password" + + def test_set_get_operations(self): + """Basic SET/GET across cluster slots.""" + rc = RedisCluster( + startup_nodes=[ClusterNode(host=CLUSTER_HOST, port=CLUSTER_PORT)], + decode_responses=True, + ) + # These keys hash to different slots + for i in range(10): + key = f"test:cluster:{i}" + rc.set(key, f"value-{i}") + assert rc.get(key) == f"value-{i}" + rc.delete(key) + rc.close() + + +# ── Asynchronous (pool path) ───────────────────────────────────────────── + + +@skip_no_cluster +class TestAsyncRedisCluster: + """Tests using async redis-py RedisCluster (same as RedisPool._init_cluster).""" + + @pytest.mark.asyncio + async def test_async_connect_and_ping(self): + """Async cluster client connects and pings.""" + from redis.backoff import ExponentialBackoff + from redis.exceptions import ConnectionError, TimeoutError + from redis.retry import Retry + + rc = AsyncRedisCluster( + startup_nodes=[ + async_redis.cluster.ClusterNode(host=CLUSTER_HOST, port=CLUSTER_PORT), + ], + password=None, + decode_responses=True, + max_connections=20, + socket_timeout=5.0, + socket_connect_timeout=5.0, + retry=Retry(ExponentialBackoff(), retries=3), + retry_on_error=[ConnectionError, TimeoutError], + ) + result = await rc.ping() + assert result is True + await rc.aclose() + + @pytest.mark.asyncio + async def test_async_set_get(self): + """Async SET/GET across cluster slots.""" + rc = AsyncRedisCluster( + startup_nodes=[ + async_redis.cluster.ClusterNode(host=CLUSTER_HOST, port=CLUSTER_PORT), + ], + decode_responses=True, + ) + for i in range(10): + key = f"test:async:cluster:{i}" + await rc.set(key, f"value-{i}") + val = await rc.get(key) + assert val == f"value-{i}" + await rc.delete(key) + await 
rc.aclose() + + +# ── RedisPool integration ──────────────────────────────────────────────── + + +@skip_no_cluster +class TestRedisPoolClusterMode: + """Test RedisPool with actual cluster backend.""" + + @pytest.mark.asyncio + async def test_pool_cluster_mode(self, monkeypatch): + """RedisPool initializes in cluster mode and can SET/GET.""" + monkeypatch.setenv("REDIS_MODE", "cluster") + monkeypatch.setenv("REDIS_HOST", CLUSTER_HOST) + monkeypatch.setenv("REDIS_PORT", str(CLUSTER_PORT)) + monkeypatch.setenv("REDIS_PASSWORD", "") # empty = no auth + monkeypatch.setenv("REDIS_TLS_ENABLED", "false") + monkeypatch.setenv("REDIS_CLUSTER_NODES", "") # empty = fallback to host:port + + # Re-import to pick up new env + from src.config import Settings + + settings_obj = Settings() + cfg = settings_obj.redis + + # Verify our validators worked + assert cfg.password is None, f"Expected None, got {cfg.password!r}" + assert cfg.cluster_nodes is None, f"Expected None, got {cfg.cluster_nodes!r}" + + from src.core.pool import RedisPool + + pool = RedisPool() + # Inject our test settings + monkeypatch.setattr("src.core.pool.settings", settings_obj) + pool._initialize() + + client = pool.get_client() + assert isinstance(client, AsyncRedisCluster) + + # Test operations + await client.set("test:pool:cluster", "works") + val = await client.get("test:pool:cluster") + assert val == "works" + await client.delete("test:pool:cluster") + await client.aclose() + + @pytest.mark.asyncio + async def test_pool_cluster_mode_with_explicit_nodes(self, monkeypatch): + """RedisPool uses REDIS_CLUSTER_NODES when provided.""" + nodes_str = f"{CLUSTER_HOST}:{CLUSTER_PORT},{CLUSTER_HOST}:{CLUSTER_PORT + 1}" + monkeypatch.setenv("REDIS_MODE", "cluster") + monkeypatch.setenv("REDIS_CLUSTER_NODES", nodes_str) + monkeypatch.setenv("REDIS_PASSWORD", "") + monkeypatch.setenv("REDIS_TLS_ENABLED", "false") + + from src.config import Settings + + settings_obj = Settings() + cfg = settings_obj.redis + + assert 
cfg.cluster_nodes == nodes_str + assert cfg.password is None + + from src.core.pool import RedisPool + + pool = RedisPool() + monkeypatch.setattr("src.core.pool.settings", settings_obj) + pool._initialize() + + client = pool.get_client() + result = await client.ping() + assert result is True + await client.aclose() + + +# ── Config Validator integration ───────────────────────────────────────── + + +@skip_no_cluster +class TestConfigValidatorClusterMode: + """Test ConfigValidator._validate_redis_connection with real cluster.""" + + def test_validator_cluster_succeeds(self, monkeypatch): + """Config validator passes with a real cluster.""" + monkeypatch.setenv("REDIS_MODE", "cluster") + monkeypatch.setenv("REDIS_HOST", CLUSTER_HOST) + monkeypatch.setenv("REDIS_PORT", str(CLUSTER_PORT)) + monkeypatch.setenv("REDIS_PASSWORD", "") + monkeypatch.setenv("REDIS_TLS_ENABLED", "false") + monkeypatch.setenv("REDIS_CLUSTER_NODES", "") + + from src.config import Settings + + settings_obj = Settings() + monkeypatch.setattr("src.utils.config_validator.settings", settings_obj) + + from src.utils.config_validator import ConfigValidator + + validator = ConfigValidator() + validator._validate_redis_connection() + + assert not validator.errors, f"Unexpected errors: {validator.errors}" + + def test_validator_cluster_with_explicit_nodes(self, monkeypatch): + """Config validator passes with explicit cluster nodes.""" + nodes_str = f"{CLUSTER_HOST}:{CLUSTER_PORT},{CLUSTER_HOST}:{CLUSTER_PORT + 1},{CLUSTER_HOST}:{CLUSTER_PORT + 2}" + monkeypatch.setenv("REDIS_MODE", "cluster") + monkeypatch.setenv("REDIS_CLUSTER_NODES", nodes_str) + monkeypatch.setenv("REDIS_PASSWORD", "") + monkeypatch.setenv("REDIS_TLS_ENABLED", "false") + + from src.config import Settings + + settings_obj = Settings() + monkeypatch.setattr("src.utils.config_validator.settings", settings_obj) + + from src.utils.config_validator import ConfigValidator + + validator = ConfigValidator() + 
validator._validate_redis_connection() + + assert not validator.errors, f"Unexpected errors: {validator.errors}" diff --git a/tests/integration/test_redis_cluster_tls.py b/tests/integration/test_redis_cluster_tls.py new file mode 100644 index 0000000..e4f9f47 --- /dev/null +++ b/tests/integration/test_redis_cluster_tls.py @@ -0,0 +1,454 @@ +"""Integration tests for Redis Cluster with TLS. + +Mirrors the user's production GCP Memorystore configuration: +- REDIS_MODE=cluster +- REDIS_TLS_ENABLED=true +- REDIS_TLS_CA_CERT_FILE=/path/to/ca.crt (server verification) +- REDIS_TLS_CERT_FILE="" (no client cert / no mTLS) +- REDIS_TLS_KEY_FILE="" (no client key / no mTLS) +- REDIS_TLS_INSECURE=false (certificate chain verified) +- REDIS_TLS_CHECK_HOSTNAME not set (defaults to false) +- REDIS_PASSWORD="" (no authentication) +- REDIS_CLUSTER_NODES not set (falls back to host:port) +- REDIS_KEY_PREFIX=kubecoderun: + +Requires a running TLS Redis Cluster on localhost:6380-6385. +Start with: docker compose -f docker-compose.redis-cluster-tls.yml up -d + +Usage: + uv run python -m pytest tests/integration/test_redis_cluster_tls.py -v +""" + +import os +import ssl as ssl_mod +from pathlib import Path + +import pytest +import redis as sync_redis +import redis.asyncio as async_redis +from redis.asyncio.cluster import RedisCluster as AsyncRedisCluster +from redis.cluster import ClusterNode, RedisCluster + +# ── Configuration matching production ──────────────────────────────────── + +TLS_CLUSTER_HOST = os.environ.get("REDIS_TLS_CLUSTER_HOST", "127.0.0.1") +TLS_CLUSTER_PORT = int(os.environ.get("REDIS_TLS_CLUSTER_PORT", "6380")) + +# CA cert path (relative to project root, same concept as production +# REDIS_TLS_CA_CERT_FILE=/app/api/cache/redis-ca.crt) +CERTS_DIR = Path(__file__).resolve().parent.parent / "tls-certs" +CA_CERT_FILE = str(CERTS_DIR / "ca.crt") + +pytestmark = pytest.mark.integration + + +def _tls_kwargs_production() -> dict: + """Build TLS kwargs matching production 
config. + + This mirrors what RedisConfig.get_tls_kwargs() produces with: + REDIS_TLS_ENABLED=true + REDIS_TLS_INSECURE=false + REDIS_TLS_CHECK_HOSTNAME=false (default) + REDIS_TLS_CA_CERT_FILE=/path/to/ca.crt + REDIS_TLS_CERT_FILE="" -> None + REDIS_TLS_KEY_FILE="" -> None + """ + return { + "ssl": True, + "ssl_cert_reqs": ssl_mod.CERT_REQUIRED, + "ssl_check_hostname": False, + "ssl_ca_certs": CA_CERT_FILE, + } + + +def _tls_cluster_available() -> bool: + """Check if a TLS Redis Cluster is reachable.""" + try: + rc = RedisCluster( + startup_nodes=[ClusterNode(host=TLS_CLUSTER_HOST, port=TLS_CLUSTER_PORT)], + decode_responses=True, + socket_timeout=3, + socket_connect_timeout=3, + **_tls_kwargs_production(), + ) + rc.ping() + rc.close() + return True + except Exception: + return False + + +skip_no_tls_cluster = pytest.mark.skipif( + not _tls_cluster_available(), + reason=f"TLS Redis Cluster not available at {TLS_CLUSTER_HOST}:{TLS_CLUSTER_PORT}", +) + + +# ── Synchronous TLS Cluster tests ──────────────────────────────────────── + + +@skip_no_tls_cluster +class TestSyncTlsCluster: + """Synchronous redis-py with TLS (same path as config_validator).""" + + def test_connect_single_startup_node_tls(self): + """TLS cluster discovery from a single startup node.""" + rc = RedisCluster( + startup_nodes=[ClusterNode(host=TLS_CLUSTER_HOST, port=TLS_CLUSTER_PORT)], + decode_responses=True, + socket_timeout=5, + socket_connect_timeout=5, + **_tls_kwargs_production(), + ) + assert rc.ping() is True + node_info = rc.cluster_info(target_nodes=RedisCluster.RANDOM) + assert node_info.get("cluster_state") == "ok" + rc.close() + + def test_connect_no_password_tls(self): + """TLS cluster with password=None (production has REDIS_PASSWORD='').""" + rc = RedisCluster( + startup_nodes=[ClusterNode(host=TLS_CLUSTER_HOST, port=TLS_CLUSTER_PORT)], + password=None, + decode_responses=True, + socket_timeout=5, + **_tls_kwargs_production(), + ) + assert rc.ping() is True + rc.close() + + def 
test_set_get_across_slots_tls(self): + """SET/GET across cluster slots over TLS.""" + rc = RedisCluster( + startup_nodes=[ClusterNode(host=TLS_CLUSTER_HOST, port=TLS_CLUSTER_PORT)], + decode_responses=True, + **_tls_kwargs_production(), + ) + for i in range(10): + key = f"test:tls:cluster:{i}" + rc.set(key, f"value-{i}") + assert rc.get(key) == f"value-{i}" + rc.delete(key) + rc.close() + + def test_key_prefix_operations_tls(self): + """Operations with kubecoderun: prefix (matching production key_prefix).""" + rc = RedisCluster( + startup_nodes=[ClusterNode(host=TLS_CLUSTER_HOST, port=TLS_CLUSTER_PORT)], + decode_responses=True, + **_tls_kwargs_production(), + ) + prefix = "kubecoderun:" + key = f"{prefix}session:test-abc" + rc.set(key, "session-data") + assert rc.get(key) == "session-data" + rc.delete(key) + rc.close() + + +# ── Asynchronous TLS Cluster tests ─────────────────────────────────────── + + +@skip_no_tls_cluster +class TestAsyncTlsCluster: + """Async redis-py with TLS (same path as RedisPool._init_cluster).""" + + @pytest.mark.asyncio + async def test_async_connect_tls(self): + """Async TLS cluster client connects and pings.""" + from redis.backoff import ExponentialBackoff + from redis.exceptions import ConnectionError, TimeoutError + from redis.retry import Retry + + rc = AsyncRedisCluster( + startup_nodes=[ + async_redis.cluster.ClusterNode(host=TLS_CLUSTER_HOST, port=TLS_CLUSTER_PORT), + ], + password=None, + decode_responses=True, + max_connections=20, + socket_timeout=5.0, + socket_connect_timeout=5.0, + retry=Retry(ExponentialBackoff(), retries=3), + retry_on_error=[ConnectionError, TimeoutError], + **_tls_kwargs_production(), + ) + assert await rc.ping() is True + await rc.aclose() + + @pytest.mark.asyncio + async def test_async_set_get_tls(self): + """Async SET/GET over TLS cluster.""" + rc = AsyncRedisCluster( + startup_nodes=[ + async_redis.cluster.ClusterNode(host=TLS_CLUSTER_HOST, port=TLS_CLUSTER_PORT), + ], + decode_responses=True, + 
**_tls_kwargs_production(), + ) + for i in range(10): + key = f"test:async:tls:{i}" + await rc.set(key, f"tls-value-{i}") + val = await rc.get(key) + assert val == f"tls-value-{i}" + await rc.delete(key) + await rc.aclose() + + @pytest.mark.asyncio + async def test_async_prefixed_operations_tls(self): + """Async operations with production-like key prefix over TLS.""" + rc = AsyncRedisCluster( + startup_nodes=[ + async_redis.cluster.ClusterNode(host=TLS_CLUSTER_HOST, port=TLS_CLUSTER_PORT), + ], + decode_responses=True, + **_tls_kwargs_production(), + ) + prefix = "kubecoderun:" + keys = [f"{prefix}session:{i}" for i in range(5)] + for key in keys: + await rc.set(key, "data") + assert await rc.get(key) == "data" + for key in keys: + await rc.delete(key) + await rc.aclose() + + +# ── RedisPool with TLS Cluster ─────────────────────────────────────────── + + +@skip_no_tls_cluster +class TestRedisPoolTlsCluster: + """Test RedisPool with TLS cluster backend — mirrors production config.""" + + @pytest.mark.asyncio + async def test_pool_tls_cluster_production_config(self, monkeypatch): + """RedisPool initializes with the exact production configuration. 
+ + Env vars set here match the user's Helm values: + REDIS_MODE: "cluster" + REDIS_HOST: + REDIS_PORT: "6380" + REDIS_PASSWORD: "" + REDIS_DB: "0" + REDIS_MAX_CONNECTIONS: "20" + REDIS_SOCKET_TIMEOUT: "5" + REDIS_SOCKET_CONNECT_TIMEOUT: "5" + REDIS_KEY_PREFIX: "kubecoderun:" + REDIS_TLS_ENABLED: "true" + REDIS_TLS_CA_CERT_FILE: + REDIS_TLS_CERT_FILE: "" + REDIS_TLS_KEY_FILE: "" + REDIS_TLS_INSECURE: "false" + """ + # Set env vars exactly as Helm renders them in production + monkeypatch.setenv("REDIS_MODE", "cluster") + monkeypatch.setenv("REDIS_HOST", TLS_CLUSTER_HOST) + monkeypatch.setenv("REDIS_PORT", str(TLS_CLUSTER_PORT)) + monkeypatch.setenv("REDIS_PASSWORD", "") # empty -> None via validator + monkeypatch.setenv("REDIS_DB", "0") + monkeypatch.setenv("REDIS_MAX_CONNECTIONS", "20") + monkeypatch.setenv("REDIS_SOCKET_TIMEOUT", "5") + monkeypatch.setenv("REDIS_SOCKET_CONNECT_TIMEOUT", "5") + monkeypatch.setenv("REDIS_KEY_PREFIX", "kubecoderun:") + monkeypatch.setenv("REDIS_TLS_ENABLED", "true") + monkeypatch.setenv("REDIS_TLS_CA_CERT_FILE", CA_CERT_FILE) + monkeypatch.setenv("REDIS_TLS_CERT_FILE", "") # no client cert + monkeypatch.setenv("REDIS_TLS_KEY_FILE", "") # no client key + monkeypatch.setenv("REDIS_TLS_INSECURE", "false") + monkeypatch.setenv("REDIS_CLUSTER_NODES", "") # empty -> None, fallback to host:port + + from src.config import Settings + + settings_obj = Settings() + cfg = settings_obj.redis + + # Verify validators worked correctly + assert cfg.mode == "cluster" + assert cfg.host == TLS_CLUSTER_HOST + assert cfg.port == TLS_CLUSTER_PORT + assert cfg.password is None, f"Expected None, got {cfg.password!r}" + assert cfg.cluster_nodes is None, f"Expected None, got {cfg.cluster_nodes!r}" + assert cfg.tls_enabled is True + assert cfg.tls_ca_cert_file == CA_CERT_FILE + assert cfg.tls_cert_file is None or cfg.tls_cert_file == "" + assert cfg.tls_key_file is None or cfg.tls_key_file == "" + assert cfg.tls_insecure is False + assert cfg.tls_check_hostname 
is False # default + assert cfg.key_prefix == "kubecoderun:" + + # Verify TLS kwargs + tls_kwargs = cfg.get_tls_kwargs() + assert tls_kwargs["ssl"] is True + assert tls_kwargs["ssl_cert_reqs"] == ssl_mod.CERT_REQUIRED + assert tls_kwargs["ssl_check_hostname"] is False + assert tls_kwargs["ssl_ca_certs"] == CA_CERT_FILE + assert "ssl_certfile" not in tls_kwargs # no client cert + assert "ssl_keyfile" not in tls_kwargs # no client key + + # Initialize pool + from src.core.pool import RedisPool + + pool = RedisPool() + monkeypatch.setattr("src.core.pool.settings", settings_obj) + pool._initialize() + + client = pool.get_client() + assert isinstance(client, AsyncRedisCluster) + assert pool.key_prefix == "kubecoderun:" + + # Test operations with prefix + full_key = pool.make_key("session:test-tls") + assert full_key == "kubecoderun:session:test-tls" + + await client.set(full_key, "tls-session-data") + val = await client.get(full_key) + assert val == "tls-session-data" + await client.delete(full_key) + + await pool.close() + + @pytest.mark.asyncio + async def test_pool_tls_cluster_without_key_prefix(self, monkeypatch): + """RedisPool works in TLS cluster mode without key prefix.""" + monkeypatch.setenv("REDIS_MODE", "cluster") + monkeypatch.setenv("REDIS_HOST", TLS_CLUSTER_HOST) + monkeypatch.setenv("REDIS_PORT", str(TLS_CLUSTER_PORT)) + monkeypatch.setenv("REDIS_PASSWORD", "") + monkeypatch.setenv("REDIS_KEY_PREFIX", "") + monkeypatch.setenv("REDIS_TLS_ENABLED", "true") + monkeypatch.setenv("REDIS_TLS_CA_CERT_FILE", CA_CERT_FILE) + monkeypatch.setenv("REDIS_TLS_INSECURE", "false") + monkeypatch.setenv("REDIS_CLUSTER_NODES", "") + + from src.config import Settings + from src.core.pool import RedisPool + + settings_obj = Settings() + pool = RedisPool() + monkeypatch.setattr("src.core.pool.settings", settings_obj) + pool._initialize() + + client = pool.get_client() + assert pool.key_prefix == "" + assert pool.make_key("mykey") == "mykey" + + await 
client.set("test:no-prefix:tls", "ok") + assert await client.get("test:no-prefix:tls") == "ok" + await client.delete("test:no-prefix:tls") + await pool.close() + + +# ── ConfigValidator with TLS Cluster ───────────────────────────────────── + + +@skip_no_tls_cluster +class TestConfigValidatorTlsCluster: + """Test ConfigValidator._validate_redis_connection with TLS cluster.""" + + def test_validator_tls_cluster_production_config(self, monkeypatch): + """Config validator passes with production-like TLS cluster config.""" + monkeypatch.setenv("REDIS_MODE", "cluster") + monkeypatch.setenv("REDIS_HOST", TLS_CLUSTER_HOST) + monkeypatch.setenv("REDIS_PORT", str(TLS_CLUSTER_PORT)) + monkeypatch.setenv("REDIS_PASSWORD", "") + monkeypatch.setenv("REDIS_TLS_ENABLED", "true") + monkeypatch.setenv("REDIS_TLS_CA_CERT_FILE", CA_CERT_FILE) + monkeypatch.setenv("REDIS_TLS_CERT_FILE", "") + monkeypatch.setenv("REDIS_TLS_KEY_FILE", "") + monkeypatch.setenv("REDIS_TLS_INSECURE", "false") + monkeypatch.setenv("REDIS_CLUSTER_NODES", "") + + from src.config import Settings + + settings_obj = Settings() + monkeypatch.setattr("src.utils.config_validator.settings", settings_obj) + + from src.utils.config_validator import ConfigValidator + + validator = ConfigValidator() + validator._validate_redis_connection() + + assert not validator.errors, f"Unexpected errors: {validator.errors}" + + def test_validator_tls_cluster_bad_ca_cert_fails(self, monkeypatch): + """Config validator fails when CA cert path is wrong.""" + monkeypatch.setenv("REDIS_MODE", "cluster") + monkeypatch.setenv("REDIS_HOST", TLS_CLUSTER_HOST) + monkeypatch.setenv("REDIS_PORT", str(TLS_CLUSTER_PORT)) + monkeypatch.setenv("REDIS_PASSWORD", "") + monkeypatch.setenv("REDIS_TLS_ENABLED", "true") + monkeypatch.setenv("REDIS_TLS_CA_CERT_FILE", "/nonexistent/ca.crt") + monkeypatch.setenv("REDIS_TLS_INSECURE", "false") + monkeypatch.setenv("REDIS_CLUSTER_NODES", "") + + from src.config import Settings + + settings_obj = Settings() + 
monkeypatch.setattr("src.utils.config_validator.settings", settings_obj) + + from src.utils.config_validator import ConfigValidator + + validator = ConfigValidator() + validator._validate_redis_connection() + + assert len(validator.errors) > 0, "Expected validation error for bad CA cert" + + +# ── RedisConfig TLS kwargs verification ────────────────────────────────── + + +@skip_no_tls_cluster +class TestRedisConfigTlsKwargs: + """Verify RedisConfig.get_tls_kwargs() produces correct kwargs for production.""" + + def test_production_tls_kwargs(self, monkeypatch): + """get_tls_kwargs() output matches what RedisCluster needs for TLS.""" + monkeypatch.setenv("REDIS_MODE", "cluster") + monkeypatch.setenv("REDIS_TLS_ENABLED", "true") + monkeypatch.setenv("REDIS_TLS_CA_CERT_FILE", CA_CERT_FILE) + monkeypatch.setenv("REDIS_TLS_CERT_FILE", "") + monkeypatch.setenv("REDIS_TLS_KEY_FILE", "") + monkeypatch.setenv("REDIS_TLS_INSECURE", "false") + + from src.config.redis import RedisConfig + + cfg = RedisConfig( + redis_mode="cluster", + redis_tls_enabled=True, + redis_tls_ca_cert_file=CA_CERT_FILE, + redis_tls_cert_file="", + redis_tls_key_file="", + redis_tls_insecure=False, + ) + kwargs = cfg.get_tls_kwargs() + + assert kwargs["ssl"] is True + assert kwargs["ssl_cert_reqs"] == ssl_mod.CERT_REQUIRED + assert kwargs["ssl_check_hostname"] is False + assert kwargs["ssl_ca_certs"] == CA_CERT_FILE + # Empty string cert/key files should NOT be in kwargs + assert "ssl_certfile" not in kwargs + assert "ssl_keyfile" not in kwargs + + def test_tls_insecure_kwargs(self, monkeypatch): + """get_tls_kwargs() with insecure mode skips cert verification.""" + from src.config.redis import RedisConfig + + cfg = RedisConfig( + redis_mode="cluster", + redis_tls_enabled=True, + redis_tls_insecure=True, + ) + kwargs = cfg.get_tls_kwargs() + + assert kwargs["ssl"] is True + assert kwargs["ssl_cert_reqs"] == ssl_mod.CERT_NONE + assert kwargs["ssl_check_hostname"] is False + + def 
test_tls_disabled_returns_empty(self): + """get_tls_kwargs() returns empty dict when TLS is off.""" + from src.config.redis import RedisConfig + + cfg = RedisConfig(redis_tls_enabled=False) + assert cfg.get_tls_kwargs() == {} diff --git a/tests/tls-certs/.gitignore b/tests/tls-certs/.gitignore new file mode 100644 index 0000000..3ba3676 --- /dev/null +++ b/tests/tls-certs/.gitignore @@ -0,0 +1,6 @@ +# Generated TLS certificates — do not commit +*.key +*.crt +*.csr +*.srl +*.cnf diff --git a/tests/tls-certs/cleanup.sh b/tests/tls-certs/cleanup.sh new file mode 100644 index 0000000..340cc1d --- /dev/null +++ b/tests/tls-certs/cleanup.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# Remove generated TLS certificates from tests/tls-certs/. +# +# Usage: +# cd tests/tls-certs && ./cleanup.sh +set -euo pipefail +cd "$(dirname "$0")" + +rm -f ca.key ca.crt ca.srl ca-ext.cnf +rm -f redis.key redis.crt redis.csr redis-ext.cnf + +echo "TLS certificates cleaned up." diff --git a/tests/tls-certs/generate.sh b/tests/tls-certs/generate.sh new file mode 100644 index 0000000..2c62473 --- /dev/null +++ b/tests/tls-certs/generate.sh @@ -0,0 +1,94 @@ +#!/usr/bin/env bash +# Generate self-signed TLS certificates for Redis Cluster integration testing. +# +# Creates: +# ca.key / ca.crt — Certificate Authority (with keyUsage extensions for Python 3.14+) +# redis.key / redis.crt — Server cert signed by the CA (SANs for localhost + docker IPs) +# +# Usage: +# cd tests/tls-certs && ./generate.sh +set -euo pipefail +cd "$(dirname "$0")" + +echo "Generating CA key + certificate..." 
+cat > ca-ext.cnf << 'EOF' +[req] +default_bits = 4096 +prompt = no +distinguished_name = dn +x509_extensions = v3_ca + +[dn] +C = PT +ST = Lisboa +L = Lisboa +O = NOS Testing +CN = Redis Test CA + +[v3_ca] +subjectKeyIdentifier = hash +authorityKeyIdentifier = keyid:always,issuer +basicConstraints = critical, CA:TRUE +keyUsage = critical, keyCertSign, cRLSign +EOF + +openssl genrsa -out ca.key 4096 2>/dev/null +openssl req -x509 -new -nodes -key ca.key -sha256 -days 3650 \ + -out ca.crt -config ca-ext.cnf 2>/dev/null + +echo "Generating server key + certificate..." +cat > redis-ext.cnf << 'EOF' +[req] +default_bits = 2048 +prompt = no +distinguished_name = dn +req_extensions = v3_req + +[dn] +C = PT +ST = Lisboa +L = Lisboa +O = NOS Testing +CN = redis-node + +[v3_req] +subjectAltName = @alt_names +basicConstraints = CA:FALSE +keyUsage = digitalSignature, keyEncipherment +extendedKeyUsage = serverAuth, clientAuth + +[alt_names] +DNS.1 = redis-tls-node-0 +DNS.2 = redis-tls-node-1 +DNS.3 = redis-tls-node-2 +DNS.4 = redis-tls-node-3 +DNS.5 = redis-tls-node-4 +DNS.6 = redis-tls-node-5 +DNS.7 = localhost +IP.1 = 127.0.0.1 +IP.2 = 172.17.0.1 +IP.3 = 172.18.0.1 +IP.4 = 172.19.0.1 +IP.5 = 172.20.0.1 +IP.6 = 172.21.0.1 +IP.7 = 172.22.0.1 +IP.8 = 172.23.0.1 +IP.9 = 172.24.0.1 +IP.10 = 172.25.0.1 +EOF + +openssl genrsa -out redis.key 2048 2>/dev/null +openssl req -new -key redis.key -out redis.csr -config redis-ext.cnf 2>/dev/null +openssl x509 -req -in redis.csr -CA ca.crt -CAkey ca.key -CAcreateserial \ + -out redis.crt -days 3650 -sha256 \ + -extfile redis-ext.cnf -extensions v3_req 2>/dev/null + +# Redis needs world-readable key files (containers run as redis user) +chmod 644 redis.key +# CA private key should stay restricted — it is not needed by Redis containers +chmod 600 ca.key + +echo "Verifying certificate chain..." +openssl verify -CAfile ca.crt redis.crt + +echo "Done. 
Certificates generated in $(pwd)/" diff --git a/tests/unit/test_cluster_pipeline_compat.py b/tests/unit/test_cluster_pipeline_compat.py new file mode 100644 index 0000000..b308e8f --- /dev/null +++ b/tests/unit/test_cluster_pipeline_compat.py @@ -0,0 +1,244 @@ +"""Unit tests verifying that all Redis pipelines use transaction=False. + +Redis Cluster does not support MULTI/EXEC transactions across keys in +different hash slots. Every pipeline that touches keys with different +prefixes (e.g. session data + session index) MUST use transaction=False +so redis-py's ClusterPipeline can split commands by node. + +These tests act as a safety net: if someone accidentally changes a +pipeline back to transaction=True, the test will catch it. +""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from src.models.session import SessionCreate +from src.services.api_key_manager import ApiKeyManagerService +from src.services.session import SessionService +from src.services.state import StateService + +# ── Session Service ───────────────────────────────────────────────────── + + +@pytest.fixture +def mock_redis_session(): + """Mock Redis client for session tests.""" + redis_mock = AsyncMock() + + pipeline_mock = AsyncMock() + pipeline_mock.hset = MagicMock() + pipeline_mock.expire = MagicMock() + pipeline_mock.sadd = MagicMock() + pipeline_mock.delete = MagicMock() + pipeline_mock.srem = MagicMock() + pipeline_mock.execute = AsyncMock(return_value=[True, True, True]) + pipeline_mock.reset = AsyncMock() + + redis_mock.pipeline = MagicMock(return_value=pipeline_mock) + redis_mock.hgetall = AsyncMock(return_value={}) + return redis_mock + + +@pytest.fixture +def session_service(mock_redis_session): + return SessionService(redis_client=mock_redis_session) + + +@pytest.mark.asyncio +async def test_session_create_uses_non_transactional_pipeline(session_service, mock_redis_session): + """create_session() must use transaction=False for cluster compat.""" + 
request = SessionCreate(metadata={"test": "value"}) + await session_service.create_session(request) + + mock_redis_session.pipeline.assert_called_once_with(transaction=False) + + +@pytest.mark.asyncio +async def test_session_delete_uses_non_transactional_pipeline(session_service, mock_redis_session): + """delete_session() must use transaction=False for cluster compat.""" + session_id = "session-to-delete" + # Provide minimal session data so delete_session finds the session + mock_redis_session.hgetall.return_value = { + "session_id": session_id, + "status": "active", + "created_at": "2025-01-01T00:00:00", + "last_activity": "2025-01-01T00:00:00", + "expires_at": "2026-01-01T00:00:00", + "files": "{}", + "metadata": "{}", + "working_directory": "/workspace", + } + + pipeline_mock = mock_redis_session.pipeline.return_value + pipeline_mock.execute = AsyncMock(return_value=[1, 1]) + + await session_service.delete_session(session_id) + + mock_redis_session.pipeline.assert_called_with(transaction=False) + + +# ── API Key Manager ───────────────────────────────────────────────────── + + +@pytest.fixture +def mock_redis_apikey(): + """Mock Redis client for API key manager tests.""" + redis_mock = AsyncMock() + redis_mock.hgetall = AsyncMock(return_value={}) + redis_mock.hset = AsyncMock(return_value=1) + redis_mock.exists = AsyncMock(return_value=True) + redis_mock.delete = AsyncMock(return_value=1) + redis_mock.sadd = AsyncMock(return_value=1) + redis_mock.srem = AsyncMock(return_value=1) + redis_mock.smembers = AsyncMock(return_value=set()) + redis_mock.get = AsyncMock(return_value=None) + redis_mock.setex = AsyncMock(return_value=True) + redis_mock.incr = AsyncMock(return_value=1) + redis_mock.expire = AsyncMock(return_value=True) + redis_mock.hincrby = AsyncMock(return_value=1) + + pipeline_mock = AsyncMock() + pipeline_mock.hset = MagicMock() + pipeline_mock.sadd = MagicMock() + pipeline_mock.delete = MagicMock() + pipeline_mock.srem = MagicMock() + pipeline_mock.incr 
= MagicMock() + pipeline_mock.expire = MagicMock() + pipeline_mock.hincrby = MagicMock() + pipeline_mock.execute = AsyncMock(return_value=[True, True, True]) + redis_mock.pipeline = MagicMock(return_value=pipeline_mock) + + return redis_mock + + +@pytest.fixture +def api_key_manager(mock_redis_apikey): + return ApiKeyManagerService(redis_client=mock_redis_apikey) + + +@pytest.mark.asyncio +async def test_create_key_uses_non_transactional_pipeline(api_key_manager, mock_redis_apikey): + """create_key() must use transaction=False for cluster compat.""" + result = await api_key_manager.create_key( + name="test-key", + ) + + # create_key calls pipeline at least once + mock_redis_apikey.pipeline.assert_called() + for call in mock_redis_apikey.pipeline.call_args_list: + assert call == ((), {"transaction": False}), f"Expected pipeline(transaction=False), got {call}" + + +@pytest.mark.asyncio +async def test_ensure_single_env_key_uses_non_transactional_pipeline(api_key_manager, mock_redis_apikey): + """_ensure_single_env_key_record() must use transaction=False.""" + # Call the internal method directly + await api_key_manager._ensure_single_env_key_record("test-hash", "test-env") + + mock_redis_apikey.pipeline.assert_called() + for call in mock_redis_apikey.pipeline.call_args_list: + assert call == ((), {"transaction": False}), f"Expected pipeline(transaction=False), got {call}" + + +@pytest.mark.asyncio +async def test_revoke_key_uses_non_transactional_pipeline(api_key_manager, mock_redis_apikey): + """revoke_key() must use transaction=False for cluster compat.""" + # Setup: make the key "exist" so revoke proceeds + mock_redis_apikey.hgetall.return_value = { + "name": "test-key", + "key_hash": "abc123", + "environment": "test", + "status": "active", + "created_at": "2025-01-01T00:00:00+00:00", + } + mock_redis_apikey.exists.return_value = True + + await api_key_manager.revoke_key("abc123") + + mock_redis_apikey.pipeline.assert_called() + for call in 
mock_redis_apikey.pipeline.call_args_list: + assert call == ((), {"transaction": False}), f"Expected pipeline(transaction=False), got {call}" + + +# ── State Service ─────────────────────────────────────────────────────── + + +@pytest.fixture +def mock_redis_state(): + """Mock Redis client for state service tests.""" + client = AsyncMock() + client.get = AsyncMock(return_value=None) + client.setex = AsyncMock() + client.delete = AsyncMock() + client.strlen = AsyncMock(return_value=0) + client.ttl = AsyncMock(return_value=-1) + client.expire = AsyncMock() + + pipeline_mock = AsyncMock() + pipeline_mock.set = MagicMock() + pipeline_mock.setex = MagicMock() + pipeline_mock.expire = MagicMock() + pipeline_mock.execute = AsyncMock(return_value=[True, True, True, True, True]) + client.pipeline = MagicMock(return_value=pipeline_mock) + + return client + + +@pytest.fixture +def state_service(mock_redis_state): + with patch("src.services.state.redis_pool") as mock_pool: + mock_pool.get_client.return_value = mock_redis_state + service = StateService(redis_client=mock_redis_state) + return service + + +@pytest.mark.asyncio +async def test_save_state_uses_non_transactional_pipeline(state_service, mock_redis_state): + """save_state() must use transaction=False for cluster compat.""" + import base64 + + session_id = "state-test-session" + raw_bytes = b"\x02test state data" + state_b64 = base64.b64encode(raw_bytes).decode("utf-8") + + await state_service.save_state(session_id, state_b64) + + mock_redis_state.pipeline.assert_called() + for call in mock_redis_state.pipeline.call_args_list: + assert call == ((), {"transaction": False}), f"Expected pipeline(transaction=False), got {call}" + + +# ── Version resolution ────────────────────────────────────────────────── + + +class TestVersionResolution: + """Tests for SERVICE_VERSION env var override.""" + + def test_logging_uses_service_version_when_set(self): + """add_service_context should prefer settings.service_version.""" + with ( 
+ patch("src.utils.logging.settings") as mock_settings, + patch("src.utils.logging.__version__", "0.0.0.dev0"), + ): + mock_settings.service_version = "2.1.4" + from src.utils.logging import add_service_context + + event_dict = {} + add_service_context(None, None, event_dict) + + assert event_dict["version"] == "2.1.4" + + def test_logging_falls_back_to_build_version(self): + """add_service_context should fall back to __version__ when SERVICE_VERSION unset.""" + with ( + patch("src.utils.logging.settings") as mock_settings, + patch("src.utils.logging.__version__", "1.2.3"), + ): + mock_settings.service_version = None + from src.utils.logging import add_service_context + + event_dict = {} + add_service_context(None, None, event_dict) + + assert event_dict["version"] == "1.2.3" diff --git a/tests/unit/test_core_pool.py b/tests/unit/test_core_pool.py index 9d486a8..8d46d60 100644 --- a/tests/unit/test_core_pool.py +++ b/tests/unit/test_core_pool.py @@ -51,25 +51,60 @@ def test_initialize_creates_pool(self): assert pool._initialized is True assert pool._client is not None - def test_initialize_fallback_on_error(self): - """Test _initialize creates fallback client on error.""" + def test_initialize_raises_on_error(self): + """Test _initialize propagates errors instead of silently falling back.""" pool = RedisPool() with patch("src.core.pool.settings") as mock_settings: - mock_settings.get_redis_url.side_effect = Exception("Connection failed") - - with patch("src.core.pool.redis.from_url") as mock_from_url: - mock_from_url.return_value = MagicMock() - + mock_settings.redis.mode = "standalone" + mock_settings.redis.get_url.side_effect = Exception("Connection failed") + mock_settings.redis.get_tls_kwargs.return_value = {} + mock_settings.redis.key_prefix = "" + mock_settings.redis.max_connections = 20 + mock_settings.redis.socket_timeout = 5 + mock_settings.redis.socket_connect_timeout = 5 + + with pytest.raises(Exception, match="Connection failed"): pool._initialize() - 
assert pool._initialized is True - assert pool._client is not None + assert pool._initialized is False + assert pool._client is None class TestGetClient: """Tests for get_client method.""" + def test_init_cluster_does_not_pass_retry_on_timeout(self): + """Test _init_cluster uses retry/retry_on_error instead of retry_on_timeout. + + RedisCluster (async) does not accept retry_on_timeout as a kwarg. + """ + pool = RedisPool() + + with patch("src.core.pool.settings") as mock_settings: + cfg = mock_settings.redis + cfg.mode = "cluster" + cfg.host = "redis-host" + cfg.port = 6379 + cfg.password = None + cfg.cluster_nodes = None + cfg.key_prefix = "" + cfg.tls_enabled = False + cfg.max_connections = 20 + cfg.socket_timeout = 5 + cfg.socket_connect_timeout = 5 + cfg.get_tls_kwargs.return_value = {} + + with patch("src.core.pool.RedisCluster") as mock_cluster: + mock_cluster.return_value = MagicMock() + pool._initialize() + + mock_cluster.assert_called_once() + call_kwargs = mock_cluster.call_args[1] + assert "retry_on_timeout" not in call_kwargs, "RedisCluster does not accept retry_on_timeout" + assert "retry" in call_kwargs + assert "retry_on_error" in call_kwargs + def test_get_client_initializes_if_needed(self): """Test get_client initializes the pool if not initialized.""" pool = RedisPool() @@ -106,7 +141,7 @@ def test_pool_stats_not_initialized(self): stats = pool.pool_stats - assert stats == {"initialized": False} + assert stats == {"initialized": False, "mode": "standalone"} def test_pool_stats_initialized(self): """Test pool_stats when pool is initialized.""" @@ -114,10 +149,12 @@ def test_pool_stats_initialized(self): mock_pool = MagicMock() mock_pool.max_connections = 20 pool._pool = mock_pool + pool._initialized = True stats = pool.pool_stats assert stats["initialized"] is True + assert stats["mode"] == "standalone" assert stats["max_connections"] == 20 diff --git a/tests/unit/test_job_executor.py b/tests/unit/test_job_executor.py index 1036c66..d07bc0a 100644 
--- a/tests/unit/test_job_executor.py +++ b/tests/unit/test_job_executor.py @@ -62,7 +62,7 @@ def test_init_with_defaults(self): assert executor.namespace == "default" assert executor.ttl_seconds_after_finished == 60 assert executor.active_deadline_seconds == 300 - assert executor.sidecar_image == "aronmuon/kubecoderun-sidecar:latest" + assert executor.sidecar_image == "aronmuon/kubecoderun-sidecar-agent:latest" def test_init_with_custom_values(self): """Test initialization with custom values.""" @@ -499,3 +499,90 @@ async def test_execute_with_job_cleanup_on_error(self, job_executor, pod_spec, j # Give asyncio.create_task time to schedule await asyncio.sleep(0.1) + + +class TestCreateJobPassesAllPodSpecFields: + """Tests that create_job passes all PodSpec fields to create_job_manifest.""" + + @pytest.mark.asyncio + async def test_create_job_passes_image_pull_secrets(self, job_executor): + """Test that image_pull_secrets are forwarded to create_job_manifest.""" + spec = PodSpec( + image="python:3.11", + language="python", + namespace="test-namespace", + image_pull_secrets=["my-registry-secret", "other-secret"], + ) + mock_batch_api = MagicMock() + mock_job = MagicMock() + mock_job.metadata.uid = "job-uid-123" + mock_batch_api.create_namespaced_job.return_value = mock_job + + with patch("src.services.kubernetes.job_executor.get_batch_api", return_value=mock_batch_api): + with patch("src.services.kubernetes.job_executor.create_job_manifest", return_value={}) as mock_manifest: + await job_executor.create_job(spec, "session-123") + + _, kwargs = mock_manifest.call_args + assert kwargs["image_pull_secrets"] == ["my-registry-secret", "other-secret"] + + @pytest.mark.asyncio + async def test_create_job_passes_image_pull_policy(self, job_executor): + """Test that image_pull_policy is forwarded to create_job_manifest.""" + spec = PodSpec( + image="python:3.11", + language="python", + namespace="test-namespace", + image_pull_policy="IfNotPresent", + ) + mock_batch_api = 
MagicMock() + mock_job = MagicMock() + mock_job.metadata.uid = "job-uid-123" + mock_batch_api.create_namespaced_job.return_value = mock_job + + with patch("src.services.kubernetes.job_executor.get_batch_api", return_value=mock_batch_api): + with patch("src.services.kubernetes.job_executor.create_job_manifest", return_value={}) as mock_manifest: + await job_executor.create_job(spec, "session-123") + + _, kwargs = mock_manifest.call_args + assert kwargs["image_pull_policy"] == "IfNotPresent" + + @pytest.mark.asyncio + async def test_create_job_passes_execution_mode(self, job_executor): + """Test that execution_mode is forwarded to create_job_manifest.""" + spec = PodSpec( + image="python:3.11", + language="python", + namespace="test-namespace", + execution_mode="nsenter", + ) + mock_batch_api = MagicMock() + mock_job = MagicMock() + mock_job.metadata.uid = "job-uid-123" + mock_batch_api.create_namespaced_job.return_value = mock_job + + with patch("src.services.kubernetes.job_executor.get_batch_api", return_value=mock_batch_api): + with patch("src.services.kubernetes.job_executor.create_job_manifest", return_value={}) as mock_manifest: + await job_executor.create_job(spec, "session-123") + + _, kwargs = mock_manifest.call_args + assert kwargs["execution_mode"] == "nsenter" + + @pytest.mark.asyncio + async def test_create_job_no_image_pull_secrets_by_default(self, job_executor): + """Test that image_pull_secrets defaults to None.""" + spec = PodSpec( + image="python:3.11", + language="python", + namespace="test-namespace", + ) + mock_batch_api = MagicMock() + mock_job = MagicMock() + mock_job.metadata.uid = "job-uid-123" + mock_batch_api.create_namespaced_job.return_value = mock_job + + with patch("src.services.kubernetes.job_executor.get_batch_api", return_value=mock_batch_api): + with patch("src.services.kubernetes.job_executor.create_job_manifest", return_value={}) as mock_manifest: + await job_executor.create_job(spec, "session-123") + + _, kwargs = 
mock_manifest.call_args + assert kwargs["image_pull_secrets"] is None diff --git a/tests/unit/test_kubernetes_client.py b/tests/unit/test_kubernetes_client.py index 35ef7b3..d0e145d 100644 --- a/tests/unit/test_kubernetes_client.py +++ b/tests/unit/test_kubernetes_client.py @@ -417,6 +417,102 @@ def test_create_pod_manifest_security_context(self): assert main_container.security_context.run_as_user == 1001 assert main_container.security_context.run_as_non_root is True + def test_create_pod_manifest_agent_mode_default(self): + """Test that agent mode is the default execution mode.""" + pod = client.create_pod_manifest( + name="test-pod", + namespace="test-ns", + main_image="python:3.12", + sidecar_image="sidecar:latest", + language="python", + labels={"app": "test"}, + ) + + # Agent mode: no shareProcessNamespace + assert pod.spec.share_process_namespace is False + + # Agent mode: init container copies executor agent to shared volume + assert pod.spec.init_containers is not None + assert len(pod.spec.init_containers) == 1 + init_container = pod.spec.init_containers[0] + assert init_container.name == "agent-init" + assert init_container.command[0] == "python" + assert "/opt/executor-agent" in init_container.command[2] + assert "/mnt/data/.executor-agent" in init_container.command[2] + + # Agent mode: main container runs executor agent with port + main_container = next(c for c in pod.spec.containers if c.name == "main") + assert main_container.args == ["/mnt/data/.executor-agent", "--port", "9090"] + + # Agent mode: sidecar has EXECUTION_MODE and EXECUTOR_PORT env vars + sidecar = next(c for c in pod.spec.containers if c.name == "sidecar") + env_dict = {e.name: e.value for e in sidecar.env} + assert env_dict["EXECUTION_MODE"] == "agent" + assert env_dict["EXECUTOR_PORT"] == "9090" + + # Agent mode: no capabilities, no privilege escalation for sidecar + assert sidecar.security_context.allow_privilege_escalation is False + assert 
sidecar.security_context.capabilities.drop == ["ALL"] + assert sidecar.security_context.capabilities.add is None + + # Agent mode: no capabilities, no privilege escalation for main + assert main_container.security_context.allow_privilege_escalation is False + assert main_container.security_context.capabilities.drop == ["ALL"] + + # Agent mode: init container also has minimal security + assert init_container.security_context.allow_privilege_escalation is False + assert init_container.security_context.capabilities.drop == ["ALL"] + + def test_create_pod_manifest_nsenter_mode(self): + """Test nsenter mode has the required capabilities and settings.""" + pod = client.create_pod_manifest( + name="test-pod", + namespace="test-ns", + main_image="python:3.12", + sidecar_image="sidecar:latest", + language="python", + labels={"app": "test"}, + execution_mode="nsenter", + ) + + # nsenter mode: shareProcessNamespace required + assert pod.spec.share_process_namespace is True + + # nsenter mode: no init containers + assert pod.spec.init_containers is None + + # nsenter mode: main container uses default CMD (no args override) + main_container = next(c for c in pod.spec.containers if c.name == "main") + assert main_container.args is None + + # nsenter mode: sidecar has elevated privileges + sidecar = next(c for c in pod.spec.containers if c.name == "sidecar") + assert sidecar.security_context.allow_privilege_escalation is True + assert set(sidecar.security_context.capabilities.add) == {"SYS_PTRACE", "SYS_ADMIN", "SYS_CHROOT"} + + # nsenter mode: EXECUTION_MODE is set to nsenter + env_dict = {e.name: e.value for e in sidecar.env} + assert env_dict["EXECUTION_MODE"] == "nsenter" + # nsenter mode: EXECUTOR_PORT is still present (used by both modes) + assert env_dict["EXECUTOR_PORT"] == "9090" + + def test_create_pod_manifest_agent_mode_executor_port(self): + """Test that agent mode uses the configured executor port.""" + pod = client.create_pod_manifest( + name="test-pod", + 
namespace="test-ns", + main_image="python:3.12", + sidecar_image="sidecar:latest", + language="python", + labels={"app": "test"}, + execution_mode="agent", + executor_port=8888, + ) + + sidecar = next(c for c in pod.spec.containers if c.name == "sidecar") + env_dict = {e.name: e.value for e in sidecar.env} + assert env_dict["EXECUTOR_PORT"] == "8888" + def test_create_pod_manifest_seccomp_profile_default(self): """Test pod manifest has RuntimeDefault seccomp profile by default.""" pod = client.create_pod_manifest( @@ -509,3 +605,246 @@ def test_create_pod_manifest_network_isolated_default(self): env_dict = {e.name: e.value for e in sidecar.env} assert "NETWORK_ISOLATED" in env_dict assert env_dict["NETWORK_ISOLATED"] == "false" + + def test_create_pod_manifest_gke_sandbox_enabled(self): + """Test GKE Sandbox adds runtime class, node selector, tolerations, and annotation.""" + pod = client.create_pod_manifest( + name="test-pod", + namespace="test-ns", + main_image="python:3.12", + sidecar_image="sidecar:latest", + language="python", + labels={"app": "test"}, + gke_sandbox_enabled=True, + ) + + # Runtime class + assert pod.spec.runtime_class_name == "gvisor" + + # Node selector + assert pod.spec.node_selector is not None + assert pod.spec.node_selector["sandbox.gke.io/runtime"] == "gvisor" + + # Tolerations + assert pod.spec.tolerations is not None + assert len(pod.spec.tolerations) == 1 + tol = pod.spec.tolerations[0] + assert tol.key == "sandbox.gke.io/runtime" + assert tol.operator == "Equal" + assert tol.value == "gvisor" + assert tol.effect == "NoSchedule" + + # Annotation + assert pod.metadata.annotations["sandbox.gke.io/runtime"] == "gvisor" + + def test_create_pod_manifest_gke_sandbox_disabled(self): + """Test GKE Sandbox disabled has no runtime class, node selector, or tolerations.""" + pod = client.create_pod_manifest( + name="test-pod", + namespace="test-ns", + main_image="python:3.12", + sidecar_image="sidecar:latest", + language="python", + 
labels={"app": "test"}, + gke_sandbox_enabled=False, + ) + + assert pod.spec.runtime_class_name is None + assert pod.spec.node_selector is None + assert pod.spec.tolerations is None + + def test_create_pod_manifest_gke_sandbox_custom_runtime_class(self): + """Test GKE Sandbox with custom runtime class name.""" + pod = client.create_pod_manifest( + name="test-pod", + namespace="test-ns", + main_image="python:3.12", + sidecar_image="sidecar:latest", + language="python", + labels={"app": "test"}, + gke_sandbox_enabled=True, + runtime_class_name="custom-runtime", + ) + + assert pod.spec.runtime_class_name == "custom-runtime" + + def test_create_pod_manifest_gke_sandbox_custom_node_selector(self): + """Test GKE Sandbox with additional custom node selector.""" + pod = client.create_pod_manifest( + name="test-pod", + namespace="test-ns", + main_image="python:3.12", + sidecar_image="sidecar:latest", + language="python", + labels={"app": "test"}, + gke_sandbox_enabled=True, + sandbox_node_selector={"pool": "sandbox"}, + ) + + assert pod.spec.node_selector["sandbox.gke.io/runtime"] == "gvisor" + assert pod.spec.node_selector["pool"] == "sandbox" + + def test_create_pod_manifest_gke_sandbox_custom_tolerations(self): + """Test GKE Sandbox with additional custom tolerations.""" + pod = client.create_pod_manifest( + name="test-pod", + namespace="test-ns", + main_image="python:3.12", + sidecar_image="sidecar:latest", + language="python", + labels={"app": "test"}, + gke_sandbox_enabled=True, + custom_tolerations=[{"key": "pool", "value": "sandbox"}], + ) + + # Should have both the GKE default + custom toleration + assert len(pod.spec.tolerations) == 2 + keys = [t.key for t in pod.spec.tolerations] + assert "sandbox.gke.io/runtime" in keys + assert "pool" in keys + + def test_create_pod_manifest_image_pull_secrets(self): + """Test pod manifest with image pull secrets.""" + pod = client.create_pod_manifest( + name="test-pod", + namespace="test-ns", + main_image="python:3.12", + 
sidecar_image="sidecar:latest", + language="python", + labels={"app": "test"}, + image_pull_secrets=["my-registry-secret", "other-secret"], + ) + + assert pod.spec.image_pull_secrets is not None + assert len(pod.spec.image_pull_secrets) == 2 + secret_names = [s.name for s in pod.spec.image_pull_secrets] + assert "my-registry-secret" in secret_names + assert "other-secret" in secret_names + + def test_create_pod_manifest_no_image_pull_secrets(self): + """Test pod manifest without image pull secrets.""" + pod = client.create_pod_manifest( + name="test-pod", + namespace="test-ns", + main_image="python:3.12", + sidecar_image="sidecar:latest", + language="python", + labels={"app": "test"}, + ) + + assert pod.spec.image_pull_secrets is None + + def test_create_pod_manifest_gke_sandbox_with_annotations(self): + """Test GKE Sandbox merges with existing annotations.""" + pod = client.create_pod_manifest( + name="test-pod", + namespace="test-ns", + main_image="python:3.12", + sidecar_image="sidecar:latest", + language="python", + labels={"app": "test"}, + annotations={"custom": "value"}, + gke_sandbox_enabled=True, + ) + + assert pod.metadata.annotations["custom"] == "value" + assert pod.metadata.annotations["sandbox.gke.io/runtime"] == "gvisor" + + def test_create_pod_manifest_gke_sandbox_requires_agent_mode(self): + """Test that GKE Sandbox works with agent mode (default).""" + pod = client.create_pod_manifest( + name="test-pod", + namespace="test-ns", + main_image="python:3.12", + sidecar_image="sidecar:latest", + language="python", + labels={"app": "test"}, + gke_sandbox_enabled=True, + execution_mode="agent", + ) + + # Should have both GKE Sandbox and agent mode features + assert pod.spec.runtime_class_name == "gvisor" + assert pod.spec.share_process_namespace is False + assert pod.spec.init_containers is not None + + # Sidecar should have minimal privileges (agent mode) + sidecar = next(c for c in pod.spec.containers if c.name == "sidecar") + assert 
sidecar.security_context.allow_privilege_escalation is False + + def test_create_pod_manifest_gke_sandbox_warns_on_nsenter_mode(self): + """Test that GKE Sandbox with nsenter mode logs a warning.""" + with patch("src.services.kubernetes.client.logger") as mock_logger: + pod = client.create_pod_manifest( + name="test-pod", + namespace="test-ns", + main_image="python:3.12", + sidecar_image="sidecar:latest", + language="python", + labels={"app": "test"}, + gke_sandbox_enabled=True, + execution_mode="nsenter", + ) + + # Should still create the pod (warning, not error) + assert pod.spec.runtime_class_name == "gvisor" + + # nsenter mode features should be present + assert pod.spec.share_process_namespace is True + assert pod.spec.init_containers is None + + # Sidecar should have elevated privileges (nsenter mode) + sidecar = next(c for c in pod.spec.containers if c.name == "sidecar") + assert sidecar.security_context.allow_privilege_escalation is True + + # Should have logged a warning about incompatibility + mock_logger.warning.assert_called_once() + warning_msg = mock_logger.warning.call_args[0][0] + assert "gVisor" in warning_msg or "GKE Sandbox" in warning_msg + + def test_create_pod_manifest_annotations_not_mutated(self): + """Test that the caller's annotations dict is not mutated by GKE Sandbox.""" + original_annotations = {"custom": "value"} + annotations_copy = dict(original_annotations) + + client.create_pod_manifest( + name="test-pod", + namespace="test-ns", + main_image="python:3.12", + sidecar_image="sidecar:latest", + language="python", + labels={"app": "test"}, + annotations=original_annotations, + gke_sandbox_enabled=True, + ) + + # Original dict must be unchanged + assert original_annotations == annotations_copy + + def test_create_pod_manifest_custom_tolerations_missing_key_skipped(self): + """Test that custom tolerations without a 'key' field are skipped with a warning.""" + with patch("src.services.kubernetes.client.logger") as mock_logger: + pod = 
client.create_pod_manifest( + name="test-pod", + namespace="test-ns", + main_image="python:3.12", + sidecar_image="sidecar:latest", + language="python", + labels={"app": "test"}, + gke_sandbox_enabled=True, + custom_tolerations=[ + {"key": "pool", "value": "sandbox"}, + {"operator": "Exists", "effect": "NoSchedule"}, # missing key + {"key": "other", "value": "val"}, + ], + ) + + # The GKE default + 2 valid custom (the one missing 'key' is skipped) + assert len(pod.spec.tolerations) == 3 + keys = [t.key for t in pod.spec.tolerations] + assert "pool" in keys + assert "other" in keys + assert "sandbox.gke.io/runtime" in keys + + # Should warn about the skipped toleration + mock_logger.warning.assert_called_once() diff --git a/tests/unit/test_session_service.py b/tests/unit/test_session_service.py index 579e571..f4e205a 100644 --- a/tests/unit/test_session_service.py +++ b/tests/unit/test_session_service.py @@ -26,8 +26,8 @@ def mock_redis(): pipeline_mock.execute = AsyncMock(return_value=[True, True, True]) pipeline_mock.reset = AsyncMock() - # Make pipeline() return the pipeline mock when awaited - redis_mock.pipeline = AsyncMock(return_value=pipeline_mock) + # Make pipeline() return the pipeline mock (synchronous, like redis.asyncio) + redis_mock.pipeline = MagicMock(return_value=pipeline_mock) return redis_mock diff --git a/tests/unit/test_settings_validators.py b/tests/unit/test_settings_validators.py index 01744c4..496eeda 100644 --- a/tests/unit/test_settings_validators.py +++ b/tests/unit/test_settings_validators.py @@ -3,6 +3,8 @@ Tests that our Settings class validates configuration values correctly. 
""" +import logging + import pytest from pydantic import ValidationError @@ -42,3 +44,125 @@ def test_default_is_runtime_default(self): """Test that the default seccomp profile type is RuntimeDefault.""" settings = Settings() assert settings.k8s_seccomp_profile_type == "RuntimeDefault" + + +class TestKubernetesPropertyJsonParsing: + """Tests for kubernetes property JSON parsing of GKE fields.""" + + def test_valid_node_selector_json(self): + """Test valid JSON for GKE_SANDBOX_NODE_SELECTOR is parsed.""" + settings = Settings(gke_sandbox_node_selector='{"pool": "sandbox"}') + k8s = settings.kubernetes + assert k8s.sandbox_node_selector == {"pool": "sandbox"} + + def test_invalid_node_selector_json_logs_warning(self, caplog): + """Test invalid JSON for GKE_SANDBOX_NODE_SELECTOR logs a warning.""" + settings = Settings(gke_sandbox_node_selector="not-valid-json") + with caplog.at_level(logging.WARNING): + k8s = settings.kubernetes + assert k8s.sandbox_node_selector is None + assert "GKE_SANDBOX_NODE_SELECTOR" in caplog.text + + def test_valid_custom_tolerations_json(self): + """Test valid JSON for GKE_SANDBOX_CUSTOM_TOLERATIONS is parsed.""" + settings = Settings(gke_sandbox_custom_tolerations='[{"key": "pool", "value": "sandbox"}]') + k8s = settings.kubernetes + assert k8s.custom_tolerations == [{"key": "pool", "value": "sandbox"}] + + def test_invalid_custom_tolerations_json_logs_warning(self, caplog): + """Test invalid JSON for GKE_SANDBOX_CUSTOM_TOLERATIONS logs a warning.""" + settings = Settings(gke_sandbox_custom_tolerations="[broken") + with caplog.at_level(logging.WARNING): + k8s = settings.kubernetes + assert k8s.custom_tolerations is None + assert "GKE_SANDBOX_CUSTOM_TOLERATIONS" in caplog.text + + def test_image_pull_policy_default_is_always(self): + """Test that the default image_pull_policy is 'Always' (matches Settings).""" + settings = Settings() + k8s = settings.kubernetes + assert k8s.image_pull_policy == "Always" + + +class 
TestRedisPasswordValidator: + """Tests for empty-string-to-None password sanitization.""" + + def test_empty_password_becomes_none(self): + """Empty string REDIS_PASSWORD is converted to None.""" + settings = Settings(redis_password="") + assert settings.redis_password is None + + def test_whitespace_password_becomes_none(self): + """Whitespace-only REDIS_PASSWORD is converted to None.""" + settings = Settings(redis_password=" ") + assert settings.redis_password is None + + def test_real_password_preserved(self): + """Non-empty password is kept as-is.""" + settings = Settings(redis_password="s3cret") + assert settings.redis_password == "s3cret" + + def test_none_password_stays_none(self): + """None password stays None.""" + settings = Settings(redis_password=None) + assert settings.redis_password is None + + def test_empty_sentinel_password_becomes_none(self): + """Empty sentinel password is converted to None.""" + settings = Settings(redis_sentinel_password="") + assert settings.redis_sentinel_password is None + + +class TestRedisClusterNodesValidator: + """Tests for empty-string-to-None cluster/sentinel node sanitization.""" + + def test_empty_cluster_nodes_becomes_none(self): + """Empty REDIS_CLUSTER_NODES is converted to None.""" + settings = Settings(redis_cluster_nodes="") + assert settings.redis_cluster_nodes is None + + def test_whitespace_cluster_nodes_becomes_none(self): + """Whitespace-only REDIS_CLUSTER_NODES is converted to None.""" + settings = Settings(redis_cluster_nodes=" ") + assert settings.redis_cluster_nodes is None + + def test_real_cluster_nodes_preserved(self): + """Valid node list is kept.""" + settings = Settings(redis_cluster_nodes="node1:7000,node2:7001") + assert settings.redis_cluster_nodes == "node1:7000,node2:7001" + + def test_empty_sentinel_nodes_becomes_none(self): + """Empty REDIS_SENTINEL_NODES is converted to None.""" + settings = Settings(redis_sentinel_nodes="") + assert settings.redis_sentinel_nodes is None + + def 
test_real_sentinel_nodes_preserved(self): + """Valid sentinel node list is kept.""" + settings = Settings(redis_sentinel_nodes="sent1:26379,sent2:26379") + assert settings.redis_sentinel_nodes == "sent1:26379,sent2:26379" + + +class TestRedisConfigValidators: + """Tests for RedisConfig-level validators (password + nodes).""" + + def test_redis_config_empty_password_to_none(self): + """RedisConfig also converts empty password to None.""" + from src.config.redis import RedisConfig + + cfg = RedisConfig(redis_password="") + assert cfg.password is None + + def test_redis_config_empty_cluster_nodes_to_none(self): + """RedisConfig also converts empty cluster nodes to None.""" + from src.config.redis import RedisConfig + + cfg = RedisConfig(redis_cluster_nodes="") + assert cfg.cluster_nodes is None + + def test_redis_config_real_values_preserved(self): + """Non-empty values pass through.""" + from src.config.redis import RedisConfig + + cfg = RedisConfig(redis_password="pass", redis_cluster_nodes="h:7000") + assert cfg.password == "pass" + assert cfg.cluster_nodes == "h:7000"