## README.md

### Ollama GPU Sessions (Minimal)

Use a persistent Docker volume for model files, then configure an Ollama GPU session job type:

```yaml
security_mode: permissive
sessions_enabled: true
session_model_cache_volume: "tako-ollama-models"

job_types:
- name: ollama-nvidia
base_image: "ollama/ollama:latest"
network_enabled: true
memory_limit: "8g"
cpu_limit: 4.0
timeout: 3600
session_enabled: true
environment:
OLLAMA_MODELS: "/models"
OLLAMA_HOST: "0.0.0.0:11434"
gpu:
enabled: true
vendor: nvidia
```

Notes:
- GPU sessions/jobs require `security_mode: permissive`.
- `/sessions/{id}/send` writes to Tako's inbox contract; it does not proxy Ollama HTTP endpoints.
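Because the inbox enforces a message size cap (the documented `session_max_message_bytes` default is 262144 bytes), a client can validate a payload before posting it to `/sessions/{id}/send`. A minimal sketch — the helper name and client-side check are illustrative, not part of Tako; the message shape follows the `/send` example in the REST docs:

```python
import json

# Default cap from the `session_max_message_bytes` config option (256 KB).
MAX_MESSAGE_BYTES = 262144

def build_send_payload(message: str) -> bytes:
    """Build a JSON body for POST /sessions/{id}/send and reject
    oversized messages client-side before the server does."""
    body = json.dumps(
        {"event_type": "input", "payload": {"message": message}}
    ).encode("utf-8")
    if len(body) > MAX_MESSAGE_BYTES:
        raise ValueError(
            f"payload is {len(body)} bytes; server cap is {MAX_MESSAGE_BYTES}"
        )
    return body
```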

### Network Control

By default, containers have **no network access** (`--network=none`).
| Endpoint | Method | Description |
|----------|--------|-------------|
| `/jobs/{id}` | GET | Get job status |
| `/jobs/{id}/result` | GET | Wait for job result |
| `/jobs/{id}/cancel` | POST | Cancel pending/running job |
| `/sessions` | POST | Create long-running session |
| `/sessions/{id}` | GET | Get session status |
| `/sessions/{id}/send` | POST | Send message to session inbox |
| `/sessions/{id}/events` | GET | Poll session output events |
| `/sessions/{id}/terminate` | POST | Terminate session |
| `/job-types` | GET | List available job types |
| `/health` | GET | Health check |

Or via environment variable for testing:

```bash
TAKO_VM_SECURITY_MODE=permissive pytest tests/ -v
```

**GPU policy:**
- GPU workloads (NVIDIA/AMD) run with `runc` and do not use gVisor.
- In `security_mode: strict`, GPU sessions/jobs are rejected.
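The policy above can be enforced with a simple precondition check. A hypothetical sketch (not Tako's actual internal function) of rejecting GPU work under strict mode:

```python
def validate_gpu_request(security_mode: str, gpu_enabled: bool) -> None:
    """GPU workloads need runc, so strict mode (which requires
    gVisor) must reject them, per the documented GPU policy."""
    if gpu_enabled and security_mode == "strict":
        raise ValueError(
            "GPU sessions/jobs run with runc and are rejected in "
            "security_mode: strict; use security_mode: permissive"
        )
```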

**Additional isolation options:**
- **AppArmor/SELinux** (Linux only) - Can block `/proc` reads if needed
- **Kata Containers** - VM-level isolation for multi-tenant deployments
## docs/api/rest.md

Complete reference for the Tako VM HTTP API.

| Endpoint | Method | Description |
|----------|--------|-------------|
| `/jobs/{id}/cancel` | POST | Cancel running job |
| `/jobs/{id}/rerun` | POST | Re-execute with same code/inputs |
| `/jobs/{id}/fork` | POST | Re-execute with modified code |
| `/sessions` | POST | Create long-running session |
| `/sessions/{id}` | GET | Get session status |
| `/sessions/{id}/send` | POST | Send message to session inbox |
| `/sessions/{id}/events` | GET | Poll session events (cursor-based) |
| `/sessions/{id}/terminate` | POST | End session and remove container |
| `/health` | GET | Health check with queue stats |

## Base URL
The status will reflect the current state of the existing job (e.g., `queued`, …).

---

## Sessions API (Long-Running Containers)

Sessions are disabled by default. Set `sessions_enabled: true` in the config and mark eligible job types with `session_enabled: true`.

If a job type enables GPU, sessions run with `runc` (gVisor disabled). In `security_mode: strict`, GPU sessions are rejected.

### Create Session

```http
POST /sessions
```

```json
{
"job_type": "ollama-nvidia",
"metadata": {"workflow": "assistant"},
"idle_timeout_seconds": 1800,
"ttl_seconds": 86400
}
```
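The two lifetime knobs interact: a session ends when it has been idle past `idle_timeout_seconds` *or* has existed longer than `ttl_seconds`, whichever trips first. A sketch of that assumed semantics (function name and signature are illustrative):

```python
def session_expired(created_at: float, last_activity: float, now: float,
                    idle_timeout_seconds: int = 1800,
                    ttl_seconds: int = 86400) -> bool:
    """True once either the idle timeout or the hard TTL has elapsed.

    Timestamps are seconds (e.g. epoch seconds); defaults mirror the
    documented config defaults (30 min idle, 24 h TTL).
    """
    idle_expired = (now - last_activity) >= idle_timeout_seconds
    ttl_expired = (now - created_at) >= ttl_seconds
    return idle_expired or ttl_expired
```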

### Send Input to Session

```http
POST /sessions/{session_id}/send
```

```json
{
"event_type": "input",
"payload": {
"message": "summarize the latest logs"
}
}
```

This endpoint writes messages into the session inbox contract. It does not proxy the Ollama HTTP API.

### Poll Session Events

```http
GET /sessions/{session_id}/events?after=0&limit=100
```

The response includes `next_cursor`; pass it as `after` in the next poll request.
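A polling client can drain all pending events by following `next_cursor`. In this sketch, `fetch_page(after, limit)` stands in for `GET /sessions/{id}/events?after=...&limit=...`; the `"events"` field name in the response is an assumption (only `next_cursor` is documented here):

```python
def drain_events(fetch_page, limit=100):
    """Collect pending session events, following next_cursor
    until a poll returns nothing new."""
    events, cursor = [], 0
    while True:
        page = fetch_page(after=cursor, limit=limit)
        events.extend(page["events"])
        # Stop when the page is empty or the cursor no longer advances.
        if not page["events"] or page["next_cursor"] == cursor:
            break
        cursor = page["next_cursor"]
    return events
```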

### Terminate Session

```http
POST /sessions/{session_id}/terminate
```

---

## Get Job Status

Get the current status of a job.
## docs/getting-started/configuration.md

Tako VM uses gVisor (runsc) by default for strong container isolation:
- **strict** (default): Fails with `RuntimeUnavailableError` if gVisor is not installed. Use this in production for guaranteed strong isolation.
- **permissive**: Falls back to standard runc runtime with a warning if gVisor is unavailable. Useful for development on systems without gVisor.

**GPU compatibility note:**

- GPU workloads run with `runc` (gVisor disabled).
- If `security_mode: strict` is enabled, GPU sessions/jobs are rejected.

For Ollama-style long-running sessions, configure a persistent Docker volume for model cache:

```yaml
session_model_cache_volume: "tako-ollama-models"
```

```yaml
# Development (allow fallback to runc)
security_mode: permissive
```

| Option | Description | Default |
|--------|-------------|---------|
| `enable_seccomp` | Enable seccomp syscall filtering | `true` |
| `enable_cap_restrictions` | Enable capability restrictions (`--cap-drop=ALL`) | `true` |
| `enable_userns` | Enable user namespace isolation | `false` |
| `sessions_enabled` | Enable long-running session API endpoints | `false` |
| `session_idle_timeout_seconds` | Idle timeout before auto-expire | `1800` |
| `session_max_ttl_seconds` | Max lifetime for any session | `86400` |
| `session_max_message_bytes` | Max `/sessions/{id}/send` payload size | `262144` |
| `session_max_events_per_poll` | Max events returned per poll request | `100` |
| `session_model_cache_volume` | Optional Docker volume mounted at `/models` in session containers | `null` |
## docs/guide/environments.md

| Option | Description | Default |
|--------|-------------|---------|
| `cpu_limit` | CPU cores | `1.0` |
| `timeout` | Default timeout (seconds) | `30` |
| `network_enabled` | Allow outbound network | `false` |
| `session_enabled` | Allow long-running session API usage | `false` |
| `gpu.enabled` | Enable GPU access for this job type | `false` |
| `gpu.vendor` | GPU vendor (`nvidia` or `amd`) | `null` |
| `gpu.count` | Number of GPUs (NVIDIA only) | `null` |
| `gpu.device_ids` | Specific GPU IDs/UUIDs | `[]` |
| `environment` | Environment variables | `{}` |
| `shared_code` | Python files to include | `[]` |

### GPU + Sessions Example (Ollama)

```yaml
session_model_cache_volume: "tako-ollama-models"

job_types:
- name: ollama-nvidia
base_image: "ollama/ollama:latest"
network_enabled: true
memory_limit: "8g"
cpu_limit: 4.0
timeout: 3600
session_enabled: true
environment:
OLLAMA_MODELS: "/models"
OLLAMA_HOST: "0.0.0.0:11434"
gpu:
enabled: true
vendor: nvidia
# count: 1
```

GPU workloads run with `runc` (gVisor disabled). If `security_mode: strict` is enabled, GPU sessions are rejected.

`/sessions/{id}/send` writes to the session inbox contract. It does not proxy Ollama HTTP APIs.

## Network Access

By default, containers have **no network access** for security. To enable network for specific job types:
## tako_vm.yaml.example

```yaml
container_runtime: runsc

# ...
# - 'permissive': Falls back to runc with warnings. ONLY for local development.
security_mode: strict

# Long-running sessions (disabled by default)
sessions_enabled: false
session_idle_timeout_seconds: 1800   # Auto-expire idle sessions after 30m
session_max_ttl_seconds: 86400       # Hard cap on session lifetime (24h)
session_max_message_bytes: 262144    # Max inbox message size (256KB)
session_max_events_per_poll: 100     # Max events returned per /sessions/{id}/events poll
# Optional Docker named volume mounted at /models for session containers
# Use this for persistent Ollama model cache across sessions
# session_model_cache_volume: "tako-ollama-models"

# =============================================================================
# Container Resource Limits
# =============================================================================

# ...

job_types:
  # ...
    memory_limit: "1g"
    cpu_limit: 2.0
    timeout: 60
    session_enabled: false
    gpu:
      enabled: false

  # ML inference
  - name: ml-inference
    # ...
    memory_limit: "2g"
    cpu_limit: 2.0
    timeout: 120
    session_enabled: false
    gpu:
      enabled: false

  # API client with network access
  - name: api-client
    # ...
    network_enabled: true
    memory_limit: "512m"
    timeout: 30
    session_enabled: false
    gpu:
      enabled: false

  # OpenClaw/Ollama-style long-running GPU session (NVIDIA example)
  - name: openclaw-nvidia
    base_image: "tako-openclaw:latest"
    network_enabled: true
    memory_limit: "8g"
    cpu_limit: 4.0
    timeout: 3600
    session_enabled: true
    gpu:
      enabled: true
      vendor: nvidia
      # count: 1
      # device_ids: ["GPU-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"]

  # Minimal Ollama GPU session (NVIDIA)
  # Requires:
  #   - sessions_enabled: true
  #   - security_mode: permissive (GPU workloads run with runc)
  #   - session_model_cache_volume configured for persistent model cache (recommended)
  - name: ollama-nvidia
    base_image: "ollama/ollama:latest"
    network_enabled: true
    memory_limit: "8g"
    cpu_limit: 4.0
    timeout: 3600
    session_enabled: true
    environment:
      OLLAMA_MODELS: "/models"
      OLLAMA_HOST: "0.0.0.0:11434"
    gpu:
      enabled: true
      vendor: nvidia
```
## tako_vm/cli.py

```python
print("[Docker]")
print(f"  docker_image: {config.docker_image}")
print(f"  container_runtime: {config.container_runtime}")
print(f"  security_mode: {config.security_mode}")
print(f"  enable_seccomp: {config.enable_seccomp}")
print(f"  enable_cap_restrictions: {config.enable_cap_restrictions}")
print(f"  enable_userns: {config.enable_userns}")
print()

print("[Sessions]")
print(f"  sessions_enabled: {config.sessions_enabled}")
print(f"  session_idle_timeout_seconds: {config.session_idle_timeout_seconds}")
print(f"  session_max_ttl_seconds: {config.session_max_ttl_seconds}")
print(f"  session_max_message_bytes: {config.session_max_message_bytes}")
print(f"  session_max_events_per_poll: {config.session_max_events_per_poll}")
print()

if config.job_types:
    print("[Job Types]")
    for jt in config.job_types:
        print(f"  - {jt.name}:")
        print(
            f"      memory: {jt.memory_limit}, cpu: {jt.cpu_limit}, timeout: {jt.timeout}s"
        )
        print(f"      session_enabled: {jt.session_enabled}")
        if jt.gpu.enabled:
            gpu_parts = [f"vendor={jt.gpu.vendor}"]
            if jt.gpu.count is not None:
                gpu_parts.append(f"count={jt.gpu.count}")
            if jt.gpu.device_ids:
                gpu_parts.append(f"device_ids={','.join(jt.gpu.device_ids)}")
            print(f"      gpu: enabled ({', '.join(gpu_parts)})")
        else:
            print("      gpu: disabled")
        if jt.requirements:
            print(f"      requirements: {', '.join(jt.requirements)}")
```