diff --git a/nexus-ami/README.md b/nexus-ami/README.md index d960dffe..65d653e1 100644 --- a/nexus-ami/README.md +++ b/nexus-ami/README.md @@ -69,6 +69,49 @@ systemctl status nexus-first-boot postgresql valkey nats \ # Per-instance secret uniqueness is the most important first-boot invariant. ``` +## Service endpoints (nginx reverse proxy on :443) + +Everything is reached over HTTPS on port 443 (the self-signed cert from +`first-boot-ca.sh`); only 443, 3128 (Compliance Proxy CONNECT), and 22 need +to be open in the EC2 Security Group. `nginx-nexus.conf` maps: + +| Path | Backend | Purpose | +|---|---|---| +| `/` | UI (static) | Control Plane SPA | +| `/api/`, `/oauth/`, `/authserver/`, `/.well-known/`, `/scim/` | Control Plane :3001 | Admin API, OAuth/OIDC, SCIM provisioning | +| `/v1/`, `/v1beta/`, `/openai/deployments/`, `/api/paas/` | AI Gateway :3050 | LLM ingress — OpenAI / Gemini / Azure / GLM wire formats | +| `/ws`, `/api/internal/things/` | Nexus Hub :3060 | Remote endpoint-agent enrollment + control WebSocket | +| `/healthz`, `/ready` | Control Plane :3001 | Unauthenticated health / readiness | + +Verify the OpenAI provider you configured in the UI: + +```bash +# List the models the gateway serves (OpenAI providers show owned_by:"openai"): +curl -sk https://<instance-host>/v1/models | jq '.data[] | select(.owned_by=="openai")' + +# End-to-end round-trip through the gateway (needs a virtual key from the UI): +curl -sk https://<instance-host>/v1/chat/completions \ + -H "Authorization: Bearer <virtual-key>" -H "Content-Type: application/json" \ + -d '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"ping"}]}' +``` + +The admin UI's **Simulator** page and the per-credential **Test** button +(which probes the provider's real `/v1/models` with your stored key) also work +without any client-side setup. 
+ +Remote agents enroll against the Hub over the same 443 endpoint: + +```bash +nexus-agent enroll --hub-url https://<instance-host> --token <enrollment-token> \ + --hub-ca <path-to-ca-cert> +``` + +**Security note:** `/v1/*`, `/ws`, and `/api/internal/things/*` are now +internet-reachable. They are not anonymous — `/v1/*` requires a virtual key, +and the Hub agent surface is gated by per-device / enrollment / internal-service +tokens. The Hub admin API (`/api/hub/*`), Prometheus `/metrics`, and +`/debug/runtime` are deliberately NOT proxied and stay loopback-only. + ## Self-Service AMI Scan iteration Run AWS's Self-Service Scan from the Partner Central → Marketplace diff --git a/nexus-ami/artifacts/configs/nginx-nexus.conf b/nexus-ami/artifacts/configs/nginx-nexus.conf index 30005a73..4d630d27 100644 --- a/nexus-ami/artifacts/configs/nginx-nexus.conf +++ b/nexus-ami/artifacts/configs/nginx-nexus.conf @@ -35,8 +35,90 @@ server { # Admin API + auth-server endpoints (both live in the control-plane # binary on :3001). + # + # proxy_buffering off so the admin SSE surfaces stream instead of being + # buffered into one late chunk: the compliance-overview export + # (GET /api/admin/.../compliance export) and the AI Gateway Simulator + # forward (POST /api/admin/ai-gateway-simulator/forward), which flushes + # per upstream chat-completion chunk. location /api/ { proxy_pass http://127.0.0.1:3001; + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_buffering off; + proxy_read_timeout 300s; + proxy_send_timeout 300s; + } + + # SCIM 2.0 provisioning (Okta / Entra ID push user+group sync). Served by + # the control-plane binary at /scim/v2/*. Without this block the SPA + # try_files fallback returns index.html (HTML) to the IdP, which fails + # SCIM discovery. Internal-service-token / bearer gated by the handler. 
+ location /scim/ { + proxy_pass http://127.0.0.1:3001; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # AI Gateway ingress (:3050). One regex block covers every ingress wire + # format the gateway speaks: + # /v1/... OpenAI canonical (chat/completions, + # embeddings, responses, messages, + # estimate, models, models/{id}, usage) + # /v1beta/models/{model}:... Gemini native + # /openai/deployments/{dep}/... Azure OpenAI native + # /api/paas/v4/... GLM / Zhipu native + # Regex so /api/paas wins over the plain `/api/` prefix above (a matching + # regex location overrides any plain-prefix match not marked `^~`). The + # gateway's /internal/* control endpoints and /metrics are deliberately + # NOT matched here — they stay loopback-only. + # proxy_buffering off + HTTP/1.1 so SSE streaming (stream:true chat, + # streamGenerateContent) is relayed chunk-by-chunk, not buffered. + location ~ ^/(v1|v1beta|openai/deployments|api/paas)(/|$) { + proxy_pass http://127.0.0.1:3050; + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_buffering off; + proxy_read_timeout 600s; + proxy_send_timeout 600s; + } + + # Nexus Hub (:3060) — remote endpoint-agent connectivity only. Agents + # enrolled with `--hub-url https://<instance-host>` reach the Hub over 443: + # GET /ws persistent control WebSocket + # POST /api/internal/things/... enrollment + thingclient HTTP fallback + # (register/heartbeat/shadow/config/ + # audit/deregister/break-glass) + # Both are bearer-token gated by the Hub (per-device token, enrollment + # token, or internal-service token), so internet exposure is auth-bounded. 
+ # `/api/internal/things/` is a more specific prefix than the `/api/` + # control-plane block above, so it wins for agent traffic while every + # other /api/internal/* path still routes to the control-plane. + # The Hub's own admin API (/api/hub/*), /metrics, and /healthz are NOT exposed. + location /ws { + proxy_pass http://127.0.0.1:3060; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_read_timeout 3600s; + proxy_send_timeout 3600s; + } + + location /api/internal/things/ { + proxy_pass http://127.0.0.1:3060; + proxy_http_version 1.1; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; @@ -78,10 +160,19 @@ server { proxy_set_header X-Forwarded-Proto $scheme; } - # Health endpoint exposed for ELB / customer monitoring without - # authentication. + # Health + readiness endpoints exposed for ELB / customer monitoring + # without authentication. The control-plane serves these at the ROOT + # (`/healthz`, `/ready`), not under /api — the proxy_pass therefore omits + # a URI so nginx forwards the original path verbatim. (The previous + # `proxy_pass .../api/healthz` rewrote to a path the CP does not serve and + # returned 404 {"message":"Not Found"}.) location = /healthz { - proxy_pass http://127.0.0.1:3001/api/healthz; + proxy_pass http://127.0.0.1:3001; + proxy_set_header Host $host; + } + + location = /ready { + proxy_pass http://127.0.0.1:3001; proxy_set_header Host $host; } }