From 87b9dd9e3ed9703edbbfbea82e8c6075b8f80aaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Titsworth-Morin?= Date: Mon, 11 May 2026 10:09:58 +0000 Subject: [PATCH 01/13] Add devcontainer cache backend experiments --- .../devcontainer-cache-experiments.yml | 214 ++++++++++++++++++ .../parse-cf-registry-credentials.mjs | 91 ++++++++ 2 files changed, 305 insertions(+) create mode 100644 .github/workflows/devcontainer-cache-experiments.yml create mode 100644 scripts/experiments/parse-cf-registry-credentials.mjs diff --git a/.github/workflows/devcontainer-cache-experiments.yml b/.github/workflows/devcontainer-cache-experiments.yml new file mode 100644 index 000000000..1d3547f12 --- /dev/null +++ b/.github/workflows/devcontainer-cache-experiments.yml @@ -0,0 +1,214 @@ +name: Devcontainer Cache Experiments + +on: + push: + branches: + - "sam/cloudflare-devcontainer-cache-experiments-*" + paths: + - ".github/workflows/devcontainer-cache-experiments.yml" + - "scripts/experiments/**" + workflow_dispatch: + inputs: + run_cloudflare_registry: + description: "Run Cloudflare managed registry push/pull experiment" + required: true + default: "true" + type: choice + options: ["true", "false"] + run_r2: + description: "Run R2 tarball and BuildKit S3 cache experiments" + required: true + default: "true" + type: choice + options: ["true", "false"] + +permissions: + contents: read + +jobs: + cloudflare-registry: + if: ${{ github.event_name == 'push' || inputs.run_cloudflare_registry == 'true' }} + runs-on: ubuntu-latest + timeout-minutes: 25 + env: + CLOUDFLARE_API_TOKEN: ${{ secrets.CF_API_TOKEN }} + CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID }} + IMAGE_NAME: sam-devcontainer-cache-exp + steps: + - uses: actions/checkout@v6 + + - uses: pnpm/action-setup@v4 + with: + version: 9.15.9 + + - uses: actions/setup-node@v6 + with: + node-version: 22 + cache: pnpm + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Show Wrangler container 
command help + run: | + pnpm --filter @simple-agent-manager/api exec wrangler containers registries credentials --help + + - name: Build local test image + run: | + cat > Dockerfile.cache-exp <<'DOCKERFILE' + FROM alpine:3.20 + RUN dd if=/dev/zero of=/cache-test.bin bs=1M count=64 + CMD ["sh", "-c", "test -f /cache-test.bin && echo ok"] + DOCKERFILE + docker build -t "$IMAGE_NAME:${GITHUB_RUN_ID}" -f Dockerfile.cache-exp . + + - name: Push through wrangler containers push + id: wrangler_push + continue-on-error: true + run: | + set -o pipefail + pnpm --filter @simple-agent-manager/api exec wrangler containers push "$IMAGE_NAME:${GITHUB_RUN_ID}" 2>&1 | tee wrangler-push.log + { + echo "### Wrangler containers push" + echo + echo '```text' + sed -E 's/[A-Za-z0-9_-]{24,}/***/g' wrangler-push.log + echo '```' + } >> "$GITHUB_STEP_SUMMARY" + + - name: Generate Cloudflare registry credentials + id: cf_creds_raw + run: | + set +e + pnpm --filter @simple-agent-manager/api exec wrangler containers registries credentials registry.cloudflare.com --push --pull --expiration-minutes=60 > cf-creds.txt 2>&1 + status=$? + if [ "$status" -ne 0 ]; then + pnpm --filter @simple-agent-manager/api exec wrangler containers registries credentials --push --pull --expiration-minutes=60 > cf-creds.txt 2>&1 + status=$? 
+ fi + { + echo "### Wrangler registry credentials command" + echo + echo "Exit status: \`$status\`" + echo + echo '```text' + sed -E 's/[A-Za-z0-9_-]{24,}/***/g' cf-creds.txt + echo '```' + } >> "$GITHUB_STEP_SUMMARY" + exit "$status" + + - name: Parse registry credentials + id: cf_creds + run: node scripts/experiments/parse-cf-registry-credentials.mjs cf-creds.txt + + - name: Docker login/push/pull against registry.cloudflare.com + run: | + set -euxo pipefail + REGISTRY="${{ steps.cf_creds.outputs.registry }}" + USERNAME="${{ steps.cf_creds.outputs.username }}" + PASSWORD="${{ steps.cf_creds.outputs.password }}" + REF="$REGISTRY/${CLOUDFLARE_ACCOUNT_ID}/${IMAGE_NAME}:${GITHUB_RUN_ID}" + echo "$PASSWORD" | docker login "$REGISTRY" --username "$USERNAME" --password-stdin + docker tag "$IMAGE_NAME:${GITHUB_RUN_ID}" "$REF" + docker push "$REF" + docker rmi "$REF" + docker pull "$REF" + echo "### Docker registry.cloudflare.com push/pull" >> "$GITHUB_STEP_SUMMARY" + echo "" >> "$GITHUB_STEP_SUMMARY" + echo "- Ref: \`$REF\`" >> "$GITHUB_STEP_SUMMARY" + echo "- Result: push and pull succeeded" >> "$GITHUB_STEP_SUMMARY" + + r2-cache: + if: ${{ github.event_name == 'push' || inputs.run_r2 == 'true' }} + runs-on: ubuntu-latest + timeout-minutes: 35 + env: + CLOUDFLARE_API_TOKEN: ${{ secrets.CF_API_TOKEN }} + CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID }} + AWS_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} + AWS_DEFAULT_REGION: auto + steps: + - uses: actions/checkout@v6 + + - uses: pnpm/action-setup@v4 + with: + version: 9.15.9 + + - uses: actions/setup-node@v6 + with: + node-version: 22 + cache: pnpm + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Prepare experiment context + run: | + mkdir -p /tmp/sam-cache-exp + cat > /tmp/sam-cache-exp/Dockerfile <<'DOCKERFILE' + FROM alpine:3.20 + RUN dd if=/dev/zero of=/r2-cache-test.bin bs=1M count=64 + RUN sha256sum 
/r2-cache-test.bin > /r2-cache-test.sha256 + CMD ["cat", "/r2-cache-test.sha256"] + DOCKERFILE + docker build -t sam-r2-cache-exp:${GITHUB_RUN_ID} /tmp/sam-cache-exp + docker save sam-r2-cache-exp:${GITHUB_RUN_ID} -o /tmp/sam-cache-exp-image.tar + + - name: Create temporary R2 bucket + run: | + BUCKET="sam-devcontainer-cache-exp-${GITHUB_RUN_ID}" + echo "BUCKET=$BUCKET" >> "$GITHUB_ENV" + pnpm --filter @simple-agent-manager/api exec wrangler r2 bucket create "$BUCKET" + + - name: Test R2 tarball upload/download + run: | + set -euxo pipefail + KEY="docker-save/sam-r2-cache-exp-${GITHUB_RUN_ID}.tar" + pnpm --filter @simple-agent-manager/api exec wrangler r2 object put "$BUCKET/$KEY" --file /tmp/sam-cache-exp-image.tar + pnpm --filter @simple-agent-manager/api exec wrangler r2 object get "$BUCKET/$KEY" --file /tmp/sam-cache-exp-image-downloaded.tar + docker rmi sam-r2-cache-exp:${GITHUB_RUN_ID} + docker load -i /tmp/sam-cache-exp-image-downloaded.tar + docker run --rm sam-r2-cache-exp:${GITHUB_RUN_ID} + echo "### R2 docker save/load tarball" >> "$GITHUB_STEP_SUMMARY" + echo "" >> "$GITHUB_STEP_SUMMARY" + echo "- Bucket: \`$BUCKET\`" >> "$GITHUB_STEP_SUMMARY" + echo "- Key: \`$KEY\`" >> "$GITHUB_STEP_SUMMARY" + echo "- Result: upload, download, load, run succeeded" >> "$GITHUB_STEP_SUMMARY" + + - uses: docker/setup-buildx-action@v3 + with: + driver: docker-container + buildkitd-flags: --debug + + - name: Test BuildKit S3 cache against R2 + id: buildkit_s3 + continue-on-error: true + run: | + set -o pipefail + CACHE_ARGS="type=s3,region=auto,bucket=${BUCKET},name=sam-buildkit-cache-${GITHUB_RUN_ID},endpoint_url=https://${CLOUDFLARE_ACCOUNT_ID}.r2.cloudflarestorage.com,use_path_style=true,access_key_id=${AWS_ACCESS_KEY_ID},secret_access_key=${AWS_SECRET_ACCESS_KEY},mode=max" + docker buildx build \ + --progress=plain \ + --cache-to "$CACHE_ARGS" \ + --cache-from "$CACHE_ARGS" \ + --load \ + -t "sam-r2-buildkit-cache-exp:${GITHUB_RUN_ID}" \ + /tmp/sam-cache-exp 2>&1 | tee 
buildkit-s3.log + { + echo "### BuildKit S3 cache to R2" + echo + echo "Exit status: \`${PIPESTATUS[0]}\`" + echo + echo '```text' + tail -120 buildkit-s3.log | sed -E 's/(access_key_id=)[^,]+/\1***/g; s/(secret_access_key=)[^,]+/\1***/g' + echo '```' + } >> "$GITHUB_STEP_SUMMARY" + + - name: Cleanup temporary R2 bucket + if: always() + continue-on-error: true + run: | + if [ -n "${BUCKET:-}" ]; then + aws --endpoint-url "https://${CLOUDFLARE_ACCOUNT_ID}.r2.cloudflarestorage.com" s3 rm "s3://${BUCKET}" --recursive || true + pnpm --filter @simple-agent-manager/api exec wrangler r2 bucket delete "$BUCKET" --yes || true + fi diff --git a/scripts/experiments/parse-cf-registry-credentials.mjs b/scripts/experiments/parse-cf-registry-credentials.mjs new file mode 100644 index 000000000..48dc77a39 --- /dev/null +++ b/scripts/experiments/parse-cf-registry-credentials.mjs @@ -0,0 +1,91 @@ +import { readFileSync, appendFileSync } from 'node:fs'; + +const [filePath] = process.argv.slice(2); + +if (!filePath) { + console.error('Usage: parse-cf-registry-credentials.mjs '); + process.exit(2); +} + +const raw = readFileSync(filePath, 'utf8'); +const outputPath = process.env.GITHUB_OUTPUT; +const summaryPath = process.env.GITHUB_STEP_SUMMARY; + +function redact(value) { + if (!value) return ''; + if (value.length <= 8) return '***'; + return `${value.slice(0, 4)}...${value.slice(-4)}`; +} + +function parseJson(text) { + const parsed = JSON.parse(text); + return { + registry: parsed.registry || parsed.registry_url || parsed.url || 'registry.cloudflare.com', + username: parsed.username || parsed.user || parsed.login, + password: parsed.password || parsed.token || parsed.secret, + }; +} + +function parseText(text) { + const get = (...labels) => { + for (const label of labels) { + const pattern = new RegExp(`^\\s*${label}\\s*[:=]\\s*(.+?)\\s*$`, 'im'); + const match = text.match(pattern); + if (match?.[1]) return match[1].trim(); + } + return undefined; + }; + + return { + registry: 
get('registry', 'server') || 'registry.cloudflare.com', + username: get('username', 'user', 'login'), + password: get('password', 'token', 'secret'), + }; +} + +let credentials; +try { + credentials = parseJson(raw); +} catch { + credentials = parseText(raw); +} + +if (!credentials.username || !credentials.password) { + if (summaryPath) { + appendFileSync( + summaryPath, + [ + '### Cloudflare registry credential parse failed', + '', + 'Wrangler output, redacted:', + '', + '```text', + raw.replace(/[A-Za-z0-9_-]{24,}/g, '***'), + '```', + '', + ].join('\n') + ); + } + console.error('Could not parse username/password from Wrangler registry credential output.'); + process.exit(1); +} + +if (outputPath) { + appendFileSync(outputPath, `registry=${credentials.registry}\n`); + appendFileSync(outputPath, `username=${credentials.username}\n`); + appendFileSync(outputPath, `password=${credentials.password}\n`); +} + +if (summaryPath) { + appendFileSync( + summaryPath, + [ + '### Cloudflare registry credentials', + '', + `- Registry: \`${credentials.registry}\``, + `- Username: \`${redact(credentials.username)}\``, + `- Password/token: \`${redact(credentials.password)}\``, + '', + ].join('\n') + ); +} From c27a55a27962ecffb6185b4af3888fe56b2ba2a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Titsworth-Morin?= Date: Mon, 11 May 2026 10:11:53 +0000 Subject: [PATCH 02/13] Use staging credentials for cache experiments --- .github/workflows/devcontainer-cache-experiments.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/devcontainer-cache-experiments.yml b/.github/workflows/devcontainer-cache-experiments.yml index 1d3547f12..5e36921b9 100644 --- a/.github/workflows/devcontainer-cache-experiments.yml +++ b/.github/workflows/devcontainer-cache-experiments.yml @@ -29,6 +29,7 @@ jobs: cloudflare-registry: if: ${{ github.event_name == 'push' || inputs.run_cloudflare_registry == 'true' }} runs-on: ubuntu-latest + environment: staging timeout-minutes: 25 
env: CLOUDFLARE_API_TOKEN: ${{ secrets.CF_API_TOKEN }} @@ -121,6 +122,7 @@ jobs: r2-cache: if: ${{ github.event_name == 'push' || inputs.run_r2 == 'true' }} runs-on: ubuntu-latest + environment: staging timeout-minutes: 35 env: CLOUDFLARE_API_TOKEN: ${{ secrets.CF_API_TOKEN }} From d4c10fd8580f0e908b747ae207e2603f7327294d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Titsworth-Morin?= Date: Mon, 11 May 2026 10:14:43 +0000 Subject: [PATCH 03/13] Probe plain Docker push to Cloudflare registry --- .../devcontainer-cache-experiments.yml | 33 +------ .../parse-cf-registry-credentials.mjs | 91 ------------------- 2 files changed, 2 insertions(+), 122 deletions(-) delete mode 100644 scripts/experiments/parse-cf-registry-credentials.mjs diff --git a/.github/workflows/devcontainer-cache-experiments.yml b/.github/workflows/devcontainer-cache-experiments.yml index 5e36921b9..6008a1637 100644 --- a/.github/workflows/devcontainer-cache-experiments.yml +++ b/.github/workflows/devcontainer-cache-experiments.yml @@ -77,39 +77,10 @@ jobs: echo '```' } >> "$GITHUB_STEP_SUMMARY" - - name: Generate Cloudflare registry credentials - id: cf_creds_raw - run: | - set +e - pnpm --filter @simple-agent-manager/api exec wrangler containers registries credentials registry.cloudflare.com --push --pull --expiration-minutes=60 > cf-creds.txt 2>&1 - status=$? - if [ "$status" -ne 0 ]; then - pnpm --filter @simple-agent-manager/api exec wrangler containers registries credentials --push --pull --expiration-minutes=60 > cf-creds.txt 2>&1 - status=$? 
- fi - { - echo "### Wrangler registry credentials command" - echo - echo "Exit status: \`$status\`" - echo - echo '```text' - sed -E 's/[A-Za-z0-9_-]{24,}/***/g' cf-creds.txt - echo '```' - } >> "$GITHUB_STEP_SUMMARY" - exit "$status" - - - name: Parse registry credentials - id: cf_creds - run: node scripts/experiments/parse-cf-registry-credentials.mjs cf-creds.txt - - - name: Docker login/push/pull against registry.cloudflare.com + - name: Plain Docker push/pull against registry.cloudflare.com run: | set -euxo pipefail - REGISTRY="${{ steps.cf_creds.outputs.registry }}" - USERNAME="${{ steps.cf_creds.outputs.username }}" - PASSWORD="${{ steps.cf_creds.outputs.password }}" - REF="$REGISTRY/${CLOUDFLARE_ACCOUNT_ID}/${IMAGE_NAME}:${GITHUB_RUN_ID}" - echo "$PASSWORD" | docker login "$REGISTRY" --username "$USERNAME" --password-stdin + REF="registry.cloudflare.com/${CLOUDFLARE_ACCOUNT_ID}/${IMAGE_NAME}:docker-${GITHUB_RUN_ID}" docker tag "$IMAGE_NAME:${GITHUB_RUN_ID}" "$REF" docker push "$REF" docker rmi "$REF" diff --git a/scripts/experiments/parse-cf-registry-credentials.mjs b/scripts/experiments/parse-cf-registry-credentials.mjs deleted file mode 100644 index 48dc77a39..000000000 --- a/scripts/experiments/parse-cf-registry-credentials.mjs +++ /dev/null @@ -1,91 +0,0 @@ -import { readFileSync, appendFileSync } from 'node:fs'; - -const [filePath] = process.argv.slice(2); - -if (!filePath) { - console.error('Usage: parse-cf-registry-credentials.mjs '); - process.exit(2); -} - -const raw = readFileSync(filePath, 'utf8'); -const outputPath = process.env.GITHUB_OUTPUT; -const summaryPath = process.env.GITHUB_STEP_SUMMARY; - -function redact(value) { - if (!value) return ''; - if (value.length <= 8) return '***'; - return `${value.slice(0, 4)}...${value.slice(-4)}`; -} - -function parseJson(text) { - const parsed = JSON.parse(text); - return { - registry: parsed.registry || parsed.registry_url || parsed.url || 'registry.cloudflare.com', - username: parsed.username || 
parsed.user || parsed.login, - password: parsed.password || parsed.token || parsed.secret, - }; -} - -function parseText(text) { - const get = (...labels) => { - for (const label of labels) { - const pattern = new RegExp(`^\\s*${label}\\s*[:=]\\s*(.+?)\\s*$`, 'im'); - const match = text.match(pattern); - if (match?.[1]) return match[1].trim(); - } - return undefined; - }; - - return { - registry: get('registry', 'server') || 'registry.cloudflare.com', - username: get('username', 'user', 'login'), - password: get('password', 'token', 'secret'), - }; -} - -let credentials; -try { - credentials = parseJson(raw); -} catch { - credentials = parseText(raw); -} - -if (!credentials.username || !credentials.password) { - if (summaryPath) { - appendFileSync( - summaryPath, - [ - '### Cloudflare registry credential parse failed', - '', - 'Wrangler output, redacted:', - '', - '```text', - raw.replace(/[A-Za-z0-9_-]{24,}/g, '***'), - '```', - '', - ].join('\n') - ); - } - console.error('Could not parse username/password from Wrangler registry credential output.'); - process.exit(1); -} - -if (outputPath) { - appendFileSync(outputPath, `registry=${credentials.registry}\n`); - appendFileSync(outputPath, `username=${credentials.username}\n`); - appendFileSync(outputPath, `password=${credentials.password}\n`); -} - -if (summaryPath) { - appendFileSync( - summaryPath, - [ - '### Cloudflare registry credentials', - '', - `- Registry: \`${credentials.registry}\``, - `- Username: \`${redact(credentials.username)}\``, - `- Password/token: \`${redact(credentials.password)}\``, - '', - ].join('\n') - ); -} From b566c190e6813139406fec1235cb9790d67cbf88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Titsworth-Morin?= Date: Mon, 11 May 2026 10:17:23 +0000 Subject: [PATCH 04/13] Document Cloudflare cache experiment results --- ...oudflare-devcontainer-cache-experiments.md | 137 ++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 
docs/notes/2026-05-11-cloudflare-devcontainer-cache-experiments.md diff --git a/docs/notes/2026-05-11-cloudflare-devcontainer-cache-experiments.md b/docs/notes/2026-05-11-cloudflare-devcontainer-cache-experiments.md new file mode 100644 index 000000000..de7985f81 --- /dev/null +++ b/docs/notes/2026-05-11-cloudflare-devcontainer-cache-experiments.md @@ -0,0 +1,137 @@ +# Cloudflare Devcontainer Cache Experiments + +Date: 2026-05-11 +PR: #963 +Workflow run: `25664066831` + +## Context + +SAM's first GHCR-based devcontainer cache implementation successfully pulled, +built, tagged, and attempted to push cache images, but GHCR rejected pushes made +with GitHub App installation tokens: + +```text +denied: permission_denied: installation not allowed to Create organization package +``` + +This experiment tested two Cloudflare-based replacements: + +1. Cloudflare managed Containers Registry at `registry.cloudflare.com` +2. R2-backed cache storage, both as Docker tarballs and BuildKit S3 cache + +## Result Summary + +Both Cloudflare strategies worked in GitHub Actions with staging environment +credentials. + +The managed Containers Registry is the best fit for SAM's current VM agent flow +because it preserves the existing `docker pull`, `docker tag`, and `docker push` +model. + +BuildKit S3 cache against R2 also worked, but it requires a `docker-container` +Buildx builder and does not map cleanly onto the current `devcontainer up` +wrapper flow. + +R2 tarballs worked, but they would require custom `docker save/load` code and do +not get registry layer deduplication or native Docker transfer behavior. 
+ +## Cloudflare Managed Registry + +The experiment built a 64 MiB image and pushed it through Wrangler: + +```text +wrangler containers push sam-devcontainer-cache-exp:{run_id} +``` + +Wrangler authenticated Docker and pushed: + +```text +Pushed image: registry.cloudflare.com/{account_id}/sam-devcontainer-cache-exp:{run_id} +``` + +Then the workflow reused that Docker login and tested plain Docker commands: + +```text +docker tag sam-devcontainer-cache-exp:{run_id} registry.cloudflare.com/{account_id}/sam-devcontainer-cache-exp:docker-{run_id} +docker push registry.cloudflare.com/{account_id}/sam-devcontainer-cache-exp:docker-{run_id} +docker pull registry.cloudflare.com/{account_id}/sam-devcontainer-cache-exp:docker-{run_id} +``` + +Result: + +```text +docker-{run_id}: digest: sha256:7eafb128de623003b2a956dd721be06eabd3c046c3408adc0cde7402214caf2b size: 737 +Status: Downloaded newer image for registry.cloudflare.com/{account_id}/sam-devcontainer-cache-exp:docker-{run_id} +``` + +This confirms the registry supports the plain Docker push/pull behavior the VM +agent needs. + +## R2 Docker Tarball + +The experiment built the same image, saved it as a Docker tarball, uploaded it +to a temporary R2 bucket, downloaded it, loaded it back into Docker, and ran it: + +```text +docker save sam-r2-cache-exp:{run_id} -o /tmp/sam-cache-exp-image.tar +wrangler r2 object put {bucket}/docker-save/sam-r2-cache-exp-{run_id}.tar --file /tmp/sam-cache-exp-image.tar +wrangler r2 object get {bucket}/docker-save/sam-r2-cache-exp-{run_id}.tar --file /tmp/sam-cache-exp-image-downloaded.tar +docker load -i /tmp/sam-cache-exp-image-downloaded.tar +docker run --rm sam-r2-cache-exp:{run_id} +``` + +Result: + +```text +Upload complete. +Download complete. +Loaded image: sam-r2-cache-exp:{run_id} +3b6a07d0d404fab4e23b6d34bc6696a6a312dd92821332385e5af7c01c421351 /r2-cache-test.bin +``` + +This is feasible but not recommended as the primary path.
+ +## R2 BuildKit S3 Cache + +The experiment used `docker/setup-buildx-action` with the `docker-container` +driver and pointed BuildKit's S3 cache backend at a temporary R2 bucket: + +```text +docker buildx build \ + --cache-to type=s3,region=auto,bucket={bucket},name=sam-buildkit-cache-{run_id},endpoint_url=https://{account_id}.r2.cloudflarestorage.com,use_path_style=true,...,mode=max \ + --cache-from type=s3,region=auto,bucket={bucket},name=sam-buildkit-cache-{run_id},endpoint_url=https://{account_id}.r2.cloudflarestorage.com,use_path_style=true,... \ + --load \ + /tmp/sam-cache-exp +``` + +Result: + +```text +#6 importing cache manifest from s3:11828236366541811895 +#11 exporting cache to Amazon S3 +#11 sending cache export 0.9s done +#11 DONE 0.9s +``` + +This proves R2 can serve as a BuildKit S3 cache backend, but using it in SAM +would require owning the Buildx invocation instead of relying on `devcontainer +up`'s current `cacheFrom` image-reference support. + +## Recommendation + +Use Cloudflare managed Containers Registry for the next production iteration. + +Implementation direction: + +1. Generate short-lived Cloudflare registry credentials in the API/control + plane. +2. Pass `DEVCONTAINER_CACHE_REGISTRY=registry.cloudflare.com`, + `DEVCONTAINER_CACHE_USERNAME`, and `DEVCONTAINER_CACHE_TOKEN` to the VM agent + bootstrap environment. +3. Change cache refs from `ghcr.io/{owner}/{repo}:devcontainer-cache` to + `registry.cloudflare.com/{account_id}/{owner}-{repo}:devcontainer-cache`. +4. Keep the existing VM agent `docker pull`, `cacheFrom`, `docker tag`, and + `docker push` flow. + +R2 BuildKit cache is worth keeping as a future option only if SAM later takes +direct ownership of Buildx/devcontainer build execution.
From 0d5234b5f0f8bed1148870d6ff858f72fefe555f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Titsworth-Morin?= Date: Mon, 11 May 2026 12:57:46 +0000 Subject: [PATCH 05/13] Add SAM devcontainer registry stress experiment --- .../devcontainer-cache-experiments.yml | 122 ++++++++++++++++++ 1 file changed, 122 insertions(+) diff --git a/.github/workflows/devcontainer-cache-experiments.yml b/.github/workflows/devcontainer-cache-experiments.yml index 6008a1637..86f848172 100644 --- a/.github/workflows/devcontainer-cache-experiments.yml +++ b/.github/workflows/devcontainer-cache-experiments.yml @@ -21,6 +21,12 @@ on: default: "true" type: choice options: ["true", "false"] + run_sam_devcontainer_stress: + description: "Build and push the real SAM devcontainer to Cloudflare registry" + required: true + default: "false" + type: choice + options: ["true", "false"] permissions: contents: read @@ -185,3 +191,119 @@ jobs: aws --endpoint-url "https://${CLOUDFLARE_ACCOUNT_ID}.r2.cloudflarestorage.com" s3 rm "s3://${BUCKET}" --recursive || true pnpm --filter @simple-agent-manager/api exec wrangler r2 bucket delete "$BUCKET" --yes || true fi + + sam-devcontainer-registry-stress: + if: ${{ github.event_name == 'workflow_dispatch' && inputs.run_sam_devcontainer_stress == 'true' }} + runs-on: ubuntu-latest + environment: staging + timeout-minutes: 75 + env: + CLOUDFLARE_API_TOKEN: ${{ secrets.CF_API_TOKEN }} + CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID }} + IMAGE_NAME: sam-devcontainer-cache-stress + steps: + - uses: actions/checkout@v6 + + - uses: pnpm/action-setup@v4 + with: + version: 9.15.9 + + - uses: actions/setup-node@v6 + with: + node-version: 22 + cache: pnpm + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Install devcontainer CLI + run: npm install -g @devcontainers/cli + + - name: Build and start SAM devcontainer + run: | + set -euxo pipefail + OUTPUT="$(devcontainer up --workspace-folder .)" + echo "$OUTPUT" + 
CONTAINER_ID="$(echo "$OUTPUT" | jq -r '.containerId // empty')" + if [ -z "$CONTAINER_ID" ]; then + echo "Could not extract containerId from devcontainer output" + exit 1 + fi + IMAGE_ID="$(docker inspect "$CONTAINER_ID" --format '{{.Image}}')" + SIZE_BYTES="$(docker image inspect "$IMAGE_ID" --format '{{.Size}}')" + SIZE_MIB="$(awk "BEGIN { printf \"%.1f\", ${SIZE_BYTES} / 1024 / 1024 }")" + echo "CONTAINER_ID=$CONTAINER_ID" >> "$GITHUB_ENV" + echo "IMAGE_ID=$IMAGE_ID" >> "$GITHUB_ENV" + echo "SIZE_BYTES=$SIZE_BYTES" >> "$GITHUB_ENV" + echo "SIZE_MIB=$SIZE_MIB" >> "$GITHUB_ENV" + { + echo "### SAM devcontainer image" + echo + echo "- Container ID: \`$CONTAINER_ID\`" + echo "- Image ID: \`$IMAGE_ID\`" + echo "- Local image size: \`${SIZE_BYTES}\` bytes (${SIZE_MIB} MiB)" + echo + echo "#### Image inspect" + echo + echo '```json' + docker image inspect "$IMAGE_ID" | jq '.[0] | {Id, RepoTags, RepoDigests, Size, VirtualSize, Architecture, Os, RootFS, Config: {Image: .Config.Image, Labels: .Config.Labels}}' + echo '```' + echo + echo "#### Docker history" + echo + echo '```text' + docker history --no-trunc "$IMAGE_ID" + echo '```' + } >> "$GITHUB_STEP_SUMMARY" + + - name: Mint Cloudflare registry credentials + run: | + set -euo pipefail + pnpm --filter @simple-agent-manager/api exec wrangler containers registries credentials registry.cloudflare.com \ + --push \ + --pull \ + --expiration-minutes 120 \ + --json > registry-credentials.json + REGISTRY_HOST="$(jq -r '.registry_host' registry-credentials.json)" + REGISTRY_USERNAME="$(jq -r '.username' registry-credentials.json)" + REGISTRY_PASSWORD="$(jq -r '.password' registry-credentials.json)" + if [ -z "$REGISTRY_PASSWORD" ] || [ "$REGISTRY_PASSWORD" = "null" ]; then + echo "Cloudflare registry credentials did not include a password" + exit 1 + fi + echo "::add-mask::$REGISTRY_PASSWORD" + { + echo "REGISTRY_HOST=$REGISTRY_HOST" + echo "REGISTRY_USERNAME=$REGISTRY_USERNAME" + echo "REGISTRY_PASSWORD=$REGISTRY_PASSWORD" + 
} >> "$GITHUB_ENV" + + - name: Push and pull SAM devcontainer with plain Docker + run: | + set -euo pipefail + REF="${REGISTRY_HOST}/${CLOUDFLARE_ACCOUNT_ID}/${IMAGE_NAME}:sam-real-${GITHUB_RUN_ID}" + echo "$REGISTRY_PASSWORD" | docker login "$REGISTRY_HOST" -u "$REGISTRY_USERNAME" --password-stdin + docker tag "$IMAGE_ID" "$REF" + docker push "$REF" 2>&1 | tee docker-push.log + docker rm -f "$CONTAINER_ID" + docker rmi "$REF" "$IMAGE_ID" || true + docker pull "$REF" 2>&1 | tee docker-pull.log + { + echo "### SAM devcontainer Cloudflare registry stress result" + echo + echo "- Ref: \`$REF\`" + echo "- Local image size before push: \`${SIZE_BYTES}\` bytes (${SIZE_MIB} MiB)" + echo "- Result: plain Docker push and pull succeeded" + echo + echo "#### Push tail" + echo + echo '```text' + tail -80 docker-push.log + echo '```' + echo + echo "#### Pull tail" + echo + echo '```text' + tail -80 docker-pull.log + echo '```' + } >> "$GITHUB_STEP_SUMMARY" From 531346a3fca076fc3f516f8d5e99be078aebde43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Titsworth-Morin?= Date: Mon, 11 May 2026 13:19:39 +0000 Subject: [PATCH 06/13] Use build-only SAM devcontainer stress test --- .../devcontainer-cache-experiments.yml | 20 +++++++------------ 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/.github/workflows/devcontainer-cache-experiments.yml b/.github/workflows/devcontainer-cache-experiments.yml index 86f848172..e012a9a6d 100644 --- a/.github/workflows/devcontainer-cache-experiments.yml +++ b/.github/workflows/devcontainer-cache-experiments.yml @@ -219,27 +219,22 @@ jobs: - name: Install devcontainer CLI run: npm install -g @devcontainers/cli - - name: Build and start SAM devcontainer + - name: Build SAM devcontainer image run: | set -euxo pipefail - OUTPUT="$(devcontainer up --workspace-folder .)" - echo "$OUTPUT" - CONTAINER_ID="$(echo "$OUTPUT" | jq -r '.containerId // empty')" - if [ -z "$CONTAINER_ID" ]; then - echo "Could not extract containerId from 
devcontainer output" - exit 1 - fi - IMAGE_ID="$(docker inspect "$CONTAINER_ID" --format '{{.Image}}')" + LOCAL_REF="${IMAGE_NAME}:local-${GITHUB_RUN_ID}" + devcontainer build --workspace-folder . --image-name "$LOCAL_REF" + IMAGE_ID="$(docker image inspect "$LOCAL_REF" --format '{{.Id}}')" SIZE_BYTES="$(docker image inspect "$IMAGE_ID" --format '{{.Size}}')" SIZE_MIB="$(awk "BEGIN { printf \"%.1f\", ${SIZE_BYTES} / 1024 / 1024 }")" - echo "CONTAINER_ID=$CONTAINER_ID" >> "$GITHUB_ENV" + echo "LOCAL_REF=$LOCAL_REF" >> "$GITHUB_ENV" echo "IMAGE_ID=$IMAGE_ID" >> "$GITHUB_ENV" echo "SIZE_BYTES=$SIZE_BYTES" >> "$GITHUB_ENV" echo "SIZE_MIB=$SIZE_MIB" >> "$GITHUB_ENV" { echo "### SAM devcontainer image" echo - echo "- Container ID: \`$CONTAINER_ID\`" + echo "- Local ref: \`$LOCAL_REF\`" echo "- Image ID: \`$IMAGE_ID\`" echo "- Local image size: \`${SIZE_BYTES}\` bytes (${SIZE_MIB} MiB)" echo @@ -285,8 +280,7 @@ jobs: echo "$REGISTRY_PASSWORD" | docker login "$REGISTRY_HOST" -u "$REGISTRY_USERNAME" --password-stdin docker tag "$IMAGE_ID" "$REF" docker push "$REF" 2>&1 | tee docker-push.log - docker rm -f "$CONTAINER_ID" - docker rmi "$REF" "$IMAGE_ID" || true + docker rmi "$REF" "$LOCAL_REF" "$IMAGE_ID" || true docker pull "$REF" 2>&1 | tee docker-pull.log { echo "### SAM devcontainer Cloudflare registry stress result" From 9cf9cc266a0235c68eda385b5f84fbf1a701f202 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Titsworth-Morin?= Date: Mon, 11 May 2026 13:23:45 +0000 Subject: [PATCH 07/13] Handle Wrangler warning output in stress test --- .github/workflows/devcontainer-cache-experiments.yml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/devcontainer-cache-experiments.yml b/.github/workflows/devcontainer-cache-experiments.yml index e012a9a6d..ebdee5f27 100644 --- a/.github/workflows/devcontainer-cache-experiments.yml +++ b/.github/workflows/devcontainer-cache-experiments.yml @@ -258,7 +258,17 @@ jobs: --push \ --pull 
\ --expiration-minutes 120 \ - --json > registry-credentials.json + --json > registry-credentials.raw + node -e " + const fs = require('fs'); + const raw = fs.readFileSync('registry-credentials.raw', 'utf8'); + const start = raw.lastIndexOf('{'); + if (start < 0) { + console.error(raw); + throw new Error('No JSON object found in Wrangler credentials output'); + } + fs.writeFileSync('registry-credentials.json', raw.slice(start)); + " REGISTRY_HOST="$(jq -r '.registry_host' registry-credentials.json)" REGISTRY_USERNAME="$(jq -r '.username' registry-credentials.json)" REGISTRY_PASSWORD="$(jq -r '.password' registry-credentials.json)" From 1eb5965576d941d974f895e9aeb38eeb9b836450 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Titsworth-Morin?= Date: Mon, 11 May 2026 13:29:53 +0000 Subject: [PATCH 08/13] Document SAM devcontainer registry stress test --- ...oudflare-devcontainer-cache-experiments.md | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/docs/notes/2026-05-11-cloudflare-devcontainer-cache-experiments.md b/docs/notes/2026-05-11-cloudflare-devcontainer-cache-experiments.md index de7985f81..5d17d0d8a 100644 --- a/docs/notes/2026-05-11-cloudflare-devcontainer-cache-experiments.md +++ b/docs/notes/2026-05-11-cloudflare-devcontainer-cache-experiments.md @@ -2,7 +2,7 @@ Date: 2026-05-11 PR: #963 -Workflow run: `25664066831` +Workflow runs: `25664066831`, `25672922644` ## Context @@ -67,6 +67,39 @@ Status: Downloaded newer image for registry.cloudflare.com//sam-devc This confirms the registry supports the plain Docker push/pull behavior the VM agent needs. +## SAM Devcontainer Stress Test + +The follow-up stress test built SAM's real `.devcontainer/devcontainer.json` +image and pushed it through the Cloudflare managed Containers Registry with +plain Docker commands. + +Workflow run: + +The workflow intentionally used `devcontainer build` instead of +`devcontainer up`. 
The VM agent cache path stores the built devcontainer image, +and `devcontainer up` also runs lifecycle hooks that are not required to stress +registry storage and transfer. An earlier `devcontainer up` run was cancelled +after hanging in the start/lifecycle phase. + +Result: + +```text +SIZE_BYTES=2741386134 +SIZE_MIB=2614.4 +registry.cloudflare.com//sam-devcontainer-cache-stress:sam-real-25672922644: + digest: sha256:baeb7e14758e5b4284cd7b9b2faec8e736ed97fd1c37b153614ce06306cfc07e + size: 7436 +Status: Downloaded newer image for registry.cloudflare.com//sam-devcontainer-cache-stress:sam-real-25672922644 +``` + +The local Docker image size was 2,741,386,134 bytes, or 2,614.4 MiB. The full +job completed in 4 minutes 18 seconds. The devcontainer build took about 2 +minutes 25 seconds, and the push/pull phase took about 85 seconds from push +start to successful pull. + +This confirms the managed registry handled a real SAM devcontainer image, not +just the earlier synthetic 64 MiB test image. 
+ ## R2 Docker Tarball The experiment built the same image, saved it as a Docker tarball, uploaded it From 06876cc1293106cc732f2fb2a074b959db821b21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Titsworth-Morin?= Date: Mon, 11 May 2026 14:12:25 +0000 Subject: [PATCH 09/13] Add Cloudflare cache implementation handoff --- ...1-cloudflare-devcontainer-cache-handoff.md | 144 ++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 docs/notes/2026-05-11-cloudflare-devcontainer-cache-handoff.md diff --git a/docs/notes/2026-05-11-cloudflare-devcontainer-cache-handoff.md b/docs/notes/2026-05-11-cloudflare-devcontainer-cache-handoff.md new file mode 100644 index 000000000..974e9dcbd --- /dev/null +++ b/docs/notes/2026-05-11-cloudflare-devcontainer-cache-handoff.md @@ -0,0 +1,144 @@ +# Cloudflare Devcontainer Cache Handoff + +Date: 2026-05-11 +PR: #963 +Branch: `sam/cloudflare-devcontainer-cache-experiments-01krb4` + +## Current Conclusion + +Use Cloudflare managed Containers Registry as the production replacement for +GHCR-based devcontainer cache images. + +The managed registry is the best fit because SAM's VM agent already uses a +Docker-native flow: + +1. `docker pull` previous cache image, if present +2. pass `cacheFrom` into the devcontainer build +3. `docker tag` the resulting image +4. `docker push` the cache image for the next run + +Cloudflare managed registry preserves this flow. R2 tarballs and BuildKit S3 +cache both worked in experiments, but each requires a larger implementation +change. 
+ +## Evidence + +Detailed experiment notes: + +- `docs/notes/2026-05-11-cloudflare-devcontainer-cache-experiments.md` +- Workflow experiments: `.github/workflows/devcontainer-cache-experiments.yml` + +Successful SAM real-devcontainer stress test: + +- Run: `25672922644` +- Real SAM devcontainer image size: 2,741,386,134 bytes / 2,614.4 MiB +- Full job duration: 4 minutes 18 seconds +- Build phase: about 2 minutes 25 seconds +- Push/pull phase: about 85 seconds from push start to successful pull +- Digest: `sha256:baeb7e14758e5b4284cd7b9b2faec8e736ed97fd1c37b153614ce06306cfc07e` + +Earlier synthetic experiment: + +- Verified Cloudflare managed registry push/pull with plain Docker after + Wrangler minted credentials. +- Verified R2 Docker tarball save/upload/download/load. +- Verified R2 as a BuildKit S3 cache backend. + +## Important Implementation Constraint + +Do not install Wrangler on VM agent nodes for production behavior. + +Wrangler was useful only in GitHub Actions experiments. Production should mint +short-lived registry credentials from the API/control plane and pass Docker +registry credentials to the VM agent. + +The relevant Cloudflare endpoint is the same endpoint Wrangler uses: + +```text +POST /accounts/{account_id}/containers/registries/registry.cloudflare.com/credentials +``` + +Expected body shape: + +```json +{ + "expiration_minutes": 120, + "permissions": ["pull", "push"] +} +``` + +The response includes registry host, username, and password. Treat the password +as sensitive and never log it. + +## Suggested Production Shape + +Add configuration for Cloudflare managed registry caching, using environment +variables rather than hardcoded values. 
Suggested names are illustrative; follow +existing env naming conventions after inspecting `apps/api/src/env.ts`, +deployment scripts, and docs: + +- Cloudflare account ID for the registry account +- Cloudflare API token with permission to mint managed registry credentials +- Registry namespace/repository prefix, if needed +- Credential expiration minutes, configurable with a safe default + +API/control plane responsibilities: + +1. Detect whether Cloudflare devcontainer cache config is present. +2. Mint short-lived pull/push credentials before VM agent bootstrap. +3. Build the registry image reference, for example: + `registry.cloudflare.com/<account_id>/<owner>-<repo>:devcontainer-cache` +4. Pass `DEVCONTAINER_CACHE_REGISTRY`, cache image ref, username, and password + into the VM agent bootstrap environment. +5. Preserve existing GHCR/no-cache behavior as fallback when Cloudflare config is + absent. + +VM agent responsibilities: + +1. Use supplied Docker registry credentials for `docker login`. +2. Pull cache image if present and tolerate cache misses. +3. Pass cache ref into devcontainer build. +4. Tag and push the resulting image when build succeeds. +5. Avoid logging registry passwords or tokens. 
+ +## Areas To Inspect First + +Start by reading these files/directories: + +- `packages/vm-agent/` +- `apps/api/src/durable-objects/task-runner/` +- `apps/api/src/services/task-runner-do.ts` +- `apps/api/src/env.ts` +- deployment/secrets documentation and scripts +- `.github/workflows/devcontainer-cache-experiments.yml` +- `docs/notes/2026-05-11-cloudflare-devcontainer-cache-experiments.md` + +Search terms that should help: + +- `DEVCONTAINER_CACHE` +- `cacheFrom` +- `GHCR` +- `docker login` +- `docker push` +- `devcontainer` + +## Testing Expectations + +Add focused unit tests for: + +- Cloudflare registry credential minting request construction +- sensitive value redaction/no logging behavior where applicable +- fallback behavior when Cloudflare registry config is absent +- VM agent environment/config parsing for registry credentials + +If practical, keep the experiment workflow available as a manual validation +path, but the production implementation should not depend on Wrangler. + +## Non-Goals + +- Do not merge PR #963. +- Do not replace the devcontainer build system wholesale. +- Do not switch production to R2 tarballs unless Cloudflare managed registry + proves impossible during implementation. +- Do not hardcode account IDs, repo names, registry URLs beyond stable provider + host constants, credential TTLs, or secrets. 
From 0af2c5188c38022626d174124cd7c82a9e277ea1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Titsworth-Morin?= Date: Mon, 11 May 2026 14:42:03 +0000 Subject: [PATCH 10/13] feat: use Cloudflare registry for devcontainer cache --- .claude/skills/env-reference/SKILL.md | 9 + apps/api/.env.example | 10 +- .../task-runner/workspace-steps.ts | 19 ++ apps/api/src/env.ts | 7 +- apps/api/src/services/devcontainer-cache.ts | 193 ++++++++++++++++++ apps/api/src/services/node-agent.ts | 7 + .../unit/services/devcontainer-cache.test.ts | 97 +++++++++ apps/api/wrangler.toml | 3 + docs/architecture/secrets-taxonomy.md | 2 + docs/guides/self-hosting.md | 16 ++ ...oudflare-devcontainer-cache-experiments.md | 4 +- packages/vm-agent/.env.example | 12 +- .../vm-agent/internal/bootstrap/bootstrap.go | 52 +++-- packages/vm-agent/internal/cache/cache.go | 14 +- .../vm-agent/internal/cache/cache_test.go | 36 ++-- packages/vm-agent/internal/config/config.go | 75 +++---- .../vm-agent/internal/config/config_test.go | 42 +++- packages/vm-agent/internal/server/server.go | 8 + .../internal/server/workspace_provisioning.go | 16 ++ .../server/workspace_provisioning_test.go | 21 ++ .../internal/server/workspace_routing.go | 39 ++-- .../vm-agent/internal/server/workspaces.go | 12 ++ scripts/deploy/configure-secrets.sh | 4 + ...1-cloudflare-managed-devcontainer-cache.md | 36 ++++ 24 files changed, 641 insertions(+), 93 deletions(-) create mode 100644 apps/api/src/services/devcontainer-cache.ts create mode 100644 apps/api/tests/unit/services/devcontainer-cache.test.ts create mode 100644 tasks/active/2026-05-11-cloudflare-managed-devcontainer-cache.md diff --git a/.claude/skills/env-reference/SKILL.md b/.claude/skills/env-reference/SKILL.md index e46579f3a..1720f65a3 100644 --- a/.claude/skills/env-reference/SKILL.md +++ b/.claude/skills/env-reference/SKILL.md @@ -18,6 +18,8 @@ Uses `GH_*` prefix because GitHub Actions secret names cannot start with `GITHUB | Secret | `CF_API_TOKEN` | Yes | | 
Secret | `CF_ACCOUNT_ID` | Yes | | Secret | `CF_ZONE_ID` | Yes | +| Secret | `DEVCONTAINER_CACHE_CLOUDFLARE_API_TOKEN` | No (falls back to `CF_API_TOKEN`) | +| Secret | `DEVCONTAINER_CACHE_CLOUDFLARE_ACCOUNT_ID` | No (falls back to `CF_ACCOUNT_ID`) | | Secret | `R2_ACCESS_KEY_ID` | Yes | | Secret | `R2_SECRET_ACCESS_KEY` | Yes | | Secret | `PULUMI_CONFIG_PASSPHRASE` | Yes | @@ -57,6 +59,13 @@ See `apps/api/.env.example` for the full list. Key variables: - `WRANGLER_PORT` — Local dev port (default: 8787) - `BASE_DOMAIN` — Set automatically by sync scripts +### Devcontainer Cache + +- `DEVCONTAINER_CACHE_ENABLED` — Enables opportunistic devcontainer image caching +- `DEVCONTAINER_CACHE_REGISTRY_HOST` — Managed registry host (default: `registry.cloudflare.com`) +- `DEVCONTAINER_CACHE_REPOSITORY_PREFIX` — Prefix for generated cache repository names +- `DEVCONTAINER_CACHE_CREDENTIAL_EXPIRATION_MINUTES` — TTL for short-lived registry credentials minted by the API + ### Resource Limits - `MAX_NODES_PER_USER` — Runtime node cap diff --git a/apps/api/.env.example b/apps/api/.env.example index 81b4cee3d..7d99b3336 100644 --- a/apps/api/.env.example +++ b/apps/api/.env.example @@ -12,10 +12,18 @@ BASE_DOMAIN=workspaces.example.com # GITHUB_APP_ID= # GITHUB_APP_PRIVATE_KEY= -# Cloudflare credentials (for DNS operations) +# Cloudflare credentials (for DNS operations and optional managed registry cache) # CF_API_TOKEN= +# CF_ACCOUNT_ID= # CF_ZONE_ID= +# Optional narrower credentials for Cloudflare managed devcontainer cache +# DEVCONTAINER_CACHE_CLOUDFLARE_API_TOKEN= +# DEVCONTAINER_CACHE_CLOUDFLARE_ACCOUNT_ID= +# DEVCONTAINER_CACHE_REGISTRY_HOST=registry.cloudflare.com +# DEVCONTAINER_CACHE_REPOSITORY_PREFIX=sam- +# DEVCONTAINER_CACHE_CREDENTIAL_EXPIRATION_MINUTES=120 + # Security keys (auto-generated if not provided) # ENCRYPTION_KEY= # Shared fallback key — used when purpose-specific keys below are not set # JWT_PRIVATE_KEY= diff --git 
a/apps/api/src/durable-objects/task-runner/workspace-steps.ts b/apps/api/src/durable-objects/task-runner/workspace-steps.ts index 6c50cd0d6..1cee1fa65 100644 --- a/apps/api/src/durable-objects/task-runner/workspace-steps.ts +++ b/apps/api/src/durable-objects/task-runner/workspace-steps.ts @@ -136,8 +136,26 @@ export async function handleWorkspaceCreation( // Create workspace on VM agent const { signCallbackToken } = await import('../../services/jwt'); const { createWorkspaceOnNode } = await import('../../services/node-agent'); + const { getDevcontainerCacheCredentials } = await import('../../services/devcontainer-cache'); const callbackToken = await signCallbackToken(workspaceId, rc.env); + let devcontainerCache = null; + if (state.config.workspaceProfile !== 'lightweight') { + try { + devcontainerCache = await getDevcontainerCacheCredentials( + rc.env, + state.config.repository, + state.config.devcontainerConfigName + ); + } catch (err) { + log.warn('task_runner_do.devcontainer_cache_credentials_failed', { + taskId: state.taskId, + workspaceId, + error: err instanceof Error ? err.message : String(err), + }); + } + } + await createWorkspaceOnNode(state.stepResults.nodeId, rc.env, state.userId, { workspaceId, repository: state.config.repository, @@ -148,6 +166,7 @@ export async function handleWorkspaceCreation( githubId: state.config.githubId, lightweight: state.config.workspaceProfile === 'lightweight', devcontainerConfigName: state.config.devcontainerConfigName ?? 
undefined, + devcontainerCache, }); await rc.ctx.storage.put('state', state); diff --git a/apps/api/src/env.ts b/apps/api/src/env.ts index ca680e1af..5c8d7f6cc 100644 --- a/apps/api/src/env.ts +++ b/apps/api/src/env.ts @@ -365,7 +365,12 @@ export interface Env { VM_AGENT_PROTOCOL?: string; // "https" (default) or "http" VM_AGENT_PORT?: string; // "8443" (default) or custom port // Devcontainer image caching - DEVCONTAINER_CACHE_ENABLED?: string; // "true" to enable GHCR caching (default: disabled) + DEVCONTAINER_CACHE_ENABLED?: string; // "true" to enable managed registry caching (default: disabled) + DEVCONTAINER_CACHE_CLOUDFLARE_ACCOUNT_ID?: string; // Cloudflare account for managed registry credentials + DEVCONTAINER_CACHE_CLOUDFLARE_API_TOKEN?: string; // Token allowed to mint managed registry credentials + DEVCONTAINER_CACHE_REGISTRY_HOST?: string; // Registry host (default: registry.cloudflare.com) + DEVCONTAINER_CACHE_REPOSITORY_PREFIX?: string; // Optional cache repository name prefix + DEVCONTAINER_CACHE_CREDENTIAL_EXPIRATION_MINUTES?: string; // Temporary registry credential TTL // Workspace tool proxy configuration (unified from workspace-mcp) WORKSPACE_TOOL_TIMEOUT_MS?: string; // Timeout for VM agent proxy calls (default: 15000) WORKSPACE_TOOL_GITHUB_TIMEOUT_MS?: string; // Timeout for GitHub API calls (default: 10000) diff --git a/apps/api/src/services/devcontainer-cache.ts b/apps/api/src/services/devcontainer-cache.ts new file mode 100644 index 000000000..73ca7529e --- /dev/null +++ b/apps/api/src/services/devcontainer-cache.ts @@ -0,0 +1,193 @@ +import type { Env } from '../env'; +import { fetchWithTimeout, getTimeoutMs } from './fetch-timeout'; + +const DEFAULT_CLOUDFLARE_REGISTRY_HOST = 'registry.cloudflare.com'; +const DEFAULT_CREDENTIAL_EXPIRATION_MINUTES = 120; +const CLOUDFLARE_API_BASE = 'https://api.cloudflare.com/client/v4'; + +export interface DevcontainerCacheCredentials { + registry: string; + username: string; + password: string; + ref: 
string; +} + +interface CloudflareRegistryCredentialsResponse { + success?: boolean; + errors?: Array<{ message?: string }>; + result?: { + registry_host?: string; + username?: string; + password?: string; + }; +} + +interface CacheConfig { + accountId: string; + apiToken: string; + registryHost: string; + repositoryPrefix: string; + expirationMinutes: number; + timeoutMs: number; +} + +export function isDevcontainerCacheEnabled(env: Pick): boolean { + return env.DEVCONTAINER_CACHE_ENABLED === 'true'; +} + +function parsePositiveInteger(value: string | undefined, fallback: number): number { + if (!value) { + return fallback; + } + const parsed = Number.parseInt(value, 10); + return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback; +} + +function getCacheConfig(env: Env): CacheConfig | null { + if (!isDevcontainerCacheEnabled(env)) { + return null; + } + + const accountId = (env.DEVCONTAINER_CACHE_CLOUDFLARE_ACCOUNT_ID || env.CF_ACCOUNT_ID || '').trim(); + const apiToken = (env.DEVCONTAINER_CACHE_CLOUDFLARE_API_TOKEN || env.CF_API_TOKEN || '').trim(); + if (!accountId || !apiToken) { + return null; + } + + return { + accountId, + apiToken, + registryHost: (env.DEVCONTAINER_CACHE_REGISTRY_HOST || DEFAULT_CLOUDFLARE_REGISTRY_HOST).trim(), + repositoryPrefix: (env.DEVCONTAINER_CACHE_REPOSITORY_PREFIX || '').trim(), + expirationMinutes: parsePositiveInteger( + env.DEVCONTAINER_CACHE_CREDENTIAL_EXPIRATION_MINUTES, + DEFAULT_CREDENTIAL_EXPIRATION_MINUTES + ), + timeoutMs: getTimeoutMs(env.CF_API_TIMEOUT_MS), + }; +} + +function parseGitHubRepo(repository: string): { owner: string; repo: string } | null { + const value = repository.trim(); + if (!value) { + return null; + } + if (value.startsWith('git@github.com:')) { + return splitOwnerRepo(value.slice('git@github.com:'.length).replace(/\.git$/, '')); + } + if (value.includes('://')) { + let url: URL; + try { + url = new URL(value); + } catch { + return null; + } + if (url.hostname !== 'github.com') { + return 
null; + } + return splitOwnerRepo(decodeURIComponent(url.pathname).replace(/^\/+|\/+$/g, '').replace(/\.git$/, '')); + } + return splitOwnerRepo(value); +} + +function splitOwnerRepo(path: string): { owner: string; repo: string } | null { + const [owner, repo] = path.split('/'); + if (!owner || !repo) { + return null; + } + return { owner, repo }; +} + +function sanitizeRepositoryComponent(value: string): string { + return value.toLowerCase().replace(/[^a-z0-9._-]+/g, '-').replace(/^-+|-+$/g, ''); +} + +function sanitizeRepositoryPrefix(value: string): string { + return value.toLowerCase().replace(/[^a-z0-9._-]+/g, '-').replace(/^-+/g, ''); +} + +function cacheTag(devcontainerConfigName?: string | null): string { + const configName = (devcontainerConfigName || '').trim(); + if (!configName) { + return 'devcontainer-cache'; + } + return `devcontainer-cache-${sanitizeRepositoryComponent(configName)}`; +} + +export function buildCloudflareDevcontainerCacheRef( + registryHost: string, + accountId: string, + repositoryPrefix: string, + repository: string, + devcontainerConfigName?: string | null +): string | null { + const parsed = parseGitHubRepo(repository); + if (!parsed) { + return null; + } + + const prefix = sanitizeRepositoryPrefix(repositoryPrefix); + const owner = sanitizeRepositoryComponent(parsed.owner); + const repo = sanitizeRepositoryComponent(parsed.repo); + const repositoryName = `${prefix}${owner}-${repo}`; + if (!repositoryName || !owner || !repo) { + return null; + } + + return `${registryHost}/${accountId}/${repositoryName}:${cacheTag(devcontainerConfigName)}`; +} + +async function mintCloudflareRegistryCredentials(config: CacheConfig): Promise> { + const url = `${CLOUDFLARE_API_BASE}/accounts/${config.accountId}/containers/registries/${config.registryHost}/credentials`; + const response = await fetchWithTimeout(url, { + method: 'POST', + headers: { + Authorization: `Bearer ${config.apiToken}`, + 'Content-Type': 'application/json', + }, + body: 
JSON.stringify({ + expiration_minutes: config.expirationMinutes, + permissions: ['pull', 'push'], + }), + }, config.timeoutMs); + + const body = await response.json().catch(() => null); + if (!response.ok || !body?.result) { + const message = body?.errors?.[0]?.message || `HTTP ${response.status}`; + throw new Error(`Cloudflare registry credential mint failed: ${message}`); + } + + const registry = (body.result.registry_host || config.registryHost).trim(); + const username = (body.result.username || '').trim(); + const password = body.result.password || ''; + if (!registry || !username || !password) { + throw new Error('Cloudflare registry credential response was missing registry, username, or password'); + } + + return { registry, username, password }; +} + +export async function getDevcontainerCacheCredentials( + env: Env, + repository: string, + devcontainerConfigName?: string | null +): Promise { + const config = getCacheConfig(env); + if (!config) { + return null; + } + + const ref = buildCloudflareDevcontainerCacheRef( + config.registryHost, + config.accountId, + config.repositoryPrefix, + repository, + devcontainerConfigName + ); + if (!ref) { + return null; + } + + const credentials = await mintCloudflareRegistryCredentials(config); + return { ...credentials, ref }; +} diff --git a/apps/api/src/services/node-agent.ts b/apps/api/src/services/node-agent.ts index 0f5593e44..d71351d66 100644 --- a/apps/api/src/services/node-agent.ts +++ b/apps/api/src/services/node-agent.ts @@ -194,6 +194,13 @@ export async function createWorkspaceOnNode( lightweight?: boolean; /** Devcontainer config name (subdirectory under .devcontainer/). Undefined = auto-discover. */ devcontainerConfigName?: string; + /** Optional explicit devcontainer cache credentials minted by the control plane. 
*/ + devcontainerCache?: { + registry: string; + username: string; + password: string; + ref: string; + } | null; } ): Promise { return nodeAgentRequest(nodeId, env, '/workspaces', { diff --git a/apps/api/tests/unit/services/devcontainer-cache.test.ts b/apps/api/tests/unit/services/devcontainer-cache.test.ts new file mode 100644 index 000000000..f8d6b00f3 --- /dev/null +++ b/apps/api/tests/unit/services/devcontainer-cache.test.ts @@ -0,0 +1,97 @@ +import { afterEach, describe, expect, it, vi } from 'vitest'; + +import type { Env } from '../../../src/env'; +import { + buildCloudflareDevcontainerCacheRef, + getDevcontainerCacheCredentials, +} from '../../../src/services/devcontainer-cache'; + +function env(overrides: Partial = {}): Env { + return { + DEVCONTAINER_CACHE_ENABLED: 'true', + CF_ACCOUNT_ID: 'acct-123', + CF_API_TOKEN: 'cf-token', + ...overrides, + } as Env; +} + +describe('devcontainer-cache service', () => { + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('returns null when cache config is absent', async () => { + const credentials = await getDevcontainerCacheCredentials( + env({ DEVCONTAINER_CACHE_ENABLED: 'false' }), + 'octo/repo' + ); + + expect(credentials).toBeNull(); + }); + + it('builds Cloudflare managed registry cache refs from repository metadata', () => { + const ref = buildCloudflareDevcontainerCacheRef( + 'registry.cloudflare.com', + 'acct-123', + 'sam-', + 'https://github.com/Octo/Hello World.git', + 'node:20' + ); + + expect(ref).toBe('registry.cloudflare.com/acct-123/sam-octo-hello-world:devcontainer-cache-node-20'); + }); + + it('mints short-lived pull and push credentials with configurable TTL', async () => { + const fetchMock = vi.spyOn(globalThis, 'fetch').mockResolvedValue( + new Response( + JSON.stringify({ + success: true, + result: { + registry_host: 'registry.cloudflare.com', + username: 'user', + password: 'secret-password', + }, + }), + { status: 200 } + ) + ); + + const credentials = await 
getDevcontainerCacheCredentials( + env({ + DEVCONTAINER_CACHE_CLOUDFLARE_ACCOUNT_ID: 'cache-account', + DEVCONTAINER_CACHE_CLOUDFLARE_API_TOKEN: 'cache-token', + DEVCONTAINER_CACHE_CREDENTIAL_EXPIRATION_MINUTES: '45', + }), + 'octo/repo' + ); + + expect(credentials).toEqual({ + registry: 'registry.cloudflare.com', + username: 'user', + password: 'secret-password', + ref: 'registry.cloudflare.com/cache-account/octo-repo:devcontainer-cache', + }); + expect(fetchMock).toHaveBeenCalledWith( + 'https://api.cloudflare.com/client/v4/accounts/cache-account/containers/registries/registry.cloudflare.com/credentials', + expect.objectContaining({ + method: 'POST', + body: JSON.stringify({ + expiration_minutes: 45, + permissions: ['pull', 'push'], + }), + }) + ); + const headers = fetchMock.mock.calls[0]?.[1]?.headers as Record; + expect(headers.Authorization).toBe('Bearer cache-token'); + }); + + it('rejects malformed credential responses', async () => { + vi.spyOn(globalThis, 'fetch').mockResolvedValue( + new Response(JSON.stringify({ success: true, result: { username: 'user' } }), { status: 200 }) + ); + + await expect(getDevcontainerCacheCredentials(env(), 'octo/repo')).rejects.toThrow( + 'missing registry, username, or password' + ); + }); +}); diff --git a/apps/api/wrangler.toml b/apps/api/wrangler.toml index 64e2428bf..091bcd77a 100644 --- a/apps/api/wrangler.toml +++ b/apps/api/wrangler.toml @@ -30,6 +30,9 @@ R2_BUCKET_NAME = "workspaces-dev-assets" VM_AGENT_PROTOCOL = "https" VM_AGENT_PORT = "8443" DEVCONTAINER_CACHE_ENABLED = "true" +DEVCONTAINER_CACHE_REGISTRY_HOST = "registry.cloudflare.com" +DEVCONTAINER_CACHE_REPOSITORY_PREFIX = "sam-" +DEVCONTAINER_CACHE_CREDENTIAL_EXPIRATION_MINUTES = "120" # Hetzner VM base image. `docker-ce` marketplace image skips Docker install # (saves ~30-60s on cold provisioning). Set to `ubuntu-24.04` for emergency # rollback without a code change (also set via GH repo variable HETZNER_BASE_IMAGE). 
diff --git a/docs/architecture/secrets-taxonomy.md b/docs/architecture/secrets-taxonomy.md index f6fa28442..c4ccc92e1 100644 --- a/docs/architecture/secrets-taxonomy.md +++ b/docs/architecture/secrets-taxonomy.md @@ -44,6 +44,8 @@ These are configured once during deployment and apply to the entire platform. | `JWT_PUBLIC_KEY` | Verify JWT tokens | Corresponding RSA public key | | `CF_API_TOKEN` | Cloudflare API operations for deploy, DNS, observability, AI Gateway, and admin log viewing | Cloudflare API token with the permissions listed in `docs/guides/self-hosting.md` | | `CF_ACCOUNT_ID` | Cloudflare account for deploy operations and admin log viewer | From Cloudflare dashboard | +| `DEVCONTAINER_CACHE_CLOUDFLARE_API_TOKEN` | Optional narrower token for minting short-lived managed Containers Registry credentials | Cloudflare API token with Containers Registry credential permission | +| `DEVCONTAINER_CACHE_CLOUDFLARE_ACCOUNT_ID` | Optional account for managed Containers Registry cache images; falls back to `CF_ACCOUNT_ID` | From Cloudflare dashboard | | `CF_ZONE_ID` | DNS zone for workspace subdomains | From Cloudflare dashboard | | `ORIGIN_CA_CERT` | TLS certificate for VM agent (Origin CA) | Auto-generated by Pulumi (`infra/resources/origin-ca.ts`) | | `ORIGIN_CA_KEY` | TLS private key for VM agent | Auto-generated by Pulumi | diff --git a/docs/guides/self-hosting.md b/docs/guides/self-hosting.md index c37d2b9e3..1731f4596 100644 --- a/docs/guides/self-hosting.md +++ b/docs/guides/self-hosting.md @@ -51,6 +51,17 @@ Automated deployment configuration lives in a **GitHub Environment** named `prod | `REQUIRE_APPROVAL` | Require admin approval for new users. First user becomes superadmin. | _(unset — all users active)_ | | `HETZNER_BASE_IMAGE` | Hetzner VM base image. Set to `ubuntu-24.04` for emergency rollback from the faster `docker-ce` marketplace default. 
| `docker-ce` | +**Optional devcontainer cache variables** (Worker `vars`): + +SAM can cache built devcontainer images in Cloudflare's managed Containers Registry. The API mints short-lived registry credentials and passes them to VM agents; Wrangler is not installed on VM nodes for this path. If the Cloudflare registry account/token configuration is absent, workspaces fall back to the existing no-cache/GHCR-compatible behavior. + +| Variable | Description | Default | +| ---------------------------------------------------- | ---------------------------------------------------------------------- | ------------------------ | +| `DEVCONTAINER_CACHE_ENABLED` | Enables opportunistic devcontainer image caching | `true` in hosted config | +| `DEVCONTAINER_CACHE_REGISTRY_HOST` | Docker registry host | `registry.cloudflare.com` | +| `DEVCONTAINER_CACHE_REPOSITORY_PREFIX` | Optional prefix for generated cache repository names | `sam-` | +| `DEVCONTAINER_CACHE_CREDENTIAL_EXPIRATION_MINUTES` | TTL for short-lived registry credentials minted by the API | `120` | + **Optional runtime-config limit variables** (Worker `vars`): These are runtime Worker variables, not GitHub Environment variables in the current workflow. To change them for automated deployments, edit the top-level `[vars]` in `apps/api/wrangler.toml` before deploying, or extend `.github/workflows/deploy-reusable.yml` and `scripts/deploy/sync-wrangler-config.ts` to pass them through. Cloudflare Wrangler environment `vars` are non-inheritable, so the sync script copies top-level `[vars]` into the generated `[env.production.vars]` / `[env.staging.vars]` sections. 
@@ -82,6 +93,8 @@ These are runtime Worker variables, not GitHub Environment variables in the curr | -------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `CF_API_TOKEN` | Cloudflare API token with D1, KV, R2, DNS, Workers Scripts, Workers Observability, AI Gateway, Workers Routes, Pages, and SSL/Certificates permissions | | `CF_ACCOUNT_ID` | Your Cloudflare account ID (32-char hex). Also used as a Worker secret for the admin observability log viewer. | +| `DEVCONTAINER_CACHE_CLOUDFLARE_API_TOKEN` | Optional narrower Cloudflare API token for minting managed Containers Registry credentials. Falls back to `CF_API_TOKEN` when unset. | +| `DEVCONTAINER_CACHE_CLOUDFLARE_ACCOUNT_ID` | Optional Cloudflare account ID for the managed Containers Registry cache. Falls back to `CF_ACCOUNT_ID` when unset. | | `CF_ZONE_ID` | Your domain's zone ID (32-char hex) | | `R2_ACCESS_KEY_ID` | R2 API token access key | | `R2_SECRET_ACCESS_KEY` | R2 API token secret key | @@ -697,6 +710,9 @@ cd apps/api wrangler secret put CF_API_TOKEN wrangler secret put CF_ACCOUNT_ID wrangler secret put CF_ZONE_ID +# Optional: use narrower credentials for Cloudflare managed devcontainer cache. +wrangler secret put DEVCONTAINER_CACHE_CLOUDFLARE_API_TOKEN +wrangler secret put DEVCONTAINER_CACHE_CLOUDFLARE_ACCOUNT_ID wrangler secret put GITHUB_CLIENT_ID wrangler secret put GITHUB_CLIENT_SECRET wrangler secret put GITHUB_APP_ID diff --git a/docs/notes/2026-05-11-cloudflare-devcontainer-cache-experiments.md b/docs/notes/2026-05-11-cloudflare-devcontainer-cache-experiments.md index 5d17d0d8a..701205a91 100644 --- a/docs/notes/2026-05-11-cloudflare-devcontainer-cache-experiments.md +++ b/docs/notes/2026-05-11-cloudflare-devcontainer-cache-experiments.md @@ -159,8 +159,8 @@ Implementation direction: 1. 
Generate short-lived Cloudflare registry credentials in the API/control plane. 2. Pass `DEVCONTAINER_CACHE_REGISTRY=registry.cloudflare.com`, - `DEVCONTAINER_CACHE_USERNAME`, and `DEVCONTAINER_CACHE_TOKEN` to the VM agent - bootstrap environment. + `DEVCONTAINER_CACHE_USERNAME`, `DEVCONTAINER_CACHE_PASSWORD`, and + `DEVCONTAINER_CACHE_REF` to the VM agent bootstrap environment. 3. Change cache refs from `ghcr.io/<owner>/<repo>:devcontainer-cache` to `registry.cloudflare.com/<account_id>/<owner>-<repo>:devcontainer-cache`. 4. Keep the existing VM agent `docker pull`, `cacheFrom`, `docker tag`, and diff --git a/packages/vm-agent/.env.example b/packages/vm-agent/.env.example index 27dcb5812..c5161aab6 100644 --- a/packages/vm-agent/.env.example +++ b/packages/vm-agent/.env.example @@ -55,13 +55,17 @@ LOG_STREAM_PONG_TIMEOUT=90s # FILE_DOWNLOAD_TIMEOUT=60s # Timeout for download operations (default: 60s) # FILE_DOWNLOAD_MAX_BYTES=52428800 # Max single file download size (default: 50MB) -# Devcontainer image caching (GHCR) -# Set to true to enable opportunistic devcontainer image caching via GHCR. -# Requires the GitHub App to have packages:write permission. +# Devcontainer image caching +# Set to true to enable opportunistic devcontainer image caching. +# The API can pass short-lived Cloudflare managed registry credentials per workspace. DEVCONTAINER_CACHE_ENABLED=false # Container registry to use for cache images (default: ghcr.io) DEVCONTAINER_CACHE_REGISTRY=ghcr.io +# Optional explicit cache credentials/ref supplied by the control plane. 
+# DEVCONTAINER_CACHE_USERNAME= +# DEVCONTAINER_CACHE_PASSWORD= +# DEVCONTAINER_CACHE_REF= # System info collection timeouts SYSINFO_DOCKER_LIST_TIMEOUT=10s -SYSINFO_DOCKER_STATS_TIMEOUT=10s \ No newline at end of file +SYSINFO_DOCKER_STATS_TIMEOUT=10s diff --git a/packages/vm-agent/internal/bootstrap/bootstrap.go b/packages/vm-agent/internal/bootstrap/bootstrap.go index 192850fd4..c6f0dcb88 100644 --- a/packages/vm-agent/internal/bootstrap/bootstrap.go +++ b/packages/vm-agent/internal/bootstrap/bootstrap.go @@ -338,21 +338,17 @@ func PrepareWorkspace(ctx context.Context, cfg *config.Config, state ProvisionSt }() } - // Resolve devcontainer cache ref (best-effort, only for non-lightweight GitHub repos). + // Resolve devcontainer cache ref (best-effort, only for non-lightweight workspaces). cacheRef := "" - if cfg.DevcontainerCacheEnabled && !state.Lightweight && bootstrap.GitHubToken != "" { - owner, repo, ok := cache.ParseGitHubRepo(cfg.Repository) - if ok { - cacheRef = cache.CacheRef(cfg.DevcontainerCacheRegistry, owner, repo, state.DevcontainerConfigName) - // Best-effort login to the cache registry. - if loginErr := cache.DockerLogin(ctx, cfg.DevcontainerCacheRegistry, "x-access-token", bootstrap.GitHubToken); loginErr != nil { - slog.Warn("Cache registry login failed (caching disabled for this build)", "registry", cfg.DevcontainerCacheRegistry, "error", loginErr) - cacheRef = "" // Disable caching if login fails. 
- } else { - reporter.Log("devcontainer_cache", "started", "Checking devcontainer cache") - } - } else { - slog.Info("Devcontainer caching disabled: not a GitHub repository", "repository", cfg.Repository) + if cfg.DevcontainerCacheEnabled && !state.Lightweight { + var cacheErr error + cacheRef, cacheErr = prepareDevcontainerCache(ctx, cfg, bootstrap.GitHubToken, state.DevcontainerConfigName) + if cacheErr != nil { + slog.Warn("Cache registry login failed (caching disabled for this build)", "registry", cfg.DevcontainerCacheRegistry, "error", cacheErr) + cacheRef = "" + } + if cacheRef != "" { + reporter.Log("devcontainer_cache", "started", "Checking devcontainer cache") } } @@ -456,6 +452,34 @@ func PrepareWorkspace(ctx context.Context, cfg *config.Config, state ProvisionSt return recoveryMode, nil } +func prepareDevcontainerCache(ctx context.Context, cfg *config.Config, githubToken, devcontainerConfigName string) (string, error) { + if cfg.DevcontainerCacheRef != "" { + if cfg.DevcontainerCachePassword == "" { + return "", fmt.Errorf("cache password is required when DEVCONTAINER_CACHE_REF is set") + } + if err := cache.DockerLogin(ctx, cfg.DevcontainerCacheRegistry, cfg.DevcontainerCacheUsername, cfg.DevcontainerCachePassword); err != nil { + return "", err + } + return cfg.DevcontainerCacheRef, nil + } + + githubToken = strings.TrimSpace(githubToken) + if githubToken == "" { + return "", nil + } + owner, repo, ok := cache.ParseGitHubRepo(cfg.Repository) + if !ok { + slog.Info("Devcontainer caching disabled: not a GitHub repository", "repository", cfg.Repository) + return "", nil + } + + cacheRef := cache.CacheRef(cfg.DevcontainerCacheRegistry, owner, repo, devcontainerConfigName) + if err := cache.DockerLogin(ctx, cfg.DevcontainerCacheRegistry, "x-access-token", githubToken); err != nil { + return "", err + } + return cacheRef, nil +} + // ensureVolumeReady creates a Docker named volume for the workspace if it doesn't // already exist. 
The volume persists across container rebuilds and is deleted when // the workspace is deleted. diff --git a/packages/vm-agent/internal/cache/cache.go b/packages/vm-agent/internal/cache/cache.go index 569a65a0f..6d35277b9 100644 --- a/packages/vm-agent/internal/cache/cache.go +++ b/packages/vm-agent/internal/cache/cache.go @@ -97,11 +97,23 @@ func DockerLogin(ctx context.Context, registry, username, token string) error { output, err := cmd.CombinedOutput() if err != nil { - return fmt.Errorf("docker login failed: %w: %s", err, strings.TrimSpace(string(output))) + return fmt.Errorf("docker login failed: %w: %s", err, redactSensitive(strings.TrimSpace(string(output)), token)) } return nil } +func redactSensitive(message string, values ...string) string { + redacted := message + for _, value := range values { + value = strings.TrimSpace(value) + if value == "" { + continue + } + redacted = strings.ReplaceAll(redacted, value, "[redacted]") + } + return redacted +} + // PullCacheImage pulls a cache image from the registry. // Returns an error if the pull fails (caller decides whether this is fatal). 
func PullCacheImage(ctx context.Context, ref string) error { diff --git a/packages/vm-agent/internal/cache/cache_test.go b/packages/vm-agent/internal/cache/cache_test.go index 75bcfcd6d..6305d78e8 100644 --- a/packages/vm-agent/internal/cache/cache_test.go +++ b/packages/vm-agent/internal/cache/cache_test.go @@ -48,34 +48,34 @@ func TestParseGitHubRepo(t *testing.T) { wantOK: true, }, { - name: "non-GitHub HTTPS URL", + name: "non-GitHub HTTPS URL", repoURL: "https://gitlab.com/octocat/hello-world.git", - wantOK: false, + wantOK: false, }, { - name: "empty string", + name: "empty string", repoURL: "", - wantOK: false, + wantOK: false, }, { - name: "whitespace only", + name: "whitespace only", repoURL: " ", - wantOK: false, + wantOK: false, }, { - name: "single segment", + name: "single segment", repoURL: "hello-world", - wantOK: false, + wantOK: false, }, { - name: "empty owner", + name: "empty owner", repoURL: "/hello-world", - wantOK: false, + wantOK: false, }, { - name: "empty repo", + name: "empty repo", repoURL: "octocat/", - wantOK: false, + wantOK: false, }, { name: "https URL with trailing slash", @@ -187,3 +187,15 @@ func TestCacheRef(t *testing.T) { }) } } + +func TestRedactSensitive(t *testing.T) { + got := redactSensitive("login failed for secret-token", "secret-token") + if got != "login failed for [redacted]" { + t.Fatalf("redactSensitive() = %q", got) + } + + got = redactSensitive("nothing to redact", "") + if got != "nothing to redact" { + t.Fatalf("redactSensitive() with empty value = %q", got) + } +} diff --git a/packages/vm-agent/internal/config/config.go b/packages/vm-agent/internal/config/config.go index 0cc21dce5..ced349e50 100644 --- a/packages/vm-agent/internal/config/config.go +++ b/packages/vm-agent/internal/config/config.go @@ -73,10 +73,10 @@ type Config struct { HeartbeatInterval time.Duration // HTTP server timeouts - HTTPReadTimeout time.Duration - HTTPWriteTimeout time.Duration - HTTPIdleTimeout time.Duration - HTTPCallbackTimeout 
time.Duration // timeout for outbound HTTP callbacks to the control plane + HTTPReadTimeout time.Duration + HTTPWriteTimeout time.Duration + HTTPIdleTimeout time.Duration + HTTPCallbackTimeout time.Duration // timeout for outbound HTTP callbacks to the control plane // WebSocket settings WSReadBufferSize int @@ -96,16 +96,16 @@ type Config struct { ACPInitializeTimeoutMs int // Per-phase timeout for Initialize RPC; 0 = use ACPInitTimeoutMs (default: 0) ACPNewSessionTimeoutMs int // Per-phase timeout for NewSession RPC; 0 = use ACPInitTimeoutMs (default: 0) ACPLoadSessionTimeoutMs int // Per-phase timeout for LoadSession RPC; 0 = use ACPInitTimeoutMs (default: 0) - ACPReconnectDelayMs int - ACPReconnectTimeoutMs int - ACPMaxRestartAttempts int - ACPMessageBufferSize int // Max buffered messages per SessionHost for late-join replay - ACPViewerSendBuffer int // Per-viewer send channel buffer size - ACPPingInterval time.Duration // WebSocket ping interval (default: 30s) - ACPPongTimeout time.Duration // WebSocket pong deadline after ping (default: 10s) - ACPPromptTimeout time.Duration // Max prompt runtime; 0 = no timeout (default: 0). Used for workspace sessions; task sessions use ACPTaskPromptTimeout via effectivePromptTimeout(). - ACPTaskPromptTimeout time.Duration // Max prompt runtime for task-driven sessions; 0 = no timeout (default: 6h) - ACPPromptCancelGrace time.Duration // Wait after cancel before force-stop fallback (default: 5s) + ACPReconnectDelayMs int + ACPReconnectTimeoutMs int + ACPMaxRestartAttempts int + ACPMessageBufferSize int // Max buffered messages per SessionHost for late-join replay + ACPViewerSendBuffer int // Per-viewer send channel buffer size + ACPPingInterval time.Duration // WebSocket ping interval (default: 30s) + ACPPongTimeout time.Duration // WebSocket pong deadline after ping (default: 10s) + ACPPromptTimeout time.Duration // Max prompt runtime; 0 = no timeout (default: 0). 
Used for workspace sessions; task sessions use ACPTaskPromptTimeout via effectivePromptTimeout(). + ACPTaskPromptTimeout time.Duration // Max prompt runtime for task-driven sessions; 0 = no timeout (default: 6h) + ACPPromptCancelGrace time.Duration // Wait after cancel before force-stop fallback (default: 5s) ACPIdleSuspendTimeout time.Duration // Auto-suspend after this idle duration with no viewers (default: 30m, 0=disabled) ACPNotifSerializeTimeout time.Duration // Max wait for previous notification processing before delivering next (default: 5s) ACPHeartbeatInterval time.Duration // Interval for direct ACP session heartbeats to control plane (default: 60s, env: ACP_HEARTBEAT_INTERVAL) @@ -141,6 +141,9 @@ type Config struct { // Configurable per constitution principle XI. DevcontainerCacheEnabled bool // Enable devcontainer image caching (env: DEVCONTAINER_CACHE_ENABLED, default: false) DevcontainerCacheRegistry string // Container registry for cache images (env: DEVCONTAINER_CACHE_REGISTRY, default: ghcr.io) + DevcontainerCacheUsername string // Optional registry username (env: DEVCONTAINER_CACHE_USERNAME) + DevcontainerCachePassword string // Optional registry password/token (env: DEVCONTAINER_CACHE_PASSWORD) + DevcontainerCacheRef string // Optional full cache image ref (env: DEVCONTAINER_CACHE_REF) // Cloud provider — used for provider-specific optimizations (apt mirrors, etc.) Provider string // Cloud provider name (env: PROVIDER, e.g. 
"hetzner", "scaleway", "gcp") @@ -153,9 +156,9 @@ type Config struct { TaskMode string // Task execution mode: "task" or "conversation" (env: TASK_MODE, default: "task") // Persistence settings - configurable per constitution principle XI - PersistenceDBPath string // SQLite database path for session state persistence - EventStoreDBPath string // SQLite database path for persistent event logs - MetricsDBPath string // SQLite database path for resource metrics snapshots + PersistenceDBPath string // SQLite database path for session state persistence + EventStoreDBPath string // SQLite database path for persistent event logs + MetricsDBPath string // SQLite database path for resource metrics snapshots MetricsInterval time.Duration // Resource metrics collection interval (default: 1m) // Git integration settings - configurable per constitution principle XI @@ -195,7 +198,7 @@ type Config struct { SysInfoCacheTTL time.Duration // Cache TTL for system info responses (default: 5s) // Log reader/stream settings - configurable per constitution principle XI - LogReaderTimeout time.Duration // Timeout for journalctl read commands (default: 30s) + LogReaderTimeout time.Duration // Timeout for journalctl read commands (default: 30s) LogStreamPingInterval time.Duration // WebSocket ping interval for log stream (default: 30s) LogStreamPongTimeout time.Duration // WebSocket pong deadline for log stream (default: 90s) @@ -295,20 +298,20 @@ func Load() (*Config, error) { PTYOutputBufferSize: getEnvInt("PTY_OUTPUT_BUFFER_SIZE", 262144), // 256 KB default // ACP settings - configurable per constitution principle XI - ACPInitTimeoutMs: getEnvInt("ACP_INIT_TIMEOUT_MS", 30000), - ACPInitializeTimeoutMs: getEnvInt("ACP_INITIALIZE_TIMEOUT_MS", 0), // 0 = use ACPInitTimeoutMs - ACPNewSessionTimeoutMs: getEnvInt("ACP_NEW_SESSION_TIMEOUT_MS", 0), // 0 = use ACPInitTimeoutMs - ACPLoadSessionTimeoutMs: getEnvInt("ACP_LOAD_SESSION_TIMEOUT_MS", 0), // 0 = use ACPInitTimeoutMs - 
ACPReconnectDelayMs: getEnvInt("ACP_RECONNECT_DELAY_MS", 2000), - ACPReconnectTimeoutMs: getEnvInt("ACP_RECONNECT_TIMEOUT_MS", 30000), - ACPMaxRestartAttempts: getEnvInt("ACP_MAX_RESTART_ATTEMPTS", 3), - ACPMessageBufferSize: getEnvInt("ACP_MESSAGE_BUFFER_SIZE", 5000), - ACPViewerSendBuffer: getEnvInt("ACP_VIEWER_SEND_BUFFER", 256), - ACPPingInterval: getEnvDuration("ACP_PING_INTERVAL", 30*time.Second), - ACPPongTimeout: getEnvDuration("ACP_PONG_TIMEOUT", 10*time.Second), - ACPPromptTimeout: getEnvDuration("ACP_PROMPT_TIMEOUT", 0), - ACPTaskPromptTimeout: getEnvDuration("ACP_TASK_PROMPT_TIMEOUT", 6*time.Hour), - ACPPromptCancelGrace: getEnvDuration("ACP_PROMPT_CANCEL_GRACE_PERIOD", 5*time.Second), + ACPInitTimeoutMs: getEnvInt("ACP_INIT_TIMEOUT_MS", 30000), + ACPInitializeTimeoutMs: getEnvInt("ACP_INITIALIZE_TIMEOUT_MS", 0), // 0 = use ACPInitTimeoutMs + ACPNewSessionTimeoutMs: getEnvInt("ACP_NEW_SESSION_TIMEOUT_MS", 0), // 0 = use ACPInitTimeoutMs + ACPLoadSessionTimeoutMs: getEnvInt("ACP_LOAD_SESSION_TIMEOUT_MS", 0), // 0 = use ACPInitTimeoutMs + ACPReconnectDelayMs: getEnvInt("ACP_RECONNECT_DELAY_MS", 2000), + ACPReconnectTimeoutMs: getEnvInt("ACP_RECONNECT_TIMEOUT_MS", 30000), + ACPMaxRestartAttempts: getEnvInt("ACP_MAX_RESTART_ATTEMPTS", 3), + ACPMessageBufferSize: getEnvInt("ACP_MESSAGE_BUFFER_SIZE", 5000), + ACPViewerSendBuffer: getEnvInt("ACP_VIEWER_SEND_BUFFER", 256), + ACPPingInterval: getEnvDuration("ACP_PING_INTERVAL", 30*time.Second), + ACPPongTimeout: getEnvDuration("ACP_PONG_TIMEOUT", 10*time.Second), + ACPPromptTimeout: getEnvDuration("ACP_PROMPT_TIMEOUT", 0), + ACPTaskPromptTimeout: getEnvDuration("ACP_TASK_PROMPT_TIMEOUT", 6*time.Hour), + ACPPromptCancelGrace: getEnvDuration("ACP_PROMPT_CANCEL_GRACE_PERIOD", 5*time.Second), ACPIdleSuspendTimeout: getEnvDuration("ACP_IDLE_SUSPEND_TIMEOUT", 30*time.Minute), ACPNotifSerializeTimeout: getEnvDuration("ACP_NOTIF_SERIALIZE_TIMEOUT", 5*time.Second), ACPHeartbeatInterval: 
getEnvDuration("ACP_HEARTBEAT_INTERVAL", 60*time.Second), @@ -342,6 +345,9 @@ func Load() (*Config, error) { // Devcontainer cache settings — opportunistic image caching. DevcontainerCacheEnabled: getEnvBool("DEVCONTAINER_CACHE_ENABLED", false), DevcontainerCacheRegistry: getEnv("DEVCONTAINER_CACHE_REGISTRY", "ghcr.io"), + DevcontainerCacheUsername: getEnv("DEVCONTAINER_CACHE_USERNAME", ""), + DevcontainerCachePassword: getEnv("DEVCONTAINER_CACHE_PASSWORD", ""), + DevcontainerCacheRef: getEnv("DEVCONTAINER_CACHE_REF", ""), // Cloud provider (set via cloud-init) Provider: getEnv("PROVIDER", ""), @@ -375,10 +381,10 @@ func Load() (*Config, error) { // File transfer settings FileUploadMaxBytes: getEnvInt64("FILE_UPLOAD_MAX_BYTES", 50*1024*1024), // 50 MB - FileUploadBatchMaxBytes: getEnvInt64("FILE_UPLOAD_BATCH_MAX_BYTES", 250*1024*1024), // 250 MB + FileUploadBatchMaxBytes: getEnvInt64("FILE_UPLOAD_BATCH_MAX_BYTES", 250*1024*1024), // 250 MB FileUploadTimeout: getEnvDuration("FILE_UPLOAD_TIMEOUT", 120*time.Second), FileDownloadTimeout: getEnvDuration("FILE_DOWNLOAD_TIMEOUT", 60*time.Second), - FileDownloadMaxBytes: getEnvInt64("FILE_DOWNLOAD_MAX_BYTES", 50*1024*1024), // 50 MB + FileDownloadMaxBytes: getEnvInt64("FILE_DOWNLOAD_MAX_BYTES", 50*1024*1024), // 50 MB // Callback retry settings - configurable per constitution principle XI WorkspaceReadyCallbackTimeout: getEnvDuration("WORKSPACE_READY_CALLBACK_TIMEOUT", 10*time.Second), @@ -615,4 +621,3 @@ func deriveAllowedOrigins(controlPlaneURL string) []string { "https://*." 
+ baseDomain, // Allow workspace subdomains } } - diff --git a/packages/vm-agent/internal/config/config_test.go b/packages/vm-agent/internal/config/config_test.go index acb14dd4d..1cb697cb4 100644 --- a/packages/vm-agent/internal/config/config_test.go +++ b/packages/vm-agent/internal/config/config_test.go @@ -118,6 +118,36 @@ func TestAdditionalFeaturesOverride(t *testing.T) { } } +func TestLoadDevcontainerCacheCredentials(t *testing.T) { + t.Setenv("CONTROL_PLANE_URL", "https://api.example.com") + t.Setenv("WORKSPACE_ID", "ws-123") + t.Setenv("DEVCONTAINER_CACHE_ENABLED", "true") + t.Setenv("DEVCONTAINER_CACHE_REGISTRY", "registry.cloudflare.com") + t.Setenv("DEVCONTAINER_CACHE_USERNAME", "cache-user") + t.Setenv("DEVCONTAINER_CACHE_PASSWORD", "cache-password") + t.Setenv("DEVCONTAINER_CACHE_REF", "registry.cloudflare.com/acct/octo-repo:devcontainer-cache") + + cfg, err := Load() + if err != nil { + t.Fatalf("Load returned error: %v", err) + } + if !cfg.DevcontainerCacheEnabled { + t.Fatal("DevcontainerCacheEnabled = false, want true") + } + if cfg.DevcontainerCacheRegistry != "registry.cloudflare.com" { + t.Fatalf("DevcontainerCacheRegistry = %q", cfg.DevcontainerCacheRegistry) + } + if cfg.DevcontainerCacheUsername != "cache-user" { + t.Fatalf("DevcontainerCacheUsername = %q", cfg.DevcontainerCacheUsername) + } + if cfg.DevcontainerCachePassword != "cache-password" { + t.Fatalf("DevcontainerCachePassword = %q", cfg.DevcontainerCachePassword) + } + if cfg.DevcontainerCacheRef != "registry.cloudflare.com/acct/octo-repo:devcontainer-cache" { + t.Fatalf("DevcontainerCacheRef = %q", cfg.DevcontainerCacheRef) + } +} + func TestLoadDefaultsContainerUserEmpty(t *testing.T) { t.Setenv("CONTROL_PLANE_URL", "https://api.example.com") t.Setenv("WORKSPACE_ID", "ws-123") @@ -460,12 +490,12 @@ func splitFirst(s, sep string) []string { // validConfig returns a Config with all required fields set to valid values. 
func validConfig() *Config { return &Config{ - Port: 8080, - ControlPlaneURL: "https://api.example.com", - NodeID: "node-1", - SessionMaxCount: 100, - DefaultRows: 24, - DefaultCols: 80, + Port: 8080, + ControlPlaneURL: "https://api.example.com", + NodeID: "node-1", + SessionMaxCount: 100, + DefaultRows: 24, + DefaultCols: 80, WSReadBufferSize: 1024, WSWriteBufferSize: 1024, } diff --git a/packages/vm-agent/internal/server/server.go b/packages/vm-agent/internal/server/server.go index fb3b27191..944690115 100644 --- a/packages/vm-agent/internal/server/server.go +++ b/packages/vm-agent/internal/server/server.go @@ -132,6 +132,7 @@ type WorkspaceRuntime struct { GitHubID string Lightweight bool // Skip devcontainer build, use fallback image for faster startup DevcontainerConfigName string // Named devcontainer config (subdirectory under .devcontainer/) + DevcontainerCache DevcontainerCacheCredentials PTY *pty.Manager // ReadyCallbackPending is true when the workspace provisioned successfully but @@ -144,6 +145,13 @@ type WorkspaceRuntime struct { ReadyCallbackStatus string } +type DevcontainerCacheCredentials struct { + Registry string + Username string + Password string + Ref string +} + type EventRecord struct { ID string `json:"id"` NodeID string `json:"nodeId,omitempty"` diff --git a/packages/vm-agent/internal/server/workspace_provisioning.go b/packages/vm-agent/internal/server/workspace_provisioning.go index 0b98585b2..46d5202c6 100644 --- a/packages/vm-agent/internal/server/workspace_provisioning.go +++ b/packages/vm-agent/internal/server/workspace_provisioning.go @@ -14,6 +14,7 @@ import ( "github.com/workspace/vm-agent/internal/bootlog" "github.com/workspace/vm-agent/internal/bootstrap" + "github.com/workspace/vm-agent/internal/config" ) var prepareWorkspaceForRuntime = bootstrap.PrepareWorkspace // returns (recoveryMode bool, error) @@ -80,6 +81,7 @@ func (s *Server) provisionWorkspaceRuntime(ctx context.Context, runtime *Workspa cfg.ContainerUser = 
strings.TrimSpace(s.config.ContainerUser) } cfg.CallbackToken = callbackToken + applyDevcontainerCacheCredentials(&cfg, runtime.DevcontainerCache) provisionCtx := ctx cancel := func() {} @@ -162,6 +164,7 @@ func (s *Server) recoverWorkspaceRuntime(ctx context.Context, runtime *Workspace cfg.ContainerUser = strings.TrimSpace(s.config.ContainerUser) } cfg.CallbackToken = callbackToken + applyDevcontainerCacheCredentials(&cfg, runtime.DevcontainerCache) state := bootstrap.ProvisionState{} if cfg.Repository != "" && callbackToken != "" { @@ -190,6 +193,19 @@ func (s *Server) recoverWorkspaceRuntime(ctx context.Context, runtime *Workspace return nil } +func applyDevcontainerCacheCredentials(cfg *config.Config, credentials DevcontainerCacheCredentials) { + if cfg == nil || credentials.Ref == "" { + return + } + cfg.DevcontainerCacheEnabled = true + if credentials.Registry != "" { + cfg.DevcontainerCacheRegistry = credentials.Registry + } + cfg.DevcontainerCacheUsername = credentials.Username + cfg.DevcontainerCachePassword = credentials.Password + cfg.DevcontainerCacheRef = credentials.Ref +} + func (s *Server) hydrateWorkspaceRuntimeForRecovery( ctx context.Context, runtime *WorkspaceRuntime, diff --git a/packages/vm-agent/internal/server/workspace_provisioning_test.go b/packages/vm-agent/internal/server/workspace_provisioning_test.go index 3e560053b..48e312745 100644 --- a/packages/vm-agent/internal/server/workspace_provisioning_test.go +++ b/packages/vm-agent/internal/server/workspace_provisioning_test.go @@ -83,6 +83,12 @@ func TestRecoverWorkspaceRuntimeUsesRuntimeConfig(t *testing.T) { ContainerWorkDir: "/workspaces/WS_TEST", ContainerUser: "node", CallbackToken: "workspace-callback-token", + DevcontainerCache: DevcontainerCacheCredentials{ + Registry: "registry.cloudflare.com", + Username: "cache-user", + Password: "cache-password", + Ref: "registry.cloudflare.com/acct/octo-repo:devcontainer-cache", + }, } s := &Server{ @@ -120,6 +126,21 @@ func 
TestRecoverWorkspaceRuntimeUsesRuntimeConfig(t *testing.T) { if capturedCfg.CallbackToken != runtime.CallbackToken { t.Fatalf("CallbackToken = %q, want %q", capturedCfg.CallbackToken, runtime.CallbackToken) } + if !capturedCfg.DevcontainerCacheEnabled { + t.Fatal("DevcontainerCacheEnabled = false, want true") + } + if capturedCfg.DevcontainerCacheRegistry != runtime.DevcontainerCache.Registry { + t.Fatalf("DevcontainerCacheRegistry = %q, want %q", capturedCfg.DevcontainerCacheRegistry, runtime.DevcontainerCache.Registry) + } + if capturedCfg.DevcontainerCacheUsername != runtime.DevcontainerCache.Username { + t.Fatalf("DevcontainerCacheUsername = %q, want %q", capturedCfg.DevcontainerCacheUsername, runtime.DevcontainerCache.Username) + } + if capturedCfg.DevcontainerCachePassword != runtime.DevcontainerCache.Password { + t.Fatalf("DevcontainerCachePassword = %q, want %q", capturedCfg.DevcontainerCachePassword, runtime.DevcontainerCache.Password) + } + if capturedCfg.DevcontainerCacheRef != runtime.DevcontainerCache.Ref { + t.Fatalf("DevcontainerCacheRef = %q, want %q", capturedCfg.DevcontainerCacheRef, runtime.DevcontainerCache.Ref) + } if capturedState.GitHubToken != "" { t.Fatalf("expected empty recovery git token for empty repository, got %q", capturedState.GitHubToken) } diff --git a/packages/vm-agent/internal/server/workspace_routing.go b/packages/vm-agent/internal/server/workspace_routing.go index 33f0d23f5..7c944fe5e 100644 --- a/packages/vm-agent/internal/server/workspace_routing.go +++ b/packages/vm-agent/internal/server/workspace_routing.go @@ -36,6 +36,7 @@ type workspaceRuntimeOpts struct { GitHubID string Lightweight bool DevcontainerConfigName string + DevcontainerCache DevcontainerCacheCredentials } func (s *Server) routedNodeID(r *http.Request) string { @@ -224,6 +225,9 @@ func (s *Server) upsertWorkspaceRuntime(workspaceID, repository, branch, status, if opt.DevcontainerConfigName != "" { runtime.DevcontainerConfigName = opt.DevcontainerConfigName } 
+ if opt.DevcontainerCache.Ref != "" { + runtime.DevcontainerCache = opt.DevcontainerCache + } runtime.UpdatedAt = time.Now().UTC() if metadataChanged && runtime.Repository != "" { @@ -283,23 +287,24 @@ func (s *Server) upsertWorkspaceRuntime(workspaceID, repository, branch, status, manager := s.newPTYManagerForWorkspace(workspaceID, workspaceDir, containerWorkDir, containerLabelValue, containerUser) runtime = &WorkspaceRuntime{ - ID: workspaceID, - Repository: effectiveRepo, - Branch: effectiveBranch, - Status: status, - CreatedAt: time.Now().UTC(), - UpdatedAt: time.Now().UTC(), - WorkspaceDir: workspaceDir, - ContainerLabelValue: containerLabelValue, - ContainerWorkDir: containerWorkDir, - ContainerUser: containerUser, - CallbackToken: strings.TrimSpace(callbackToken), - GitUserName: opt.GitUserName, - GitUserEmail: opt.GitUserEmail, - GitHubID: opt.GitHubID, - Lightweight: opt.Lightweight || persistedLightweight, - DevcontainerConfigName: firstNonEmpty(opt.DevcontainerConfigName, persistedDevcontainerConfigName), - PTY: manager, + ID: workspaceID, + Repository: effectiveRepo, + Branch: effectiveBranch, + Status: status, + CreatedAt: time.Now().UTC(), + UpdatedAt: time.Now().UTC(), + WorkspaceDir: workspaceDir, + ContainerLabelValue: containerLabelValue, + ContainerWorkDir: containerWorkDir, + ContainerUser: containerUser, + CallbackToken: strings.TrimSpace(callbackToken), + GitUserName: opt.GitUserName, + GitUserEmail: opt.GitUserEmail, + GitHubID: opt.GitHubID, + Lightweight: opt.Lightweight || persistedLightweight, + DevcontainerConfigName: firstNonEmpty(opt.DevcontainerConfigName, persistedDevcontainerConfigName), + DevcontainerCache: opt.DevcontainerCache, + PTY: manager, } s.workspaces[workspaceID] = runtime diff --git a/packages/vm-agent/internal/server/workspaces.go b/packages/vm-agent/internal/server/workspaces.go index f02eaef94..d5251c6aa 100644 --- a/packages/vm-agent/internal/server/workspaces.go +++ b/packages/vm-agent/internal/server/workspaces.go 
@@ -362,6 +362,12 @@ func (s *Server) handleCreateWorkspace(w http.ResponseWriter, r *http.Request) { GitHubID string `json:"githubId,omitempty"` Lightweight bool `json:"lightweight,omitempty"` DevcontainerConfigName string `json:"devcontainerConfigName,omitempty"` + DevcontainerCache struct { + Registry string `json:"registry,omitempty"` + Username string `json:"username,omitempty"` + Password string `json:"password,omitempty"` + Ref string `json:"ref,omitempty"` + } `json:"devcontainerCache,omitempty"` } if err := json.NewDecoder(r.Body).Decode(&body); err != nil { @@ -397,6 +403,12 @@ func (s *Server) handleCreateWorkspace(w http.ResponseWriter, r *http.Request) { GitHubID: strings.TrimSpace(body.GitHubID), Lightweight: body.Lightweight, DevcontainerConfigName: strings.TrimSpace(body.DevcontainerConfigName), + DevcontainerCache: DevcontainerCacheCredentials{ + Registry: strings.TrimSpace(body.DevcontainerCache.Registry), + Username: strings.TrimSpace(body.DevcontainerCache.Username), + Password: strings.TrimSpace(body.DevcontainerCache.Password), + Ref: strings.TrimSpace(body.DevcontainerCache.Ref), + }, }) // Note: Per-workspace message reporter is created lazily in diff --git a/scripts/deploy/configure-secrets.sh b/scripts/deploy/configure-secrets.sh index a22f435b4..683c0785c 100644 --- a/scripts/deploy/configure-secrets.sh +++ b/scripts/deploy/configure-secrets.sh @@ -120,6 +120,10 @@ set_worker_secret "GITHUB_WEBHOOK_SECRET" "${GH_WEBHOOK_SECRET:-${GITHUB_WEBHOOK set_worker_secret "CF_API_TOKEN" "${CF_API_TOKEN:-}" "$ENVIRONMENT" "true" || FAILED=true set_worker_secret "CF_ZONE_ID" "${CF_ZONE_ID:-}" "$ENVIRONMENT" "true" || FAILED=true set_worker_secret "CF_ACCOUNT_ID" "${CF_ACCOUNT_ID:-}" "$ENVIRONMENT" "true" || FAILED=true +# Optional: use a narrower Cloudflare token/account for managed Containers Registry +# devcontainer cache credentials. Falls back to CF_API_TOKEN/CF_ACCOUNT_ID when unset. 
+set_worker_secret "DEVCONTAINER_CACHE_CLOUDFLARE_API_TOKEN" "${DEVCONTAINER_CACHE_CLOUDFLARE_API_TOKEN:-}" "$ENVIRONMENT" "false" +set_worker_secret "DEVCONTAINER_CACHE_CLOUDFLARE_ACCOUNT_ID" "${DEVCONTAINER_CACHE_CLOUDFLARE_ACCOUNT_ID:-}" "$ENVIRONMENT" "false" # Configure GitHub secrets (required - platform is useless without authentication) # GH_* env vars (GitHub Actions does not allow GITHUB_* secret names) are mapped to GITHUB_* Worker secrets. diff --git a/tasks/active/2026-05-11-cloudflare-managed-devcontainer-cache.md b/tasks/active/2026-05-11-cloudflare-managed-devcontainer-cache.md new file mode 100644 index 000000000..c17b6c455 --- /dev/null +++ b/tasks/active/2026-05-11-cloudflare-managed-devcontainer-cache.md @@ -0,0 +1,36 @@ +# Cloudflare Managed Devcontainer Cache + +## Problem + +The GHCR devcontainer cache path cannot push cache images with GitHub App installation tokens. PR #963 proved that Cloudflare managed Containers Registry supports the Docker-native pull/build/tag/push flow SAM already uses, including a real SAM devcontainer image. The production path needs to mint short-lived registry credentials in the API/control plane and pass them to the VM agent without installing Wrangler on VM nodes. + +## Research Findings + +- `packages/vm-agent/internal/cache/cache.go` already provides best-effort Docker login, pull, tag, and push helpers. +- `packages/vm-agent/internal/bootstrap/bootstrap.go` currently derives `ghcr.io/<owner>/<repo>:devcontainer-cache` and logs in with the GitHub token. This needs to accept explicit registry credentials and an explicit cache ref. +- `apps/api/src/durable-objects/task-runner/workspace-steps.ts` creates workspaces through `createWorkspaceOnNode()` after node provisioning. This is the right place to mint and pass per-workspace short-lived credentials. +- `packages/vm-agent/internal/server/workspaces.go` accepts the workspace creation request and stores runtime metadata.
The request body needs non-logged cache credential fields. +- `apps/api/src/env.ts`, `packages/cloud-init`, and self-hosting/deploy docs only expose `DEVCONTAINER_CACHE_ENABLED` today. +- Relevant postmortems: project credential security hardening, env-var quote stripping, and devcontainer gitconfig lock failures. + +## Checklist + +- [x] Add API service for Cloudflare managed registry config, cache ref construction, and short-lived credential minting. +- [x] Pass registry host, username, password, and cache ref from TaskRunner workspace creation to the VM agent. +- [x] Extend VM-agent config/provisioning state to use explicit cache credentials and cache ref when present. +- [x] Preserve existing fallback when Cloudflare registry config is absent. +- [x] Keep registry passwords and tokens out of logs and persisted metadata. +- [x] Add focused API tests for config absence, request construction, ref construction, and response validation. +- [x] Add focused Go tests for config parsing/cache ref precedence and no-password logging behavior. +- [x] Update env docs, self-hosting docs, and deployment secret mapping. +- [ ] Run targeted tests, then broader validation as practical. + +## Acceptance Criteria + +- No Wrangler dependency is added to VM nodes. +- Cloudflare registry credentials are minted in the API/control plane with configurable account ID, token, repository prefix, registry host, and credential TTL. +- VM agent receives Docker registry credentials and cache ref during workspace bootstrap. +- Missing Cloudflare registry config falls back to current behavior. +- Secrets are not logged or persisted in workspace metadata. +- Tests cover the new API and VM-agent behavior. +- Changes are pushed to PR #963 and the PR is not merged. 
From f936ecbcfc56318fb2dfb09f24547b825ec9f4ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Titsworth-Morin?= Date: Mon, 11 May 2026 14:47:39 +0000 Subject: [PATCH 11/13] docs: add devcontainer cache validation evidence --- ...1-cloudflare-managed-devcontainer-cache.md | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tasks/active/2026-05-11-cloudflare-managed-devcontainer-cache.md b/tasks/active/2026-05-11-cloudflare-managed-devcontainer-cache.md index c17b6c455..1be8b4b5e 100644 --- a/tasks/active/2026-05-11-cloudflare-managed-devcontainer-cache.md +++ b/tasks/active/2026-05-11-cloudflare-managed-devcontainer-cache.md @@ -23,7 +23,8 @@ The GHCR devcontainer cache path cannot push cache images with GitHub App instal - [x] Add focused API tests for config absence, request construction, ref construction, and response validation. - [x] Add focused Go tests for config parsing/cache ref precedence and no-password logging behavior. - [x] Update env docs, self-hosting docs, and deployment secret mapping. -- [ ] Run targeted tests, then broader validation as practical. +- [x] Run targeted tests, then broader validation as practical. +- [x] Update PR #963 with agent preflight and specialist review evidence. ## Acceptance Criteria @@ -34,3 +35,22 @@ The GHCR devcontainer cache path cannot push cache images with GitHub App instal - Secrets are not logged or persisted in workspace metadata. - Tests cover the new API and VM-agent behavior. - Changes are pushed to PR #963 and the PR is not merged. 
+ +## Validation + +- `pnpm --filter @simple-agent-manager/shared build` +- `pnpm --filter @simple-agent-manager/providers build` +- `pnpm --filter @simple-agent-manager/cloud-init build` +- `pnpm --filter @simple-agent-manager/api test -- tests/unit/services/devcontainer-cache.test.ts` +- `pnpm --filter @simple-agent-manager/api typecheck` +- `pnpm --filter @simple-agent-manager/api build` +- `pnpm --filter @simple-agent-manager/api lint` (passed with existing warnings, 0 errors) +- `pnpm --filter @simple-agent-manager/api exec eslint src/services/devcontainer-cache.ts tests/unit/services/devcontainer-cache.test.ts src/durable-objects/task-runner/workspace-steps.ts src/services/node-agent.ts` +- `go test ./internal/config ./internal/cache ./internal/server` +- `go test ./...` +- `git diff --check` + +## PR Evidence + +- PR #963 body updated with agent preflight evidence, cross-component data flow, validation, staging caveat, and specialist review evidence. +- Implementation pushed in commit `0af2c518` and follow-up evidence commit. 
From 5aa4b6326dbcdc5036b3154f0e394dc61c7b90f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Titsworth-Morin?= Date: Mon, 11 May 2026 14:58:35 +0000 Subject: [PATCH 12/13] refactor: split task runner workspace creation --- .../task-runner/workspace-steps.ts | 332 ++++++++++-------- 1 file changed, 195 insertions(+), 137 deletions(-) diff --git a/apps/api/src/durable-objects/task-runner/workspace-steps.ts b/apps/api/src/durable-objects/task-runner/workspace-steps.ts index 1cee1fa65..3b4ac8880 100644 --- a/apps/api/src/durable-objects/task-runner/workspace-steps.ts +++ b/apps/api/src/durable-objects/task-runner/workspace-steps.ts @@ -6,6 +6,7 @@ import { DEFAULT_WORKSPACE_PROFILE } from '@simple-agent-manager/shared'; import { log } from '../../lib/logger'; +import type { DevcontainerCacheCredentials } from '../../services/devcontainer-cache'; import { ensureSessionLinked } from './state-machine'; import type { TaskRunnerContext, TaskRunnerState } from './types'; @@ -23,34 +24,10 @@ export async function handleWorkspaceCreation( throw new Error('No nodeId in state — cannot create workspace'); } - // If workspace already created (retry or crash recovery), skip creation. - // Check both DO state AND D1 to handle the crash window between D1 insert - // and storage.put — if D1 has a workspace_id for this task but the DO - // state doesn't, recover it. 
- if (!state.stepResults.workspaceId) { - const existingTask = await rc.env.DATABASE.prepare( - `SELECT workspace_id, status FROM tasks WHERE id = ?` - ).bind(state.taskId).first<{ workspace_id: string | null; status: string }>(); - - if (existingTask?.workspace_id) { - // D1 has a workspace — recover it into DO state (crash recovery) - state.stepResults.workspaceId = existingTask.workspace_id; - await rc.ctx.storage.put('state', state); - - log.info('task_runner_do.workspace_recovered_from_d1', { - taskId: state.taskId, - workspaceId: existingTask.workspace_id, - }); - } - } + await recoverWorkspaceFromD1(state, rc); if (state.stepResults.workspaceId) { - // Check if we already transitioned to delegated - const task = await rc.env.DATABASE.prepare( - `SELECT status FROM tasks WHERE id = ?` - ).bind(state.taskId).first<{ status: string }>(); - - if (task?.status === 'delegated') { + if (await isTaskDelegated(state, rc)) { // TDF-6: Ensure session linking on crash recovery — the DO may have crashed // after creating the workspace but before linking the session. 
await ensureSessionLinked(state, state.stepResults.workspaceId, rc); @@ -59,117 +36,7 @@ export async function handleWorkspaceCreation( } // If still queued, proceed with delegation transition below } else { - // Create workspace in D1 - const { ulid } = await import('../../lib/ulid'); - const { resolveUniqueWorkspaceDisplayName } = await import('../../services/workspace-names'); - const { drizzle } = await import('drizzle-orm/d1'); - const schema = await import('../../db/schema'); - - const db = drizzle(rc.env.DATABASE, { schema }); - const workspaceId = ulid(); - const workspaceName = `Task: ${state.config.taskTitle.slice(0, 50)}`; - const uniqueName = await resolveUniqueWorkspaceDisplayName(db, state.stepResults.nodeId, workspaceName); - const now = new Date().toISOString(); - - await db.insert(schema.workspaces).values({ - id: workspaceId, - nodeId: state.stepResults.nodeId, - projectId: state.projectId, - userId: state.userId, - installationId: state.config.installationId, - name: workspaceName, - displayName: uniqueName.displayName, - normalizedDisplayName: uniqueName.normalizedDisplayName, - repository: state.config.repository, - branch: state.config.branch, - status: 'creating', - vmSize: state.config.vmSize, - vmLocation: state.config.vmLocation, - workspaceProfile: state.config.workspaceProfile ?? DEFAULT_WORKSPACE_PROFILE, - devcontainerConfigName: state.config.devcontainerConfigName ?? null, - createdAt: now, - updatedAt: now, - }); - - // Update task with workspace ID - await rc.env.DATABASE.prepare( - `UPDATE tasks SET workspace_id = ?, updated_at = ? 
WHERE id = ?` - ).bind(workspaceId, now, state.taskId).run(); - - state.stepResults.workspaceId = workspaceId; - await rc.ctx.storage.put('state', state); - - // Start compute usage metering (best-effort) - try { - const { startComputeTracking } = await import('../../services/compute-usage'); - const nodeRow = await rc.env.DATABASE.prepare( - `SELECT cloud_provider, credential_source FROM nodes WHERE id = ?` - ).bind(state.stepResults.nodeId).first<{ cloud_provider: string | null; credential_source: string | null }>(); - - await startComputeTracking(db, { - userId: state.userId, - workspaceId, - nodeId: state.stepResults.nodeId, - vmSize: state.config.vmSize, - cloudProvider: nodeRow?.cloud_provider, - credentialSource: (nodeRow?.credential_source as 'user' | 'platform') ?? 'user', - }); - } catch (err) { - log.error('task_runner_do.compute_tracking_start_failed', { - taskId: state.taskId, - workspaceId, - error: err instanceof Error ? err.message : String(err), - }); - } - - // TDF-6: Link existing chat session to workspace (session created at submit time). - // No new session creation here — one session per task. Uses shared helper - // so crash recovery path also gets session linking (MEDIUM #1 fix). - await ensureSessionLinked(state, workspaceId, rc); - - // Set output_branch - const outputBranch = state.config.outputBranch || `task/${state.taskId}`; - await rc.env.DATABASE.prepare( - `UPDATE tasks SET output_branch = ?, updated_at = ? 
WHERE id = ?` - ).bind(outputBranch, now, state.taskId).run(); - - // Create workspace on VM agent - const { signCallbackToken } = await import('../../services/jwt'); - const { createWorkspaceOnNode } = await import('../../services/node-agent'); - const { getDevcontainerCacheCredentials } = await import('../../services/devcontainer-cache'); - - const callbackToken = await signCallbackToken(workspaceId, rc.env); - let devcontainerCache = null; - if (state.config.workspaceProfile !== 'lightweight') { - try { - devcontainerCache = await getDevcontainerCacheCredentials( - rc.env, - state.config.repository, - state.config.devcontainerConfigName - ); - } catch (err) { - log.warn('task_runner_do.devcontainer_cache_credentials_failed', { - taskId: state.taskId, - workspaceId, - error: err instanceof Error ? err.message : String(err), - }); - } - } - - await createWorkspaceOnNode(state.stepResults.nodeId, rc.env, state.userId, { - workspaceId, - repository: state.config.repository, - branch: state.config.branch, - callbackToken, - gitUserName: state.config.userName, - gitUserEmail: state.config.userEmail, - githubId: state.config.githubId, - lightweight: state.config.workspaceProfile === 'lightweight', - devcontainerConfigName: state.config.devcontainerConfigName ?? undefined, - devcontainerCache, - }); - - await rc.ctx.storage.put('state', state); + await createAndProvisionWorkspace(state, rc); } // Transition task: queued → delegated (optimistic locking) @@ -204,6 +71,197 @@ export async function handleWorkspaceCreation( await rc.advanceToStep(state, 'workspace_ready'); } +async function recoverWorkspaceFromD1( + state: TaskRunnerState, + rc: TaskRunnerContext, +): Promise { + // If workspace already created (retry or crash recovery), skip creation. + // Check both DO state AND D1 to handle the crash window between D1 insert + // and storage.put — if D1 has a workspace_id for this task but the DO + // state doesn't, recover it. 
+ if (state.stepResults.workspaceId) { + return; + } + + const existingTask = await rc.env.DATABASE.prepare( + `SELECT workspace_id, status FROM tasks WHERE id = ?` + ).bind(state.taskId).first<{ workspace_id: string | null; status: string }>(); + + if (!existingTask?.workspace_id) { + return; + } + + // D1 has a workspace — recover it into DO state (crash recovery) + state.stepResults.workspaceId = existingTask.workspace_id; + await rc.ctx.storage.put('state', state); + + log.info('task_runner_do.workspace_recovered_from_d1', { + taskId: state.taskId, + workspaceId: existingTask.workspace_id, + }); +} + +async function isTaskDelegated( + state: TaskRunnerState, + rc: TaskRunnerContext, +): Promise { + const task = await rc.env.DATABASE.prepare( + `SELECT status FROM tasks WHERE id = ?` + ).bind(state.taskId).first<{ status: string }>(); + + return task?.status === 'delegated'; +} + +async function createAndProvisionWorkspace( + state: TaskRunnerState, + rc: TaskRunnerContext, +): Promise { + const { ulid } = await import('../../lib/ulid'); + const { resolveUniqueWorkspaceDisplayName } = await import('../../services/workspace-names'); + const { drizzle } = await import('drizzle-orm/d1'); + const schema = await import('../../db/schema'); + + const db = drizzle(rc.env.DATABASE, { schema }); + const nodeId = state.stepResults.nodeId; + if (!nodeId) { + throw new Error('No nodeId in state — cannot create workspace'); + } + const workspaceId = ulid(); + const workspaceName = `Task: ${state.config.taskTitle.slice(0, 50)}`; + const uniqueName = await resolveUniqueWorkspaceDisplayName( + db, + nodeId, + workspaceName + ); + const now = new Date().toISOString(); + + await db.insert(schema.workspaces).values({ + id: workspaceId, + nodeId, + projectId: state.projectId, + userId: state.userId, + installationId: state.config.installationId, + name: workspaceName, + displayName: uniqueName.displayName, + normalizedDisplayName: uniqueName.normalizedDisplayName, + repository: 
state.config.repository, + branch: state.config.branch, + status: 'creating', + vmSize: state.config.vmSize, + vmLocation: state.config.vmLocation, + workspaceProfile: state.config.workspaceProfile ?? DEFAULT_WORKSPACE_PROFILE, + devcontainerConfigName: state.config.devcontainerConfigName ?? null, + createdAt: now, + updatedAt: now, + }); + + await rc.env.DATABASE.prepare( + `UPDATE tasks SET workspace_id = ?, updated_at = ? WHERE id = ?` + ).bind(workspaceId, now, state.taskId).run(); + + state.stepResults.workspaceId = workspaceId; + await rc.ctx.storage.put('state', state); + await startComputeTrackingBestEffort(state, rc, db, workspaceId, nodeId); + await ensureSessionLinked(state, workspaceId, rc); + await setOutputBranch(state, rc, now); + await createWorkspaceOnVmAgent(state, rc, workspaceId, nodeId); + await rc.ctx.storage.put('state', state); +} + +async function startComputeTrackingBestEffort( + state: TaskRunnerState, + rc: TaskRunnerContext, + db: unknown, + workspaceId: string, + nodeId: string, +): Promise { + try { + const { startComputeTracking } = await import('../../services/compute-usage'); + const nodeRow = await rc.env.DATABASE.prepare( + `SELECT cloud_provider, credential_source FROM nodes WHERE id = ?` + ).bind(nodeId).first<{ + cloud_provider: string | null; + credential_source: string | null; + }>(); + + await startComputeTracking(db as Parameters[0], { + userId: state.userId, + workspaceId, + nodeId, + vmSize: state.config.vmSize, + cloudProvider: nodeRow?.cloud_provider, + credentialSource: (nodeRow?.credential_source as 'user' | 'platform') ?? 'user', + }); + } catch (err) { + log.error('task_runner_do.compute_tracking_start_failed', { + taskId: state.taskId, + workspaceId, + error: err instanceof Error ? 
err.message : String(err), + }); + } +} + +async function setOutputBranch( + state: TaskRunnerState, + rc: TaskRunnerContext, + now: string, +): Promise { + const outputBranch = state.config.outputBranch || `task/${state.taskId}`; + await rc.env.DATABASE.prepare( + `UPDATE tasks SET output_branch = ?, updated_at = ? WHERE id = ?` + ).bind(outputBranch, now, state.taskId).run(); +} + +async function createWorkspaceOnVmAgent( + state: TaskRunnerState, + rc: TaskRunnerContext, + workspaceId: string, + nodeId: string, +): Promise { + const { signCallbackToken } = await import('../../services/jwt'); + const { createWorkspaceOnNode } = await import('../../services/node-agent'); + const callbackToken = await signCallbackToken(workspaceId, rc.env); + + await createWorkspaceOnNode(nodeId, rc.env, state.userId, { + workspaceId, + repository: state.config.repository, + branch: state.config.branch, + callbackToken, + gitUserName: state.config.userName, + gitUserEmail: state.config.userEmail, + githubId: state.config.githubId, + lightweight: state.config.workspaceProfile === 'lightweight', + devcontainerConfigName: state.config.devcontainerConfigName ?? undefined, + devcontainerCache: await getDevcontainerCacheForWorkspace(state, rc, workspaceId), + }); +} + +async function getDevcontainerCacheForWorkspace( + state: TaskRunnerState, + rc: TaskRunnerContext, + workspaceId: string, +): Promise { + if (state.config.workspaceProfile === 'lightweight') { + return null; + } + + try { + const { getDevcontainerCacheCredentials } = await import('../../services/devcontainer-cache'); + return await getDevcontainerCacheCredentials( + rc.env, + state.config.repository, + state.config.devcontainerConfigName + ); + } catch (err) { + log.warn('task_runner_do.devcontainer_cache_credentials_failed', { + taskId: state.taskId, + workspaceId, + error: err instanceof Error ? 
err.message : String(err), + }); + return null; + } +} + export async function handleWorkspaceReady( state: TaskRunnerState, rc: TaskRunnerContext, From 5f0a9e36ef7fd8681b5ee73a080beee676b23572 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Titsworth-Morin?= Date: Mon, 11 May 2026 15:02:47 +0000 Subject: [PATCH 13/13] chore: clear devcontainer cache sonar hotspots --- .../devcontainer-cache-experiments.yml | 20 +++--- apps/api/src/services/devcontainer-cache.ts | 70 +++++++++++++++++-- 2 files changed, 76 insertions(+), 14 deletions(-) diff --git a/.github/workflows/devcontainer-cache-experiments.yml b/.github/workflows/devcontainer-cache-experiments.yml index ebdee5f27..bbcd0bb59 100644 --- a/.github/workflows/devcontainer-cache-experiments.yml +++ b/.github/workflows/devcontainer-cache-experiments.yml @@ -42,13 +42,13 @@ jobs: CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID }} IMAGE_NAME: sam-devcontainer-cache-exp steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd - - uses: pnpm/action-setup@v4 + - uses: pnpm/action-setup@f40ffcd9367d9f12939873eb1018b921a783ffaa with: version: 9.15.9 - - uses: actions/setup-node@v6 + - uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e with: node-version: 22 cache: pnpm @@ -108,13 +108,13 @@ jobs: AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} AWS_DEFAULT_REGION: auto steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd - - uses: pnpm/action-setup@v4 + - uses: pnpm/action-setup@f40ffcd9367d9f12939873eb1018b921a783ffaa with: version: 9.15.9 - - uses: actions/setup-node@v6 + - uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e with: node-version: 22 cache: pnpm @@ -155,7 +155,7 @@ jobs: echo "- Key: \`$KEY\`" >> "$GITHUB_STEP_SUMMARY" echo "- Result: upload, download, load, run succeeded" >> "$GITHUB_STEP_SUMMARY" - - uses: docker/setup-buildx-action@v3 + - uses: 
docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f with: driver: docker-container buildkitd-flags: --debug @@ -202,13 +202,13 @@ jobs: CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID }} IMAGE_NAME: sam-devcontainer-cache-stress steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd - - uses: pnpm/action-setup@v4 + - uses: pnpm/action-setup@f40ffcd9367d9f12939873eb1018b921a783ffaa with: version: 9.15.9 - - uses: actions/setup-node@v6 + - uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e with: node-version: 22 cache: pnpm diff --git a/apps/api/src/services/devcontainer-cache.ts b/apps/api/src/services/devcontainer-cache.ts index 73ca7529e..233abac10 100644 --- a/apps/api/src/services/devcontainer-cache.ts +++ b/apps/api/src/services/devcontainer-cache.ts @@ -73,7 +73,7 @@ function parseGitHubRepo(repository: string): { owner: string; repo: string } | return null; } if (value.startsWith('git@github.com:')) { - return splitOwnerRepo(value.slice('git@github.com:'.length).replace(/\.git$/, '')); + return splitOwnerRepo(stripGitSuffix(value.slice('git@github.com:'.length))); } if (value.includes('://')) { let url: URL; @@ -85,7 +85,7 @@ function parseGitHubRepo(repository: string): { owner: string; repo: string } | if (url.hostname !== 'github.com') { return null; } - return splitOwnerRepo(decodeURIComponent(url.pathname).replace(/^\/+|\/+$/g, '').replace(/\.git$/, '')); + return splitOwnerRepo(stripGitSuffix(trimSlashes(decodeURIComponent(url.pathname)))); } return splitOwnerRepo(value); } @@ -99,11 +99,73 @@ function splitOwnerRepo(path: string): { owner: string; repo: string } | null { } function sanitizeRepositoryComponent(value: string): string { - return value.toLowerCase().replace(/[^a-z0-9._-]+/g, '-').replace(/^-+|-+$/g, ''); + return trimHyphens(replaceInvalidRepositoryChars(value.toLowerCase())); } function sanitizeRepositoryPrefix(value: string): string { - return 
value.toLowerCase().replace(/[^a-z0-9._-]+/g, '-').replace(/^-+/g, ''); + return trimLeadingHyphens(replaceInvalidRepositoryChars(value.toLowerCase())); +} + +function stripGitSuffix(value: string): string { + return value.endsWith('.git') ? value.slice(0, -4) : value; +} + +function trimSlashes(value: string): string { + let start = 0; + let end = value.length; + while (start < end && value.charCodeAt(start) === 47) { + start += 1; + } + while (end > start && value.charCodeAt(end - 1) === 47) { + end -= 1; + } + return value.slice(start, end); +} + +function trimHyphens(value: string): string { + return trimTrailingHyphens(trimLeadingHyphens(value)); +} + +function trimLeadingHyphens(value: string): string { + let start = 0; + while (start < value.length && value.charCodeAt(start) === 45) { + start += 1; + } + return value.slice(start); +} + +function trimTrailingHyphens(value: string): string { + let end = value.length; + while (end > 0 && value.charCodeAt(end - 1) === 45) { + end -= 1; + } + return value.slice(0, end); +} + +function replaceInvalidRepositoryChars(value: string): string { + let result = ''; + let lastWasHyphen = false; + for (const char of value) { + if (isRepositoryChar(char)) { + result += char; + lastWasHyphen = false; + } else if (!lastWasHyphen) { + result += '-'; + lastWasHyphen = true; + } + } + return result; +} + +function isRepositoryChar(char: string): boolean { + const code = char.charCodeAt(0); + return ( + (code >= 97 && code <= 122) || + (code >= 48 && code <= 57) || + code === 46 || + code === 95 || + code === 45 + ); } function cacheTag(devcontainerConfigName?: string | null): string {