From 11b25282e869f074abf02e187bbdb98dfbbe5c4b Mon Sep 17 00:00:00 2001 From: Guzman Alvarez Date: Thu, 7 May 2026 14:27:39 +0200 Subject: [PATCH 1/3] fix: create parent directory before moving workflow clone When loading a custom workflow without a subpath, the /workspace/workflows/ parent directory was never created before the mv/shutil.move call. This caused the move to fail silently (hydrate.sh has set +e, workflow.py catches exceptions), and the finally block then deleted the successfully cloned temp directory. The runner later created an empty directory as the CWD, making it appear the workflow loaded but was empty. The subpath code paths already had the mkdir -p / parent.mkdir() calls; this adds them to the non-subpath paths in both hydrate.sh and workflow.py. Co-Authored-By: Claude Opus 4.6 --- .../runners/ambient-runner/ambient_runner/endpoints/workflow.py | 2 ++ components/runners/state-sync/hydrate.sh | 1 + 2 files changed, 3 insertions(+) mode change 100644 => 100755 components/runners/state-sync/hydrate.sh diff --git a/components/runners/ambient-runner/ambient_runner/endpoints/workflow.py b/components/runners/ambient-runner/ambient_runner/endpoints/workflow.py index 133807fa0..af70f5836 100644 --- a/components/runners/ambient-runner/ambient_runner/endpoints/workflow.py +++ b/components/runners/ambient-runner/ambient_runner/endpoints/workflow.py @@ -149,10 +149,12 @@ async def clone_workflow_at_runtime( logger.warning(f"Subpath '{subpath}' not found, using entire repo") if workflow_final.exists(): shutil.rmtree(workflow_final) + workflow_final.parent.mkdir(parents=True, exist_ok=True) shutil.move(str(temp_dir), str(workflow_final)) else: if workflow_final.exists(): shutil.rmtree(workflow_final) + workflow_final.parent.mkdir(parents=True, exist_ok=True) shutil.move(str(temp_dir), str(workflow_final)) logger.info(f"Workflow '{workflow_name}' ready at {workflow_final}") diff --git a/components/runners/state-sync/hydrate.sh b/components/runners/state-sync/hydrate.sh old mode 100644 new mode 100755 index 12bf33334..cb955ae93 --- a/components/runners/state-sync/hydrate.sh +++ b/components/runners/state-sync/hydrate.sh @@ -345,6 +345,7 @@ if [ -n "$ACTIVE_WORKFLOW_GIT_URL" ] && [ "$ACTIVE_WORKFLOW_GIT_URL" != "null" ] fi else # No subpath - use entire repo + mkdir -p "$(dirname "$WORKFLOW_FINAL")" mv "$WORKFLOW_TEMP" "$WORKFLOW_FINAL" echo " ✓ Workflow ready at /workspace/workflows/${WORKFLOW_NAME}" fi From 481bd5317db8529d2c4d5d73d4da10d1b6557dbd Mon Sep 17 00:00:00 2001 From: Guzman Alvarez Date: Thu, 7 May 2026 14:45:39 +0200 Subject: [PATCH 2/3] fix: also add mkdir -p in subpath-fallback branch of hydrate.sh The subpath-fallback branch (when a subpath is specified but not found in the cloned repo) also lacked mkdir -p before mv, same root cause. Co-Authored-By: Claude Opus 4.6 --- components/runners/state-sync/hydrate.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/components/runners/state-sync/hydrate.sh b/components/runners/state-sync/hydrate.sh index cb955ae93..1e128e1df 100755 --- a/components/runners/state-sync/hydrate.sh +++ b/components/runners/state-sync/hydrate.sh @@ -340,6 +340,7 @@ if [ -n "$ACTIVE_WORKFLOW_GIT_URL" ] && [ "$ACTIVE_WORKFLOW_GIT_URL" != "null" ] echo " Available paths in repo:" find "$WORKFLOW_TEMP" -maxdepth 3 -type d | head -10 echo " Using entire repo instead" + mkdir -p "$(dirname "$WORKFLOW_FINAL")" mv "$WORKFLOW_TEMP" "$WORKFLOW_FINAL" echo " ✓ Workflow ready at /workspace/workflows/${WORKFLOW_NAME}" fi From 66f6f7e2041c3f393eea9887475cf99d451b0e66 Mon Sep 17 00:00:00 2001 From: Guzman Alvarez Date: Thu, 7 May 2026 15:46:33 +0200 Subject: [PATCH 3/3] fix: decouple repo/workflow cloning from S3 availability in hydrate.sh The init container previously called exit 0 when S3 credentials were missing, which skipped all git cloning of repos and workflows. Repo and workflow cloning are independent of S3 state persistence and should always run. Replace the early exit with a conditional block around S3-specific operations (rclone setup, state download, repo state restore), while leaving git credential setup, repo cloning, and workflow cloning unconditional. Co-Authored-By: Claude Opus 4.6 --- components/runners/state-sync/hydrate.sh | 273 +++++++++++------------ 1 file changed, 135 insertions(+), 138 deletions(-) diff --git a/components/runners/state-sync/hydrate.sh b/components/runners/state-sync/hydrate.sh index 1e128e1df..ffeda84a2 100755 --- a/components/runners/state-sync/hydrate.sh +++ b/components/runners/state-sync/hydrate.sh @@ -91,70 +91,64 @@ chmod 777 /workspace/file-uploads 2>/dev/null || true # - Directory contains cloned git repos (no secrets), so world-writable is acceptable chmod 777 /workspace/repos 2>/dev/null || true -# Check if S3 is configured -if [ -z "${S3_ENDPOINT}" ] || [ -z "${S3_BUCKET}" ] || [ -z "${AWS_ACCESS_KEY_ID}" ] || [ -z "${AWS_SECRET_ACCESS_KEY}" ]; then - echo "S3 not configured - using ephemeral storage only (no state persistence)" - echo "=========================================" - exit 0 -fi - -# Setup rclone -echo "Setting up rclone..." -setup_rclone - -S3_PATH="s3:${S3_BUCKET}/${NAMESPACE}/${SESSION_NAME}" - -# Test S3 connection -echo "Testing S3 connection..." -if ! rclone --config /tmp/.config/rclone/rclone.conf lsd "s3:${S3_BUCKET}/" --max-depth 1 2>&1; then - error_exit "Failed to connect to S3 at ${S3_ENDPOINT}. Check endpoint and credentials." -fi -echo "S3 connection successful" - -# Check if session state exists in S3 -echo "Checking for existing session state in S3..." -if rclone --config /tmp/.config/rclone/rclone.conf lsf "${S3_PATH}/" 2>/dev/null | grep -q .; then - echo "Found existing session state, downloading from S3..." - - # Download framework state data to the framework data path - if rclone --config /tmp/.config/rclone/rclone.conf lsf "${S3_PATH}/${RUNNER_STATE_DIR}/" 2>/dev/null | grep -q .; then - echo " Downloading ${RUNNER_STATE_DIR}/..." - rclone --config /tmp/.config/rclone/rclone.conf copy "${S3_PATH}/${RUNNER_STATE_DIR}/" "${FRAMEWORK_DATA_PATH}/" \ - --copy-links \ - --transfers 8 \ - --fast-list \ - --progress 2>&1 || echo " Warning: failed to download ${RUNNER_STATE_DIR}" - else - echo " No data for ${RUNNER_STATE_DIR}/" +# Check if S3 is configured and hydrate state if so +if [ -n "${S3_ENDPOINT}" ] && [ -n "${S3_BUCKET}" ] && [ -n "${AWS_ACCESS_KEY_ID}" ] && [ -n "${AWS_SECRET_ACCESS_KEY}" ]; then + # Setup rclone + echo "Setting up rclone..." + setup_rclone + + S3_PATH="s3:${S3_BUCKET}/${NAMESPACE}/${SESSION_NAME}" + + # Test S3 connection + echo "Testing S3 connection..." + if ! rclone --config /tmp/.config/rclone/rclone.conf lsd "s3:${S3_BUCKET}/" --max-depth 1 2>&1; then + error_exit "Failed to connect to S3 at ${S3_ENDPOINT}. Check endpoint and credentials." fi + echo "S3 connection successful" - # Download other sync paths to /workspace - for path in "${SYNC_PATHS[@]}"; do - if rclone --config /tmp/.config/rclone/rclone.conf lsf "${S3_PATH}/${path}/" 2>/dev/null | grep -q .; then - echo " Downloading ${path}/..." - rclone --config /tmp/.config/rclone/rclone.conf copy "${S3_PATH}/${path}/" "/workspace/${path}/" \ + # Check if session state exists in S3 + echo "Checking for existing session state in S3..." + if rclone --config /tmp/.config/rclone/rclone.conf lsf "${S3_PATH}/" 2>/dev/null | grep -q .; then + echo "Found existing session state, downloading from S3..." + + # Download framework state data to the framework data path + if rclone --config /tmp/.config/rclone/rclone.conf lsf "${S3_PATH}/${RUNNER_STATE_DIR}/" 2>/dev/null | grep -q .; then + echo " Downloading ${RUNNER_STATE_DIR}/..." + rclone --config /tmp/.config/rclone/rclone.conf copy "${S3_PATH}/${RUNNER_STATE_DIR}/" "${FRAMEWORK_DATA_PATH}/" \ --copy-links \ --transfers 8 \ --fast-list \ - --progress 2>&1 || echo " Warning: failed to download ${path}" + --progress 2>&1 || echo " Warning: failed to download ${RUNNER_STATE_DIR}" else - echo " No data for ${path}/" + echo " No data for ${RUNNER_STATE_DIR}/" fi - done - echo "State hydration complete!" + # Download other sync paths to /workspace + for path in "${SYNC_PATHS[@]}"; do + if rclone --config /tmp/.config/rclone/rclone.conf lsf "${S3_PATH}/${path}/" 2>/dev/null | grep -q .; then + echo " Downloading ${path}/..." + rclone --config /tmp/.config/rclone/rclone.conf copy "${S3_PATH}/${path}/" "/workspace/${path}/" \ + --copy-links \ + --transfers 8 \ + --fast-list \ + --progress 2>&1 || echo " Warning: failed to download ${path}" + else + echo " No data for ${path}/" + fi + done + + echo "State hydration complete!" + else + echo "No existing state found, starting fresh session" + fi else - echo "No existing state found, starting fresh session" + echo "S3 not configured - skipping state hydration (no persistence)" fi -# Set ownership and permissions on subdirectories after S3 download +# Set ownership and permissions on subdirectories echo "Setting ownership and permissions on subdirectories..." -# Try chown first (works on standard K8s), fall back to 777 if blocked by SELinux/SCC chown -R 1001:0 "${FRAMEWORK_DATA_PATH}" /workspace/artifacts /workspace/file-uploads /workspace/repos 2>/dev/null || true -# Framework data dir needs 777 for SDK internals chmod -R 777 "${FRAMEWORK_DATA_PATH}" 2>/dev/null || true -# repos also needs write access for runtime repo additions (clone_repo_at_runtime) -# See security rationale above for why 777 is used chmod -R 755 /workspace/artifacts 2>/dev/null || true chmod -R 777 /workspace/file-uploads 2>/dev/null || true chmod -R 777 /workspace/repos 2>/dev/null || true @@ -359,114 +353,117 @@ fi # ======================================== # Restore git repo state from S3 backup # ======================================== -echo "=========================================" -echo "Checking for git repo state backup..." -echo "=========================================" +# Restore git repo state from S3 backup (only when S3 is available) +if [ -n "${S3_PATH:-}" ]; then + echo "=========================================" + echo "Checking for git repo state backup..." + echo "=========================================" -S3_REPO_STATE="${S3_PATH}/repo-state/" + S3_REPO_STATE="${S3_PATH}/repo-state/" -if rclone --config /tmp/.config/rclone/rclone.conf lsf "${S3_REPO_STATE}" 2>/dev/null | grep -q .; then - echo "Found git repo state backup, restoring..." + if rclone --config /tmp/.config/rclone/rclone.conf lsf "${S3_REPO_STATE}" 2>/dev/null | grep -q .; then + echo "Found git repo state backup, restoring..." - REPO_STATE_DIR="/tmp/repo-state" - rm -rf "${REPO_STATE_DIR}" - mkdir -p "${REPO_STATE_DIR}" + REPO_STATE_DIR="/tmp/repo-state" + rm -rf "${REPO_STATE_DIR}" + mkdir -p "${REPO_STATE_DIR}" - # Download repo state from S3 - rclone --config /tmp/.config/rclone/rclone.conf copy "${S3_REPO_STATE}" "${REPO_STATE_DIR}/" \ - --transfers 8 \ - --fast-list \ - --progress 2>&1 || echo " Warning: failed to download repo state" + # Download repo state from S3 + rclone --config /tmp/.config/rclone/rclone.conf copy "${S3_REPO_STATE}" "${REPO_STATE_DIR}/" \ + --transfers 8 \ + --fast-list \ + --progress 2>&1 || echo " Warning: failed to download repo state" - for repo_state_dir in "${REPO_STATE_DIR}"/*/; do - [ -d "${repo_state_dir}" ] || continue + for repo_state_dir in "${REPO_STATE_DIR}"/*/; do + [ -d "${repo_state_dir}" ] || continue - REPO_NAME=$(basename "${repo_state_dir}") - REPO_DIR="/workspace/repos/${REPO_NAME}" - METADATA_FILE="${repo_state_dir}/metadata.json" + REPO_NAME=$(basename "${repo_state_dir}") + REPO_DIR="/workspace/repos/${REPO_NAME}" + METADATA_FILE="${repo_state_dir}/metadata.json" - echo " Restoring git state for ${REPO_NAME}..." + echo " Restoring git state for ${REPO_NAME}..." - if [ ! -f "${METADATA_FILE}" ]; then - echo " WARNING: No metadata.json for ${REPO_NAME}, skipping" - continue - fi + if [ ! -f "${METADATA_FILE}" ]; then + echo " WARNING: No metadata.json for ${REPO_NAME}, skipping" + continue + fi - # Read metadata - REMOTE_URL=$(jq -r '.remoteUrl // empty' "${METADATA_FILE}" 2>/dev/null || echo "") - SAVED_BRANCH=$(jq -r '.currentBranch // "main"' "${METADATA_FILE}" 2>/dev/null || echo "main") - SAVED_HEAD=$(jq -r '.headSha // empty' "${METADATA_FILE}" 2>/dev/null || echo "") + # Read metadata + REMOTE_URL=$(jq -r '.remoteUrl // empty' "${METADATA_FILE}" 2>/dev/null || echo "") + SAVED_BRANCH=$(jq -r '.currentBranch // "main"' "${METADATA_FILE}" 2>/dev/null || echo "main") + SAVED_HEAD=$(jq -r '.headSha // empty' "${METADATA_FILE}" 2>/dev/null || echo "") + + # If repo was not already cloned (e.g., runtime-added repo), clone it + if [ ! -d "${REPO_DIR}" ] && [ -n "${REMOTE_URL}" ]; then + # Redact credentials from URL for logging + SAFE_URL=$(echo "${REMOTE_URL}" | sed 's|://[^@]*@|://|') + echo " Cloning missing repo ${REPO_NAME} from ${SAFE_URL}..." + git config --global --add safe.directory "${REPO_DIR}" 2>/dev/null || true + git clone "${REMOTE_URL}" "${REPO_DIR}" 2>&1 || { + echo " WARNING: Failed to clone ${REPO_NAME}, skipping restore" + continue + } + fi - # If repo was not already cloned (e.g., runtime-added repo), clone it - if [ ! -d "${REPO_DIR}" ] && [ -n "${REMOTE_URL}" ]; then - # Redact credentials from URL for logging - SAFE_URL=$(echo "${REMOTE_URL}" | sed 's|://[^@]*@|://|') - echo " Cloning missing repo ${REPO_NAME} from ${SAFE_URL}..." - git config --global --add safe.directory "${REPO_DIR}" 2>/dev/null || true - git clone "${REMOTE_URL}" "${REPO_DIR}" 2>&1 || { - echo " WARNING: Failed to clone ${REPO_NAME}, skipping restore" + if [ ! -d "${REPO_DIR}/.git" ] && [ ! -f "${REPO_DIR}/.git" ]; then + echo " WARNING: ${REPO_DIR} is not a git repo, skipping restore" continue - } - fi - - if [ ! -d "${REPO_DIR}/.git" ] && [ ! -f "${REPO_DIR}/.git" ]; then - echo " WARNING: ${REPO_DIR} is not a git repo, skipping restore" - continue - fi + fi - git config --global --add safe.directory "${REPO_DIR}" 2>/dev/null || true + git config --global --add safe.directory "${REPO_DIR}" 2>/dev/null || true - # Import local branches from bundle using fetch (creates local branch refs) - if [ -f "${repo_state_dir}/repo.bundle" ]; then - echo " Fetching refs from bundle for ${REPO_NAME}..." - # Detach HEAD so fetch can update all branches (including the checked-out one) - git -C "${REPO_DIR}" checkout --detach 2>/dev/null || true - git -C "${REPO_DIR}" fetch "${repo_state_dir}/repo.bundle" "+refs/heads/*:refs/heads/*" 2>&1 || { - echo " WARNING: Failed to fetch refs from bundle for ${REPO_NAME}" - } - fi + # Import local branches from bundle using fetch (creates local branch refs) + if [ -f "${repo_state_dir}/repo.bundle" ]; then + echo " Fetching refs from bundle for ${REPO_NAME}..." + # Detach HEAD so fetch can update all branches (including the checked-out one) + git -C "${REPO_DIR}" checkout --detach 2>/dev/null || true + git -C "${REPO_DIR}" fetch "${repo_state_dir}/repo.bundle" "+refs/heads/*:refs/heads/*" 2>&1 || { + echo " WARNING: Failed to fetch refs from bundle for ${REPO_NAME}" + } + fi - # Fetch all remotes to ensure refs are up to date - git -C "${REPO_DIR}" fetch --all 2>/dev/null || true + # Fetch all remotes to ensure refs are up to date + git -C "${REPO_DIR}" fetch --all 2>/dev/null || true - # Checkout the saved branch - if [ "${SAVED_BRANCH}" != "unknown" ] && [ -n "${SAVED_BRANCH}" ]; then - echo " Checking out branch: ${SAVED_BRANCH}" - git -C "${REPO_DIR}" checkout "${SAVED_BRANCH}" 2>&1 || { - echo " WARNING: Failed to checkout ${SAVED_BRANCH}, staying on current branch" - } - fi + # Checkout the saved branch + if [ "${SAVED_BRANCH}" != "unknown" ] && [ -n "${SAVED_BRANCH}" ]; then + echo " Checking out branch: ${SAVED_BRANCH}" + git -C "${REPO_DIR}" checkout "${SAVED_BRANCH}" 2>&1 || { + echo " WARNING: Failed to checkout ${SAVED_BRANCH}, staying on current branch" + } + fi - # Apply uncommitted changes (best-effort) - if [ -f "${repo_state_dir}/uncommitted.patch" ] && [ -s "${repo_state_dir}/uncommitted.patch" ]; then - echo " Applying uncommitted changes..." - git -C "${REPO_DIR}" apply --allow-empty "${repo_state_dir}/uncommitted.patch" 2>&1 || { - echo " WARNING: Failed to apply uncommitted changes for ${REPO_NAME} (conflicts likely)" - } - fi + # Apply uncommitted changes (best-effort) + if [ -f "${repo_state_dir}/uncommitted.patch" ] && [ -s "${repo_state_dir}/uncommitted.patch" ]; then + echo " Applying uncommitted changes..." + git -C "${REPO_DIR}" apply --allow-empty "${repo_state_dir}/uncommitted.patch" 2>&1 || { + echo " WARNING: Failed to apply uncommitted changes for ${REPO_NAME} (conflicts likely)" + } + fi - # Apply staged changes (best-effort) - if [ -f "${repo_state_dir}/staged.patch" ] && [ -s "${repo_state_dir}/staged.patch" ]; then - echo " Applying staged changes..." - git -C "${REPO_DIR}" apply --cached --allow-empty "${repo_state_dir}/staged.patch" 2>&1 || { - echo " WARNING: Failed to apply staged changes for ${REPO_NAME} (conflicts likely)" - } - fi + # Apply staged changes (best-effort) + if [ -f "${repo_state_dir}/staged.patch" ] && [ -s "${repo_state_dir}/staged.patch" ]; then + echo " Applying staged changes..." + git -C "${REPO_DIR}" apply --cached --allow-empty "${repo_state_dir}/staged.patch" 2>&1 || { + echo " WARNING: Failed to apply staged changes for ${REPO_NAME} (conflicts likely)" + } + fi - # Verify HEAD SHA matches expectation - CURRENT_HEAD=$(git -C "${REPO_DIR}" rev-parse HEAD 2>/dev/null || echo "") - if [ -n "${SAVED_HEAD}" ] && [ -n "${CURRENT_HEAD}" ] && [ "${SAVED_HEAD}" != "${CURRENT_HEAD}" ]; then - echo " WARNING: HEAD diverged for ${REPO_NAME}: expected ${SAVED_HEAD:0:8}, got ${CURRENT_HEAD:0:8}" - fi + # Verify HEAD SHA matches expectation + CURRENT_HEAD=$(git -C "${REPO_DIR}" rev-parse HEAD 2>/dev/null || echo "") + if [ -n "${SAVED_HEAD}" ] && [ -n "${CURRENT_HEAD}" ] && [ "${SAVED_HEAD}" != "${CURRENT_HEAD}" ]; then + echo " WARNING: HEAD diverged for ${REPO_NAME}: expected ${SAVED_HEAD:0:8}, got ${CURRENT_HEAD:0:8}" + fi - echo " Restored ${REPO_NAME} (branch: ${SAVED_BRANCH})" - done + echo " Restored ${REPO_NAME} (branch: ${SAVED_BRANCH})" + done - # Clean up - rm -rf "${REPO_STATE_DIR}" - echo "Git repo state restore complete" -else - echo "No git repo state backup found" + # Clean up + rm -rf "${REPO_STATE_DIR}" + echo "Git repo state restore complete" + else + echo "No git repo state backup found" + fi fi # Set permissions on repos after restore (repos may have been cloned or restored)