#!/usr/bin/env bash
# Clone and build WebDriverAgent for iOS Simulator testing.
#
# Skips the build only when a usable build-for-testing artifact already exists
# for the commit that is currently checked out.
#
# Required (one of):
#   SIMULATOR_UDID   Simulator UDID for the build destination
#   SIMULATOR_NAME   Simulator name for the build destination (e.g., iPhone 16)
#
# Optional:
#   WEBDRIVERAGENT_REPO_URL  Repo URL (default: appium/WebDriverAgent)
#   WEBDRIVERAGENT_REF       Git ref or commit to build (default: current remote HEAD / existing checkout)

set -euo pipefail

if [[ -z "${SIMULATOR_UDID:-}" && -z "${SIMULATOR_NAME:-}" ]]; then
  echo "Error: set SIMULATOR_UDID or SIMULATOR_NAME" >&2
  exit 1
fi

WDA_DIR=".build/WebDriverAgent"
WDA_PROJECT="${WDA_DIR}/WebDriverAgent.xcodeproj"
WDA_DERIVED_DATA="${WDA_DIR}/DerivedData"
# Records the commit the cached artifacts were built from.
WDA_BUILD_STAMP="${WDA_DERIVED_DATA}/.built-revision"
# Full xcodebuild output; only a summary is echoed to the console.
WDA_BUILD_LOG=".build/WebDriverAgent-build.log"
WEBDRIVERAGENT_REPO_URL="${WEBDRIVERAGENT_REPO_URL:-https://github.com/appium/WebDriverAgent.git}"
WEBDRIVERAGENT_REF="${WEBDRIVERAGENT_REF:-}"

# A UDID wins over a name when both are provided.
if [[ -n "${SIMULATOR_UDID:-}" ]]; then
  DESTINATION="platform=iOS Simulator,id=${SIMULATOR_UDID}"
else
  DESTINATION="platform=iOS Simulator,name=${SIMULATOR_NAME}"
fi

# Make sure a WebDriverAgent checkout exists in WDA_DIR and, when
# WEBDRIVERAGENT_REF is set, that it points at that ref.
ensure_wda_checkout() {
  mkdir -p .build

  if [[ ! -d "${WDA_DIR}/.git" ]]; then
    git clone --depth 1 "${WEBDRIVERAGENT_REPO_URL}" "${WDA_DIR}"
  fi

  if [[ -n "${WEBDRIVERAGENT_REF}" ]]; then
    git -C "${WDA_DIR}" fetch --depth 1 origin "${WEBDRIVERAGENT_REF}"
    # The fetch above only updates FETCH_HEAD. A branch or tag name that the
    # shallow clone has no local ref for would not resolve, so check out
    # exactly what was fetched.
    git -C "${WDA_DIR}" checkout --detach FETCH_HEAD
  fi
}

# Succeeds when DerivedData contains at least one .xctestrun file.
has_built_artifacts() {
  [[ -d "${WDA_DERIVED_DATA}/Build/Products" ]] && \
    find "${WDA_DERIVED_DATA}/Build/Products" -name '*.xctestrun' -print -quit | grep -q .
}

# Succeeds when the stamp file names the commit that is checked out now.
# A missing stamp (e.g. a cache produced before stamps existed) counts as
# stale, which costs one rebuild.
is_build_current() {
  local built_revision current_revision

  [[ -f "${WDA_BUILD_STAMP}" ]] || return 1
  built_revision="$(<"${WDA_BUILD_STAMP}")"
  current_revision="$(git -C "${WDA_DIR}" rev-parse HEAD)" || return 1
  [[ "${built_revision}" == "${current_revision}" ]]
}

ensure_wda_checkout

if [[ -d "$WDA_PROJECT" ]] && has_built_artifacts && is_build_current; then
  echo "WebDriverAgent already built, skipping."
  exit 0
fi

# Drop any stale stamp so a failed build can never look current.
rm -f -- "${WDA_BUILD_STAMP}"

# Keep the console quiet on success, but surface the tail of the log when the
# build fails so the failure can be diagnosed from CI output.
if ! xcodebuild build-for-testing \
  -project "$WDA_PROJECT" \
  -scheme WebDriverAgentRunner \
  -destination "$DESTINATION" \
  -derivedDataPath "$WDA_DERIVED_DATA" \
  CODE_SIGNING_ALLOWED=NO \
  >"${WDA_BUILD_LOG}" 2>&1; then
  echo "Error: xcodebuild build-for-testing failed; last lines of ${WDA_BUILD_LOG}:" >&2
  tail -n 50 "${WDA_BUILD_LOG}" >&2
  exit 1
fi
tail -n 1 "${WDA_BUILD_LOG}"

if ! has_built_artifacts; then
  echo "Error: WebDriverAgent build completed without an .xctestrun artifact" >&2
  exit 1
fi

git -C "${WDA_DIR}" rev-parse HEAD >"${WDA_BUILD_STAMP}"
# Print the site URL with a scheme guaranteed.
#
# Arguments: $1 - site URL or bare host
# Outputs:   the URL unchanged when it already starts with http:// or
#            https:// (matched case-insensitively, since URL schemes are
#            case-insensitive); otherwise the input prefixed with https://
normalize_site_url() {
  local site_url="$1"

  # Character classes instead of ${var,,} keep this usable on older bash.
  case "$site_url" in
    [Hh][Tt][Tt][Pp]://* | [Hh][Tt][Tt][Pp][Ss]://*)
      printf '%s' "$site_url"
      ;;
    *)
      printf 'https://%s' "$site_url"
      ;;
  esac
}

# EXIT handler: stop the progress tail and, if WebDriverAgent was started by
# this run (WDA_STARTED == 1), ask the stop script to shut it down.
# Globals: WDA_STARTED, WDA_STOP, WDA_PORT (read)
cleanup_wda() {
  stop_progress_tail
  if [[ "$WDA_STARTED" -eq 1 ]]; then
    echo "--- Cleanup"
    # Best effort: a failing stop must not change the script's exit path.
    ruby "$WDA_STOP" --port "$WDA_PORT" 2>/dev/null || true
  fi
}

# Kill and reap the background process whose PID is stored in
# AI_TEST_PROGRESS_TAIL_PID, then clear the variable. No-op when it is unset
# or empty.
stop_progress_tail() {
  if [[ -n "${AI_TEST_PROGRESS_TAIL_PID:-}" ]]; then
    kill "$AI_TEST_PROGRESS_TAIL_PID" 2>/dev/null || true
    # Reap the child so it does not linger; its exit status is irrelevant.
    wait "$AI_TEST_PROGRESS_TAIL_PID" 2>/dev/null || true
    unset AI_TEST_PROGRESS_TAIL_PID
  fi
}
# ── Required env vars ──────────────────────────────────────────────────
: "${ANTHROPIC_API_KEY:?Set ANTHROPIC_API_KEY}"
: "${SITE_URL:?Set SITE_URL}"
: "${WP_USERNAME:?Set WP_USERNAME}"
: "${WP_APP_PASSWORD:?Set WP_APP_PASSWORD}"
# Assign first, export second: `export VAR="$(cmd)"` reports the status of
# `export`, so a failing command substitution would go unnoticed by `set -e`.
SITE_URL="$(normalize_site_url "$SITE_URL")"
export SITE_URL

# ── Defaults ───────────────────────────────────────────────────────────
APP="${APP:-jetpack}"
export SIMULATOR_NAME="${SIMULATOR_NAME:-iPhone 16}"
WDA_PORT="${WDA_PORT:-8100}"
CLAUDE_MAX_TURNS="${CLAUDE_MAX_TURNS:-100}"
TEST_DIR="${TEST_DIR:-Tests/AgentTests/ui-tests}"
CLAUDE_MODEL="${CLAUDE_MODEL:-claude-sonnet-4-6}"
CLAUDE_CODE_EXPECTED_VERSION="${CLAUDE_CODE_EXPECTED_VERSION:-2.1.84}"
# The npm spec follows the expected version unless it is overridden explicitly.
CLAUDE_CODE_NPM_SPEC="${CLAUDE_CODE_NPM_SPEC:-@anthropic-ai/claude-code@${CLAUDE_CODE_EXPECTED_VERSION}}"
WDA_START_TIMEOUT="${WDA_START_TIMEOUT:-120}"

# Map the app flavour to the bundle identifier used on the simulator.
case "$APP" in
  wordpress) APP_BUNDLE_ID="org.wordpress" ;;
  jetpack) APP_BUNDLE_ID="com.automattic.jetpack" ;;
  *) echo "Error: APP must be 'wordpress' or 'jetpack', got '$APP'" >&2; exit 1 ;;
esac
export APP_BUNDLE_ID

# ── Artifact download (Buildkite only) ─────────────────────────────────
# download_artifact and install_gems are not defined in this script; they are
# expected to be on PATH in the Buildkite environment.
if [[ -n "${BUILDKITE:-}" ]]; then
  echo "--- Downloading Build Artifacts"
  download_artifact "build-products-${APP}.tar"
  tar -xf "build-products-${APP}.tar"

  echo "--- Setting up Gems"
  install_gems
fi

if [[ ! -d "$TEST_DIR" ]]; then
  echo "Error: test directory not found: $TEST_DIR" >&2
  exit 1
fi
# Write the result JSON for the current test via the Ruby writer script.
# Globals:   AI_TEST_RESULT_FILE, AI_TEST_TITLE, AI_TEST_FILE (read)
# Arguments: $1 - status, $2 - reason, $3 - optional screenshot path
write_result_file() {
  local outcome="$1"
  local explanation="$2"
  local screenshot="${3:-}"
  local -a writer_args=(
    "$AI_TEST_RESULT_FILE"
    "$AI_TEST_TITLE"
    "$AI_TEST_FILE"
    "$outcome"
    "$explanation"
    "$screenshot"
  )

  ruby Scripts/ci/write-ai-test-result.rb "${writer_args[@]}"
}

# Print one field of the current test's result file.
# Arguments: $1 - key to read
result_field() {
  ruby Scripts/ci/read-ai-test-result.rb "$AI_TEST_RESULT_FILE" "$1"
}

# Print the number of lines in the result-events file, or 0 when the file
# does not exist.
recorded_result_count() {
  [[ -f "$AI_TEST_RESULT_EVENTS_FILE" ]] || { echo 0; return; }
  awk 'END { print NR + 0 }' "$AI_TEST_RESULT_EVENTS_FILE"
}

# Print how many lines of the tab-separated usage file have the given purpose
# in column 1 and "1" in column 3 (presumably the success flag written by the
# REST wrapper; confirm there). Prints 0 when the file does not exist.
# Arguments: $1 - purpose to count
successful_rest_calls() {
  local wanted="$1"

  [[ -f "$AI_TEST_USAGE_FILE" ]] || { echo 0; return; }
  awk -F '\t' -v purpose="$wanted" '$1 == purpose && $3 == "1" { count += 1 } END { print count + 0 }' "$AI_TEST_USAGE_FILE"
}

# Join all arguments with "; " and print the result without a trailing
# newline. Nothing is printed when there are no arguments.
join_reasons() {
  local acc=""
  local item

  for item in "$@"; do
    # The separator is added only once something has been accumulated.
    acc="${acc:+${acc}; }${item}"
  done
  printf '%s' "$acc"
}
# Allow-list passed to Claude Code: one `--allowedTools "Bash(<pattern>)"`
# pair per permitted command pattern.
CLAUDE_ALLOWED_TOOLS=()
for allowed_command in \
  "./Scripts/ci/launch-app.sh" \
  "./Scripts/ci/wda-curl.sh *" \
  "./Scripts/ci/tap-element.sh *" \
  "./Scripts/ci/wp-api.sh *" \
  "./Scripts/ci/take-ai-test-screenshot.sh *" \
  "./Scripts/ci/record-ai-test-result.sh *" \
  "sleep *"; do
  CLAUDE_ALLOWED_TOOLS+=(--allowedTools "Bash(${allowed_command})")
done
unset allowed_command

# ── Resolve simulator ──────────────────────────────────────────────────
echo "--- Setting up Simulator"

# First attempt: look for an already-booted simulator without waiting.
SIMULATOR_UDID="$(ruby Scripts/ci/find-booted-simulator.rb "$SIMULATOR_NAME" 2>/dev/null || true)"

if [[ -z "$SIMULATOR_UDID" ]]; then
  echo "No booted simulator named '$SIMULATOR_NAME' found. Booting..."
  # Boot errors are ignored here; the polling lookup below is the real check.
  xcrun simctl boot "$SIMULATOR_NAME" 2>/dev/null || true
  # Second attempt: poll for up to 30 seconds at 1-second intervals.
  SIMULATOR_UDID="$(ruby Scripts/ci/find-booted-simulator.rb "$SIMULATOR_NAME" 30 1 2>/dev/null || true)"

  if [[ -z "$SIMULATOR_UDID" ]]; then
    echo "Error: could not find a booted simulator named '$SIMULATOR_NAME'" >&2
    exit 1
  fi
fi

export SIMULATOR_UDID
echo "Simulator UDID: $SIMULATOR_UDID"
# ── Build and start WDA ────────────────────────────────────────────────
echo "--- Building WebDriverAgent"
# Use the absolute SCRIPT_DIR captured at startup. "$0" may be a relative path,
# and the working directory has since changed to REPO_ROOT, so
# "$(dirname "$0")" could point at the wrong place.
"$SCRIPT_DIR/build-wda.sh"

echo "--- Starting WebDriverAgent"
# Arm the EXIT-trap cleanup before launching. The start script detaches its
# xcodebuild child and then waits for readiness; if that wait times out, the
# trap must still stop the half-started agent.
WDA_STARTED=1
ruby "$WDA_START" --udid "$SIMULATOR_UDID" --port "$WDA_PORT" --timeout "$WDA_START_TIMEOUT"
Scripts/ci/inspect-ai-test-case.rb "$AI_TEST_FILE" section-present verification)" + CLEANUP_EXPECTED="$(ruby Scripts/ci/inspect-ai-test-case.rb "$AI_TEST_FILE" section-present cleanup)" + export WDA_SESSION_ID="" + WDA_SESSION_ID="$(ruby Scripts/ci/create-wda-session.rb "$WDA_PORT" 2>/dev/null || true)" + + echo + echo "============================================================" + echo "[$((index + 1))/${#TEST_FILES[@]}] ${AI_TEST_TITLE}" + echo "============================================================" + + if [[ -z "$WDA_SESSION_ID" ]]; then + write_result_file fail "Failed to create a WebDriverAgent session before test execution" + RESULT_FILES+=("$AI_TEST_RESULT_FILE") + OVERALL_EXIT=1 + continue + fi + export WDA_SESSION_ID + + TEST_CONTENT="$(cat "$AI_TEST_FILE")" + SKILL_CONTENT="$(cat .claude/skills/ci-test-runner/SKILL.md | tail -n +8)" + PROMPT="$(cat < +- App Bundle ID: - WDA Session ID: - Simulator UDID: - Test file: (absolute path) @@ -117,7 +117,7 @@ Use the ios-sim-navigation skill for WDA interaction reference. 2. **Relaunch the app** for a clean state: ```bash - xcrun simctl launch --terminate-running-process \ + xcrun simctl launch --terminate-running-process \ -ui-test-site-url \ -ui-test-site-user \ -ui-test-site-pass diff --git a/.claude/skills/ci-test-runner/SKILL.md b/.claude/skills/ci-test-runner/SKILL.md new file mode 100644 index 000000000000..c99916bb8e6c --- /dev/null +++ b/.claude/skills/ci-test-runner/SKILL.md @@ -0,0 +1,159 @@ +--- +name: ci-test-runner +description: >- + CI-hardened single-test runner for WordPress/Jetpack iOS. Use when the prompt + contains one test case and the available tools are the constrained Scripts/ci + wrappers from Buildkite. +--- + +# CI Test Runner + +Drive the app through one UI test case. Every response must contain tool calls. +Do not narrate plans — act. 
+ +## Commands + +| Command | Purpose | +|---------|---------| +| `./Scripts/ci/launch-app.sh` | Relaunch app with test credentials | +| `./Scripts/ci/tap-element.sh IDENTIFIER_OR_LABEL` | Find element by accessibility ID or label and tap it (one call) | +| `./Scripts/ci/wda-curl.sh METHOD PATH [BODY]` | Raw WDA HTTP calls (for actions, typing, scrolling — see patterns below) | +| `./Scripts/ci/wp-api.sh PURPOSE METHOD PATH [BODY]` | REST API with purpose `setup`, `verification`, or `cleanup` | +| `./Scripts/ci/take-ai-test-screenshot.sh LABEL` | Screenshot — **only before recording a failure** (max 3 per test) | +| `./Scripts/ci/record-ai-test-result.sh STATUS REASON [SCREENSHOT]` | Record final result — call exactly once | +| `sleep N` | Wait N seconds | + +## WDA Patterns + +Session ID is in `$WDA_SESSION_ID`. + +### Fetch accessibility tree + +```bash +./Scripts/ci/wda-curl.sh GET '/source?format=description' +``` + +Returns a text tree. Each element has type, frame `{{x, y}, {width, height}}`, +optional identifier and label. The root node frame gives screen dimensions. + +### Tap element by ID or label (preferred — one call) + +```bash +./Scripts/ci/tap-element.sh 'Prologue Self Hosted Button' +``` + +Finds the element by accessibility ID first, then by label as fallback, and +taps it. Use this for all taps where you know the identifier or label. 
+ +### Tap by coordinates (when no ID/label, or for precise positioning) + +Compute center from frame: `X = x + width/2`, `Y = y + height/2`, then: + +```bash +./Scripts/ci/wda-curl.sh POST "/session/${WDA_SESSION_ID}/actions" \ + '{"actions":[{"type":"pointer","id":"f1","parameters":{"pointerType":"touch"},"actions":[{"type":"pointerMove","duration":0,"x":X,"y":Y},{"type":"pointerDown"},{"type":"pointerUp"}]}]}' +``` + +### Type text + +Tap the field first to focus it, then: + +```bash +./Scripts/ci/wda-curl.sh POST "/session/${WDA_SESSION_ID}/wda/keys" \ + '{"value":["h","e","l","l","o"]}' +``` + +### Clear text field + +Select all then delete: + +```bash +./Scripts/ci/wda-curl.sh POST "/session/${WDA_SESSION_ID}/wda/keys" '{"value":["\u0001"]}' +./Scripts/ci/wda-curl.sh POST "/session/${WDA_SESSION_ID}/wda/keys" '{"value":["\u007F"]}' +``` + +### Swipe / scroll + +```bash +./Scripts/ci/wda-curl.sh POST "/session/${WDA_SESSION_ID}/actions" \ + '{"actions":[{"type":"pointer","id":"f1","parameters":{"pointerType":"touch"},"actions":[{"type":"pointerMove","duration":0,"x":X1,"y":Y1},{"type":"pointerDown"},{"type":"pointerMove","duration":500,"x":X2,"y":Y2},{"type":"pointerUp"}]}]}' +``` + +- Scroll down: swipe upward, from a larger y (lower on screen) to a smaller y (higher on screen). Use `x = screen_width - 30`. +- Back gesture: swipe from `(5, H/2)` to `(W*2/3, H/2)`. +- If the tree is unchanged after a scroll, you reached the end. + +### Long press + +Same as tap but add a pause between down and up: + +```bash +./Scripts/ci/wda-curl.sh POST "/session/${WDA_SESSION_ID}/actions" \ + '{"actions":[{"type":"pointer","id":"f1","parameters":{"pointerType":"touch"},"actions":[{"type":"pointerMove","duration":0,"x":X,"y":Y},{"type":"pointerDown"},{"type":"pause","duration":1000},{"type":"pointerUp"}]}]}' +``` + +### Press hardware button + +```bash +./Scripts/ci/wda-curl.sh POST "/session/${WDA_SESSION_ID}/wda/pressButton" '{"name":"home"}' +``` + +## Test Flow + +1. 
`./Scripts/ci/launch-app.sh`, then `sleep 3`, then fetch the tree. +2. If the tree shows a login/prologue screen, follow the Login Flow below. + If already logged in (e.g., My Site tab visible), skip login. +3. Execute the test steps. After each action, `sleep 1` then fetch the tree + to confirm the UI changed before proceeding. +4. Run verification with `./Scripts/ci/wp-api.sh verification ...` if required. +5. Run cleanup with `./Scripts/ci/wp-api.sh cleanup ...` if required. +6. Call `./Scripts/ci/record-ai-test-result.sh pass "Short reason"`. + On failure, take a screenshot first and pass its path. + +## Login Flow + +1. `./Scripts/ci/tap-element.sh 'Prologue Self Hosted Button'` +2. `./Scripts/ci/tap-element.sh 'Site address'` +3. Type the site host (without scheme, e.g., `example.com`) +4. `./Scripts/ci/tap-element.sh 'Site Address Next Button'` +5. `sleep 3`, fetch tree — you should see the logged-in state + +Never use the WordPress.com flow. Never type a password — it is passed via +launch arguments. + +## Handling Common Situations + +- **System alerts** (permissions, tracking): Check the tree for `Alert` or + `Sheet` elements. Tap "Allow", "OK", or "Don't Allow" to dismiss, then retry. +- **Loading states**: If the tree shows a spinner, `sleep 2` and re-fetch. +- **Back navigation**: Tap the back button in the NavigationBar, or use the + back swipe gesture as a fallback. +- **WDA session expired** (4xx errors): Create a new session: + ```bash + ./Scripts/ci/wda-curl.sh POST /session '{"capabilities":{"alwaysMatch":{}}}' + ``` + Use `value.sessionId` from the response for subsequent calls. +- **App crash**: Re-run `./Scripts/ci/launch-app.sh`, `sleep 3`, re-fetch tree. + +## Element Finding Priority + +1. Accessibility identifier (most stable) +2. Label text +3. Type + context (e.g., Button inside NavigationBar) +4. Partial label match +5. 
Coordinates from the tree as last resort + +## Rules + +- **Act, don't narrate.** Every response must contain tool calls. +- **Use `tap-element.sh`** whenever you know the element's identifier or label. + Fall back to coordinate taps only when there's no usable ID/label. +- **NEVER take screenshots to inspect the UI.** Use the accessibility tree + instead — it is faster and does not cost a turn. Only call + `take-ai-test-screenshot.sh` right before `record-ai-test-result.sh fail` + so there is evidence of the failure. Maximum 3 per test. +- **Do not undo to recover from mistakes.** Move forward or fail the test. + Only use undo/redo if the test case specifically asks for it. +- **Do not skip verification or cleanup** if the test case declares them. +- **Call record-ai-test-result.sh exactly once.** Keep the reason short and + single-line. diff --git a/.claude/skills/ios-sim-navigation/SKILL.md b/.claude/skills/ios-sim-navigation/SKILL.md index 0ec4ab7efab1..96cb6311aeb4 100644 --- a/.claude/skills/ios-sim-navigation/SKILL.md +++ b/.claude/skills/ios-sim-navigation/SKILL.md @@ -366,7 +366,7 @@ If actions consistently fail or the tree looks unexpected, the app may have cras xcrun simctl list devices booted # Re-launch the app -xcrun simctl launch +xcrun simctl launch ``` After re-launching, create a new WDA session before continuing. 
diff --git a/.claude/skills/ios-sim-navigation/scripts/wda-start.rb b/.claude/skills/ios-sim-navigation/scripts/wda-start.rb index 68c5dd8e3ee0..f8e6986e1afc 100755 --- a/.claude/skills/ios-sim-navigation/scripts/wda-start.rb +++ b/.claude/skills/ios-sim-navigation/scripts/wda-start.rb @@ -9,6 +9,7 @@ # Options: # --udid Target a specific simulator (default: first booted) # --port WDA port (default: 8100) +# --timeout Seconds to wait for WDA to become ready (default: 60) # # Exit codes: # 0 WDA started successfully @@ -20,6 +21,7 @@ require "json" DEFAULT_PORT = 8100 +DEFAULT_TIMEOUT = 60 def get_booted_udid output = `xcrun simctl list devices booted -j 2>/dev/null` @@ -55,11 +57,13 @@ def wda_running?(port) udid = nil port = DEFAULT_PORT +timeout = DEFAULT_TIMEOUT parser = OptionParser.new do |opts| opts.banner = "Usage: wda-start.rb [options]" opts.on("--udid UDID", "Target a specific simulator") { |v| udid = v } opts.on("--port PORT", Integer, "WDA port (default: 8100)") { |v| port = v } + opts.on("--timeout SECONDS", Integer, "Seconds to wait for WDA to become ready (default: 60)") { |v| timeout = v } end parser.parse! 
#!/usr/bin/env bash

# Append a timestamped line to the progress log named by
# AI_TEST_PROGRESS_FILE, creating its parent directory if needed.
#
# Arguments: $1 - message to log
# Returns:   0 without writing anything when the message is empty or
#            AI_TEST_PROGRESS_FILE is unset or empty
log_ai_test_progress() {
  local text="${1:-}"
  local target="${AI_TEST_PROGRESS_FILE:-}"

  # Logging is optional: missing input or destination is not an error.
  [[ -n "$text" && -n "$target" ]] || return 0

  mkdir -p "$(dirname "$target")"
  printf '[%s] %s\n' "$(date +%H:%M:%S)" "$text" >> "$target"
}
#!/usr/bin/env ruby
# frozen_string_literal: true

# Create a WebDriverAgent session and print its id to stdout (no newline).
#
# Usage: create-wda-session.rb [PORT]   (PORT defaults to 8100)
#
# Exit status:
#   0  a session id was printed
#   1  the request failed, the response was not a 2xx, the body was not JSON,
#      or the response carried no session id (reason is written to stderr)

require 'json'
require 'net/http'
require 'uri'

port = Integer(ARGV[0] || 8100)
uri = URI("http://localhost:#{port}/session")
request = Net::HTTP::Post.new(uri)
request['Content-Type'] = 'application/json'
request.body = JSON.generate(capabilities: { alwaysMatch: {} })

begin
  response = Net::HTTP.start(uri.hostname, uri.port, read_timeout: 30, open_timeout: 10) do |http|
    http.request(request)
  end
rescue SystemCallError, SocketError, Timeout::Error, IOError => e
  # Connection refused, timeouts, resets: report one line instead of a backtrace.
  abort "Error: WebDriverAgent request on port #{port} failed: #{e.class}: #{e.message}"
end

# Only a 2xx response can describe a created session; 4xx/5xx are failures.
unless response.is_a?(Net::HTTPSuccess)
  abort "Error: WebDriverAgent returned HTTP #{response.code} when creating a session"
end

begin
  parsed = JSON.parse(response.body.to_s)
rescue JSON::ParserError => e
  abort "Error: WebDriverAgent returned a non-JSON response: #{e.message}"
end

# The id may be nested under "value" or sit at the top level; accept either.
value = parsed.is_a?(Hash) ? parsed['value'] : nil
session_id = (value['sessionId'] if value.is_a?(Hash)) ||
             (parsed['sessionId'] if parsed.is_a?(Hash))

# Never exit 0 with empty output: callers treat the printed text as the id.
abort 'Error: WebDriverAgent response did not contain a session id' if session_id.to_s.empty?

print(session_id)
# frozen_string_literal: true

# Print the UDID of a booted simulator and exit 0, or exit 1 if none is found.
#
# Arguments (all optional, positional):
#   1. simulator name to match exactly; empty means "first booted device"
#   2. seconds to keep polling; zero or less means a single attempt
#   3. seconds between polls; zero or less falls back to 1.0

require 'json'
require 'open3'

# Current reading of the monotonic clock, in seconds.
def monotonic_now
  Process.clock_gettime(Process::CLOCK_MONOTONIC)
end

# Device hashes that simctl reports in the "Booted" state.
# Exits the script with status 1 when simctl itself fails.
def booted_devices
  stdout, result = Open3.capture2('xcrun', 'simctl', 'list', 'devices', 'booted', '-j')
  exit 1 unless result.success?

  JSON.parse(stdout).fetch('devices', {}).each_with_object([]) do |(_runtime, entries), booted|
    booted.concat(entries.select { |entry| entry['state'] == 'Booted' })
  end
end

wanted_name = ARGV[0].to_s
max_wait = ARGV[1].to_f
pause = ARGV[2].to_f
pause = 1.0 unless pause.positive?
give_up_at = monotonic_now + [max_wait, 0].max

loop do
  candidates = booted_devices
  match = wanted_name.empty? ? candidates.first : candidates.find { |entry| entry['name'] == wanted_name }

  if match
    print(match['udid'])
    exit 0
  end

  # Single-shot mode, or the polling window has elapsed.
  exit 1 if max_wait <= 0 || monotonic_now >= give_up_at

  sleep pause
end
+ +content = File.read(file_path) +sections = {} +current_name = nil +buffer = [] + +content.each_line do |line| + heading = line.match(/^##\s+(.+)$/) + if heading + sections[current_name] = buffer.join.strip if current_name + current_name = heading[1].strip + buffer = [] + elsif current_name + buffer << line + end +end +sections[current_name] = buffer.join.strip if current_name + +case command +when 'title' + title = content[/^#\s+(.+)$/, 1] || File.basename(file_path, '.md') + print title +when 'slug' + slug = File.basename(file_path, '.md').downcase.gsub(/[^a-z0-9]+/, '-').gsub(/\A-+|-+\z/, '') + print slug +when 'section-present' + pattern = Regexp.new(ARGV.fetch(2), Regexp::IGNORECASE) + matched = sections.any? { |name, body| name.match?(pattern) && !body.to_s.strip.empty? } + print(matched ? '1' : '0') +else + abort "Unknown command: #{command}" +end diff --git a/Scripts/ci/launch-app.sh b/Scripts/ci/launch-app.sh new file mode 100755 index 000000000000..374f9d88c4e9 --- /dev/null +++ b/Scripts/ci/launch-app.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# Launch the app on the simulator with test credentials. +# Takes no arguments — all values come from environment variables. 
#
# Usage: launch-app.sh
#
# Environment (required):
#   SIMULATOR_UDID    Simulator UDID
#   APP_BUNDLE_ID     App bundle ID (org.wordpress or com.automattic.jetpack)
#   SITE_URL          WordPress site URL
#   WP_USERNAME       WordPress username
#   WP_APP_PASSWORD   WordPress application password
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# shellcheck source=Scripts/ci/ai-test-progress.sh
source "$SCRIPT_DIR/ai-test-progress.sh"

# Abort early, naming the variable, when any required value is missing.
: "${SIMULATOR_UDID:?SIMULATOR_UDID is required}"
: "${APP_BUNDLE_ID:?APP_BUNDLE_ID is required}"
: "${SITE_URL:?SITE_URL is required}"
: "${WP_USERNAME:?WP_USERNAME is required}"
: "${WP_APP_PASSWORD:?WP_APP_PASSWORD is required}"

# Launch arguments forwarded to the app: the UI-test switches first, then the
# site credentials taken from the environment.
app_launch_args=(
  -ui-testing YES
  -ui-test-reset-everything YES
  -ui-test-disable-prompts YES
  -ui-test-disable-animations YES
  -ui-test-disable-migration YES
  -ui-test-site-url "$SITE_URL"
  -ui-test-site-user "$WP_USERNAME"
  -ui-test-site-pass "$WP_APP_PASSWORD"
)

# Capture simctl's output so the progress entry is written before it is echoed.
launch_output="$(xcrun simctl launch --terminate-running-process \
  "$SIMULATOR_UDID" "$APP_BUNDLE_ID" "${app_launch_args[@]}")"

log_ai_test_progress "Launched ${APP_BUNDLE_ID}"
printf '%s\n' "$launch_output"
#
# Usage: record-ai-test-result.sh <pass|fail> <reason> [screenshot-relative-path]
#
# Environment (required):
#   AI_TEST_RESULT_FILE         Result JSON path for the current test
#   AI_TEST_RESULT_EVENTS_FILE  Log that receives one line per invocation
#   AI_TEST_TITLE               Title of the current test
#   AI_TEST_FILE                Path of the current test file
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# shellcheck source=Scripts/ci/ai-test-progress.sh
source "$SCRIPT_DIR/ai-test-progress.sh"

# Name both required arguments so a bare invocation explains what is missing.
USAGE='Usage: record-ai-test-result.sh <pass|fail> <reason> [screenshot-relative-path]'
STATUS="${1:?$USAGE}"
REASON="${2:?$USAGE}"
SCREENSHOT_REL="${3:-}"

: "${AI_TEST_RESULT_FILE:?AI_TEST_RESULT_FILE is required}"
: "${AI_TEST_RESULT_EVENTS_FILE:?AI_TEST_RESULT_EVENTS_FILE is required}"
: "${AI_TEST_TITLE:?AI_TEST_TITLE is required}"
: "${AI_TEST_FILE:?AI_TEST_FILE is required}"

# Log the invocation before writing the result, so every call leaves an event
# line (epoch seconds, tab, status) even if the write below fails.
mkdir -p "$(dirname "$AI_TEST_RESULT_EVENTS_FILE")"
printf '%s\t%s\n' "$(date +%s)" "$STATUS" >> "$AI_TEST_RESULT_EVENTS_FILE"

# Resolve the writer next to this script, like the sourced helper above, so
# the call does not depend on the caller's working directory.
ruby "$SCRIPT_DIR/write-ai-test-result.rb" \
  "$AI_TEST_RESULT_FILE" \
  "$AI_TEST_TITLE" \
  "$AI_TEST_FILE" \
  "$STATUS" \
  "$REASON" \
  "$SCREENSHOT_REL"

log_ai_test_progress "Test result: $(printf '%s' "$STATUS" | tr '[:lower:]' '[:upper:]') — ${REASON}"
echo "Recorded ${STATUS} result for ${AI_TEST_TITLE}"