Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 69 additions & 0 deletions .buildkite/commands/build-wda.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env bash
# Clone and build WebDriverAgent for iOS Simulator testing.
#
# Skips the build only when a usable build-for-testing artifact already exists.
#
# Required (one of):
# SIMULATOR_UDID Simulator UDID for the build destination
# SIMULATOR_NAME Simulator name for the build destination (e.g., iPhone 16)
#
# Optional:
# WEBDRIVERAGENT_REPO_URL Repo URL (default: appium/WebDriverAgent)
# WEBDRIVERAGENT_REF Git ref or commit to build (default: current remote HEAD / existing checkout)

set -o errexit -o nounset -o pipefail

# Choose the xcodebuild destination. A UDID takes precedence over a name;
# with neither set there is nothing to build for, so bail out.
if [[ -n "${SIMULATOR_UDID:-}" ]]; then
  DESTINATION="platform=iOS Simulator,id=${SIMULATOR_UDID}"
elif [[ -n "${SIMULATOR_NAME:-}" ]]; then
  DESTINATION="platform=iOS Simulator,name=${SIMULATOR_NAME}"
else
  echo "Error: set SIMULATOR_UDID or SIMULATOR_NAME" >&2
  exit 1
fi

# Checkout location and the paths derived from it (relative to the cwd).
WDA_DIR=".build/WebDriverAgent"
WDA_PROJECT="${WDA_DIR}/WebDriverAgent.xcodeproj"
WDA_DERIVED_DATA="${WDA_DIR}/DerivedData"

# Where to get WebDriverAgent from; an empty ref means "whatever is checked out".
WEBDRIVERAGENT_REPO_URL="${WEBDRIVERAGENT_REPO_URL:-https://github.com/appium/WebDriverAgent.git}"
WEBDRIVERAGENT_REF="${WEBDRIVERAGENT_REF:-}"

# Ensure a WebDriverAgent git checkout exists at ${WDA_DIR}.
#
# Shallow-clones ${WEBDRIVERAGENT_REPO_URL} when ${WDA_DIR}/.git is missing.
# When ${WEBDRIVERAGENT_REF} is non-empty, shallow-fetches that ref from
# origin and detaches HEAD at the fetched commit.
#
# Globals:  WDA_DIR, WEBDRIVERAGENT_REPO_URL, WEBDRIVERAGENT_REF (read)
# Returns:  0 on success, otherwise the status of the command that failed.
ensure_wda_checkout() {
  mkdir -p .build || return

  if [[ ! -d "${WDA_DIR}/.git" ]]; then
    git clone --depth 1 "${WEBDRIVERAGENT_REPO_URL}" "${WDA_DIR}" || return
  fi

  if [[ -n "${WEBDRIVERAGENT_REF}" ]]; then
    git -C "${WDA_DIR}" fetch --depth 1 origin "${WEBDRIVERAGENT_REF}" || return
    # Check out FETCH_HEAD, not the ref name: in a --depth 1 (single-branch)
    # clone, fetching a branch by name records it only in FETCH_HEAD, so the
    # name itself would not resolve to anything local.
    git -C "${WDA_DIR}" checkout --detach FETCH_HEAD || return
  fi

  return 0
}

# Report whether a previous build-for-testing run left an .xctestrun file
# anywhere under ${WDA_DERIVED_DATA}/Build/Products.
#
# Globals:  WDA_DERIVED_DATA (read)
# Returns:  0 when at least one *.xctestrun file is found, 1 otherwise.
has_built_artifacts() {
  local products_dir="${WDA_DERIVED_DATA}/Build/Products"

  [[ -d "${products_dir}" ]] || return 1

  # -print -quit stops at the first hit; grep -q . turns "printed something"
  # into an exit status.
  if find "${products_dir}" -name '*.xctestrun' -print -quit | grep -q .; then
    return 0
  fi
  return 1
}

ensure_wda_checkout

# Reuse an earlier build when both the project and an .xctestrun exist.
if [[ -d "${WDA_PROJECT}" ]]; then
  if has_built_artifacts; then
    echo "WebDriverAgent already built, skipping."
    exit 0
  fi
fi

xcodebuild_args=(
  build-for-testing
  -project "${WDA_PROJECT}"
  -scheme WebDriverAgentRunner
  -destination "${DESTINATION}"
  -derivedDataPath "${WDA_DERIVED_DATA}"
  CODE_SIGNING_ALLOWED=NO
)

# Only the last line of xcodebuild's output is shown.
xcodebuild "${xcodebuild_args[@]}" | tail -1

# A zero exit status is not enough: the .xctestrun file must exist too.
has_built_artifacts || {
  echo "Error: WebDriverAgent build completed without an .xctestrun artifact" >&2
  exit 1
}
147 changes: 147 additions & 0 deletions .buildkite/commands/run-ai-e2e-tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
#!/usr/bin/env bash
# Run AI-driven E2E tests on an iOS Simulator using simulator-llm-pilot.
#
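# This script manages the full lifecycle:
# 1. Check for "Testing" label on PR (Buildkite only, skips if missing)
# 2. Download build artifacts (Buildkite only)
# 3. Install the simulator-llm-pilot gem (from a local checkout when one is
#    available, otherwise from GitHub)
# 4. Find or boot the simulator, install the app (Buildkite only), and build
#    WebDriverAgent if it has not been built yet
# 5. Run tests (gem handles simulator, WDA, agent loop, and results)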
#
# The gem provides a sandboxed agent that drives the simulator through a
# fixed set of tools (tap, swipe, type, REST API, etc.) — no arbitrary
# code execution, no shell access.
#
# Required environment variables:
# ANTHROPIC_API_KEY Claude API key
# SIMULATOR_LLM_PILOT_SITE_URL WordPress test site URL
# SIMULATOR_LLM_PILOT_USERNAME WordPress username
# SIMULATOR_LLM_PILOT_APP_PASSWORD WordPress application password
#
# Optional environment variables:
# APP wordpress | jetpack (default: jetpack)
# SIMULATOR_NAME Simulator to boot if none running (default: iPhone 16)
# TEST_DIR Test directory (default: Tests/AgentTests/ui-tests)
# SIMULATOR_LLM_PILOT_REPO_URL Remote repo URL for simulator-llm-pilot
# SIMULATOR_LLM_PILOT_SOURCE_PATH Local source checkout override for simulator-llm-pilot

set -o errexit -o nounset -o pipefail

# Absolute directory holding this script, and the repository root two levels
# above it. Everything below runs from the repository root.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
cd "${REPO_ROOT}"

# Print a site URL that is guaranteed to carry a scheme.
#
# Arguments: $1 - site URL or bare host (e.g. "example.com")
# Outputs:   $1 unchanged when it already starts with http:// or https://
#            (matched case-insensitively, since URL schemes are not
#            case-sensitive); otherwise $1 prefixed with "https://".
#            No trailing newline is written.
# Returns:   0
normalize_site_url() {
  local site_url="$1"

  case "${site_url}" in
    [Hh][Tt][Tt][Pp]://* | [Hh][Tt][Tt][Pp][Ss]://*)
      printf '%s' "${site_url}"
      ;;
    *)
      printf 'https://%s' "${site_url}"
      ;;
  esac

  return 0
}

# ── Label gate (Buildkite only) ─────────────────────────────────────
# Succeed when the comma-separated label list contains the label exactly.
#
# Arguments: $1 - comma-separated labels (may be empty)
#            $2 - label to look for
# Returns:   0 when present, 1 otherwise
pr_has_label() {
  local labels="$1"
  local wanted="$2"

  # Wrapping both sides in commas makes this a whole-label match.
  case ",${labels}," in
    *",${wanted},"*) return 0 ;;
  esac
  return 1
}

# Gate on BUILDKITE itself rather than on the label list being non-empty:
# a PR with no labels at all has an empty list and must be skipped too.
if [[ -n "${BUILDKITE:-}" ]]; then
  echo "--- Checking for 'Testing' label"

  if ! pr_has_label "${BUILDKITE_PULL_REQUEST_LABELS:-}" "Testing"; then
    echo "PR does not have the 'Testing' label. Skipping."
    echo "Add the label and re-run this step to trigger AI E2E tests."
    exit 0
  fi
  echo "'Testing' label found."
fi

# ── Required env vars ────────────────────────────────────────────────
# Each expansion aborts the script with the given message when the variable
# is unset or empty.
: "${ANTHROPIC_API_KEY:?Set ANTHROPIC_API_KEY}"
: "${SIMULATOR_LLM_PILOT_SITE_URL:?Set SIMULATOR_LLM_PILOT_SITE_URL}"
: "${SIMULATOR_LLM_PILOT_USERNAME:?Set SIMULATOR_LLM_PILOT_USERNAME}"
: "${SIMULATOR_LLM_PILOT_APP_PASSWORD:?Set SIMULATOR_LLM_PILOT_APP_PASSWORD}"
# Re-export the site URL with a scheme so child processes see that form.
export SIMULATOR_LLM_PILOT_SITE_URL="$(normalize_site_url "$SIMULATOR_LLM_PILOT_SITE_URL")"

# ── Defaults ─────────────────────────────────────────────────────────
: "${APP:=jetpack}"
: "${SIMULATOR_NAME:=iPhone 16}"
export SIMULATOR_NAME
: "${TEST_DIR:=Tests/AgentTests/ui-tests}"
: "${SIMULATOR_LLM_PILOT_REPO_URL:=https://github.com/Automattic/simulator-llm-pilot.git}"
SIMULATOR_LLM_PILOT_SOURCE_PATH="${SIMULATOR_LLM_PILOT_SOURCE_PATH:-}"

# Map the app selector onto its bundle identifier and display name.
if [[ "$APP" == "wordpress" ]]; then
  APP_BUNDLE_ID="org.wordpress"
  APP_DISPLAY_NAME="WordPress"
elif [[ "$APP" == "jetpack" ]]; then
  APP_BUNDLE_ID="com.automattic.jetpack"
  APP_DISPLAY_NAME="Jetpack"
else
  echo "Error: APP must be 'wordpress' or 'jetpack', got '$APP'" >&2
  exit 1
fi

APP_INSTRUCTIONS_FILE="${REPO_ROOT}/Tests/AgentTests/app-instructions.md"

# ── Artifact download (Buildkite only) ───────────────────────────────
# Runs only when BUILDKITE is set. Fetches and unpacks the build products
# tarball for the selected app, then installs the project's gems.
# NOTE(review): download_artifact and install_gems are not defined in this
# script; presumably they come from the CI toolkit plugin — confirm.
if [[ -n "${BUILDKITE:-}" ]]; then
echo "--- Downloading Build Artifacts"
download_artifact "build-products-${APP}.tar"
tar -xf "build-products-${APP}.tar"

echo "--- Setting up Gems"
install_gems
fi

# ── Install simulator-llm-pilot ──────────────────────────────────────
# Runs on every invocation. The path is relative to the repository root,
# which is the current directory at this point.
echo "--- Installing simulator-llm-pilot"
bash Scripts/ci/install-simulator-llm-pilot.sh
# Print the installed version; this also fails the script early if the
# simulator-llm-pilot executable cannot be run.
echo "simulator-llm-pilot $(simulator-llm-pilot version)"

# ── Resolve simulator (always) and install app (Buildkite only) ──────
echo "--- Setting up Simulator"

# Best-effort lookup: a failed lookup just leaves UDID empty, and the empty
# value is handled below.
UDID="$(ruby Scripts/ci/find-booted-simulator.rb "$SIMULATOR_NAME" 2>/dev/null || true)"
if [[ -z "$UDID" ]]; then
  echo "No booted simulator named '$SIMULATOR_NAME' found. Booting..."
  xcrun simctl boot "$SIMULATOR_NAME" 2>/dev/null || true
  # Poll for up to 30 seconds, once per second, for the device to show up.
  UDID="$(ruby Scripts/ci/find-booted-simulator.rb "$SIMULATOR_NAME" 30 1 2>/dev/null || true)"
fi

if [[ -z "$UDID" ]]; then
  echo "Error: could not find a booted simulator named '$SIMULATOR_NAME'" >&2
  exit 1
fi

# Exported so that build-wda.sh can target the same device.
export SIMULATOR_UDID="$UDID"
echo "Simulator UDID: $UDID"

if [[ -n "${BUILDKITE:-}" ]]; then
  # Let find stop at the first match itself (-print -quit) instead of piping
  # into head, which under pipefail can abort the script via SIGPIPE.
  # "|| true" keeps a missing products directory on the friendly error path.
  APP_PATH="$(
    find DerivedData/Build/Products \
      -name "${APP_DISPLAY_NAME}.app" \
      -path "*Debug-iphonesimulator*" \
      -print -quit || true
  )"
  if [[ -z "$APP_PATH" ]]; then
    echo "Error: ${APP_DISPLAY_NAME}.app not found in build products" >&2
    exit 1
  fi
  echo "Installing $APP_PATH on simulator..."
  xcrun simctl install "$UDID" "$APP_PATH"
fi

# ── Build WebDriverAgent (if not present) ────────────────────────────
echo "--- Building WebDriverAgent"
# Use the absolute SCRIPT_DIR rather than $(dirname "$0"): this script has
# already changed into the repository root, so a relative $0 would no longer
# point at the commands directory.
"${SCRIPT_DIR}/build-wda.sh"

# ── Run tests ────────────────────────────────────────────────────────
echo "--- Running AI E2E Tests"

# Each run writes into its own directory, named to the minute.
TIMESTAMP="$(date +%Y-%m-%d-%H%M)"
RESULTS_DIR="Tests/AgentTests/results/${TIMESTAMP}"

pilot_args=(
  "$TEST_DIR"
  --app-bundle-id "$APP_BUNDLE_ID"
  --app-name "$APP_DISPLAY_NAME"
  --app-instructions-file "$APP_INSTRUCTIONS_FILE"
  --simulator-udid "$UDID"
  --results-dir "$RESULTS_DIR"
)

# Record the status instead of exiting so the results are still printed.
EXIT_CODE=0
simulator-llm-pilot run "${pilot_args[@]}" || EXIT_CODE=$?

# ── Report results ───────────────────────────────────────────────────
echo "--- Results"
RESULTS_FILE="${RESULTS_DIR}/results.md"
if [[ ! -f "$RESULTS_FILE" ]]; then
  echo "Warning: no results.md found at $RESULTS_FILE"
else
  cat "$RESULTS_FILE"
fi

# Finish with the test runner's own status.
exit "$EXIT_CODE"
18 changes: 18 additions & 0 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,24 @@ steps:
command: .buildkite/commands/lint-localized-strings-format.sh
plugins: [$CI_TOOLKIT_PLUGIN]

#################
# AI E2E Tests (requires "Testing" label on PR)
#################
# The command script exits 0 without running tests when the PR lacks the
# "Testing" label, so this step is always scheduled for pull requests.
- label: "🤖 AI E2E Tests"
command: .buildkite/commands/run-ai-e2e-tests.sh
# The script downloads build-products-<APP>.tar.
# NOTE(review): presumably that tarball is uploaded by the build_jetpack step — confirm.
depends_on: "build_jetpack"
# Pull-request builds only.
if: "build.pull_request.id != null"
# NOTE(review): soft_fail is expected to keep a failure here from failing the whole build — confirm.
soft_fail: true
timeout_in_minutes: 60
plugins: [$CI_TOOLKIT_PLUGIN]
env:
# Selects the Jetpack bundle ID and app name in the command script.
APP: jetpack
# The command script writes results under Tests/AgentTests/results/<timestamp>/.
artifact_paths:
- "Tests/AgentTests/results/**/*"
notify:
- github_commit_status:
context: "AI E2E Tests"

#################
# Claude Build Analysis - dynamically uploaded so Build result conditions evaluate at runtime after the wait
#################
Expand Down
4 changes: 2 additions & 2 deletions .claude/skills/ai-test-runner/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ Use the ios-sim-navigation skill for WDA interaction reference.

## Context

- App Bundle ID: <BUNDLE_ID>
- App Bundle ID: <APP_BUNDLE_ID>
- WDA Session ID: <SESSION_ID>
- Simulator UDID: <UDID>
- Test file: <TEST_FILE_PATH> (absolute path)
Expand All @@ -117,7 +117,7 @@ Use the ios-sim-navigation skill for WDA interaction reference.
2. **Relaunch the app** for a clean state:

```bash
xcrun simctl launch --terminate-running-process <UDID> <BUNDLE_ID> \
xcrun simctl launch --terminate-running-process <UDID> <APP_BUNDLE_ID> \
-ui-test-site-url <SITE_URL> \
-ui-test-site-user <USERNAME> \
-ui-test-site-pass <APPLICATION_PASSWORD>
Expand Down
2 changes: 1 addition & 1 deletion .claude/skills/ios-sim-navigation/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ If actions consistently fail or the tree looks unexpected, the app may have cras
xcrun simctl list devices booted

# Re-launch the app
xcrun simctl launch <UDID> <BUNDLE_ID>
xcrun simctl launch <UDID> <APP_BUNDLE_ID>
```

After re-launching, create a new WDA session before continuing.
38 changes: 38 additions & 0 deletions Scripts/ci/find-booted-simulator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

require 'json'
require 'open3'

# Usage: find-booted-simulator.rb [NAME] [WAIT_SECONDS] [POLL_INTERVAL]
#
# Prints the UDID of a booted simulator (no trailing newline) and exits 0.
# With an empty NAME the first booted device wins; otherwise the name must
# match exactly. Exits 1 when simctl fails or nothing matches in time.
target_name = ARGV[0].to_s
timeout = ARGV[1].to_f
interval = ARGV[2].to_f
# A missing, zero or negative interval falls back to one second.
interval = 1.0 unless interval.positive?

monotonic_now = -> { Process.clock_gettime(Process::CLOCK_MONOTONIC) }
give_up_at = monotonic_now.call + [timeout, 0].max

loop do
  stdout, result = Open3.capture2('xcrun', 'simctl', 'list', 'devices', 'booted', '-j')
  exit 1 unless result.success?

  # The JSON groups devices under one key per runtime; flatten the groups
  # and keep only devices reported as Booted.
  booted = JSON.parse(stdout).fetch('devices', {}).each_value.flat_map do |runtime_devices|
    runtime_devices.select { |candidate| candidate['state'] == 'Booted' }
  end

  match = booted.find { |candidate| target_name.empty? || candidate['name'] == target_name }

  if match
    print(match['udid'])
    exit 0
  end

  # Without a positive timeout this is a single attempt.
  exit 1 if timeout <= 0 || monotonic_now.call >= give_up_at

  sleep interval
end
48 changes: 48 additions & 0 deletions Scripts/ci/install-simulator-llm-pilot.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/usr/bin/env bash
set -o errexit -o nounset -o pipefail

# Locate the repository root (two levels above this script) and the sibling
# directory where a local simulator-llm-pilot checkout would live.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
DEFAULT_LOCAL_GEM_PATH="$(cd "$REPO_ROOT/.." && pwd)/simulator-llm-pilot"

: "${SIMULATOR_LLM_PILOT_REPO_URL:=https://github.com/Automattic/simulator-llm-pilot.git}"
SIMULATOR_LLM_PILOT_SOURCE_PATH="${SIMULATOR_LLM_PILOT_SOURCE_PATH:-}"

# The gem is built in a scratch directory that is removed on any exit path.
build_dir="$(mktemp -d)"
trap 'rm -rf "$build_dir"' EXIT

# Source selection: the explicit override wins; otherwise use the sibling
# checkout when it contains the gemspec; otherwise clone the remote.
source_path="${SIMULATOR_LLM_PILOT_SOURCE_PATH}"
if [[ -z "$source_path" ]]; then
  if [[ -f "${DEFAULT_LOCAL_GEM_PATH}/simulator-llm-pilot.gemspec" ]]; then
    source_path="${DEFAULT_LOCAL_GEM_PATH}"
  fi
fi

if [[ -z "$source_path" ]]; then
  echo "Cloning simulator-llm-pilot from ${SIMULATOR_LLM_PILOT_REPO_URL}"
  git clone --depth 1 "${SIMULATOR_LLM_PILOT_REPO_URL}" "$build_dir"
  source_revision="$(git -C "$build_dir" rev-parse HEAD)"
elif [[ -d "${source_path}/.git" ]]; then
  echo "Using local simulator-llm-pilot source at ${source_path}"
  # A git checkout is exported at HEAD via git archive.
  source_revision="$(git -C "${source_path}" rev-parse HEAD)"
  git -C "${source_path}" archive HEAD | tar -x -C "$build_dir"
else
  echo "Using local simulator-llm-pilot source at ${source_path}"
  # A plain directory is copied as it is.
  source_revision="local-filesystem"
  tar -cf - -C "${source_path}" . | tar -xf - -C "$build_dir"
fi

pushd "$build_dir" >/dev/null
gem build simulator-llm-pilot.gemspec >/dev/null

# nullglob makes "no gem produced" an empty array, not a literal pattern.
shopt -s nullglob
gem_files=(simulator-llm-pilot-*.gem)
shopt -u nullglob

gem_count="${#gem_files[@]}"
if ((gem_count != 1)); then
  echo "Error: expected exactly one built simulator-llm-pilot gem, found ${gem_count}" >&2
  exit 1
fi

gem install --no-document --force "./${gem_files[0]}"
popd >/dev/null

echo "Installed simulator-llm-pilot from ${source_revision}"
13 changes: 13 additions & 0 deletions Tests/AgentTests/app-instructions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
## Login

This app uses a self-hosted WordPress site login flow. The app password is
passed via launch arguments — NEVER type a password manually.

- NEVER tap "Continue with WordPress.com", NEVER enter WordPress.com
email/password, and NEVER request a login link.
- Tap "Enter your existing site address", then enter the site host first
(without scheme, for example `example.com`). If the app rejects the
host-only form, try the full site URL once.
- If you reach any WordPress.com email/password screen, back out and
return to the self-hosted flow.
- If the app is already logged in (e.g., My Site tab visible), skip login.