diff --git a/ci/integration-test.sh b/ci/integration-test.sh
index f1c6902..76f0e7a 100755
--- a/ci/integration-test.sh
+++ b/ci/integration-test.sh
@@ -46,6 +46,81 @@ cleanup() {
 }
 trap cleanup EXIT
 
+# assert_quorum_n <n> [timeout-seconds]
+# Polls all pods 0..N-1 until they all report the same non-empty leaderId.
+# On success: exports LEADERS[] (array of leaderIds, one per pod) and
+# LEADER_ORDINAL (the pod ordinal of the leader).
+assert_quorum_n() {
+  local n=$1 timeout=${2:-$RAFT_TIMEOUT}
+  local deadline=$(( SECONDS + timeout ))
+  local i pid local_port l
+  while true; do
+    LEADERS=()
+    for (( i=0; i<n; i++ )); do
+      local_port=$(( HTTP_PORT + 10 + i ))
+      pid=$(pf_start "$i" "$local_port")
+      if pf_wait "$local_port"; then
+        l=$(api "$local_port" GET /api/v1/cluster \
+          | jq -r '.leaderId // empty' 2>/dev/null || echo "")
+        LEADERS+=("$l")
+      fi
+      pf_stop "$pid"
+    done
+
+    if (( ${#LEADERS[@]} == n )) && [[ -n "${LEADERS[0]}" ]]; then
+      local all_agree=1
+      for l in "${LEADERS[@]:1}"; do
+        [[ "$l" == "${LEADERS[0]}" ]] || { all_agree=0; break; }
+      done
+      if (( all_agree )); then
+        LEADER_ORDINAL=$(echo "${LEADERS[0]}" \
+          | sed -nE "s/^${RELEASE}-([0-9]+)\..*$/\1/p")
+        [[ -n "$LEADER_ORDINAL" ]] || {
+          echo "ERROR: could not parse ordinal from leader '${LEADERS[0]}'"
+          return 1
+        }
+        echo "   Raft leader: ${LEADERS[0]} (pod-${LEADER_ORDINAL})"
+        return 0
+      fi
+    fi
+
+    if (( SECONDS >= deadline )); then
+      echo "ERROR: Raft formation on ${n} pods timed out after ${timeout}s."
+      echo "       Leaders seen: ${LEADERS[*]:-}"
+      return 1
+    fi
+    echo "   Not converged yet (${LEADERS[*]:-}), retrying in 5s..."
+    sleep 5
+  done
+}
+
+# cluster_status_assert_healthy <port>
+# Asserts no peer is STALLED or FALLING_BEHIND. Gracefully skips if the
+# `peers[].status` field is absent (image predates commit 203acdaac).
+cluster_status_assert_healthy() {
+  local port=$1
+  local status_json has_status stalled peer_count
+  status_json=$(api "$port" GET /api/v1/cluster) || {
+    echo "ERROR: cluster status API call failed"; return 1
+  }
+  has_status=$(echo "$status_json" | jq -r '.peers[0].status // empty')
+  if [[ -z "$has_status" ]]; then
+    echo "   WARNING: peers[].status field absent on this image; skipping STATUS assertion."
+    return 0
+  fi
+  stalled=$(echo "$status_json" \
+    | jq -r '.peers[] | select(.status=="STALLED" or .status=="FALLING_BEHIND") | .id' \
+    | head -n1)
+  if [[ -n "$stalled" ]]; then
+    echo "ERROR: peer $stalled has status STALLED/FALLING_BEHIND"
+    echo "$status_json" | jq '.peers'
+    return 1
+  fi
+  peer_count=$(echo "$status_json" | jq '.peers | length')
+  echo "   All ${peer_count} peers HEALTHY/CATCHING_UP."
+  return 0
+}
+
 # ── retrieve password ─────────────────────────────────────────────────────────
 
 PASSWORD=$(kubectl get secret arcadedb-credentials-secret \
@@ -56,53 +131,21 @@ PASSWORD=$(kubectl get secret arcadedb-credentials-secret \
 
 # ── phase 1: pod readiness ────────────────────────────────────────────────────
 
-echo "==> [1/4] Waiting for StatefulSet rollout (timeout ${ROLLOUT_TIMEOUT}s)..."
+echo "==> [1/6] Waiting for StatefulSet rollout (timeout ${ROLLOUT_TIMEOUT}s)..."
 kubectl rollout status statefulset/"$RELEASE" \
   -n "$NAMESPACE" --timeout="${ROLLOUT_TIMEOUT}s"
 echo "   All 3 pods Ready."
 
 # ── phase 2: raft formation ───────────────────────────────────────────────────
 
-echo "==> [2/4] Checking Raft leader consensus (timeout ${RAFT_TIMEOUT}s)..."
-DEADLINE=$(( SECONDS + RAFT_TIMEOUT ))
-
-while true; do
-  LEADERS=()
-  for i in 0 1 2; do
-    LOCAL=$(( HTTP_PORT + 10 + i ))   # 2490, 2491, 2492
-    PID=$(pf_start "$i" "$LOCAL")
-    pf_wait "$LOCAL" || { pf_stop "$PID"; continue; }
-    LEADER=$(api "$LOCAL" GET /api/v1/cluster \
-      | jq -r '.leaderId // empty' 2>/dev/null || echo "")
-    pf_stop "$PID"
-    LEADERS+=("$LEADER")
-  done
-
-  if [[ -n "${LEADERS[0]}" \
-        && "${LEADERS[0]}" == "${LEADERS[1]}" \
-        && "${LEADERS[0]}" == "${LEADERS[2]}" ]]; then
-    echo "   Raft leader: ${LEADERS[0]}"
-    break
-  fi
-
-  if (( SECONDS >= DEADLINE )); then
-    echo "ERROR: Raft formation timed out after ${RAFT_TIMEOUT}s."
-    echo "       Leaders seen: ${LEADERS[*]:-}"
-    exit 1
-  fi
-
-  echo "   Not converged yet (${LEADERS[*]:-}), retrying in 5s..."
-  sleep 5
-done
+echo "==> [2/6] Checking Raft leader consensus (timeout ${RAFT_TIMEOUT}s)..."
+assert_quorum_n 3 || exit 1
 
 # ── phase 3: write ────────────────────────────────────────────────────────────
 
-# Writes (including database creation) must go through the Raft leader. Parse the
-# pod ordinal out of leaderId, e.g. "test-arcadedb-1.test-arcadedb.default..._2434" -> 1.
-LEADER_ORDINAL=$(echo "${LEADERS[0]}" | sed -nE "s/^${RELEASE}-([0-9]+)\..*$/\1/p")
-[[ -n "$LEADER_ORDINAL" ]] || { echo "ERROR: could not parse ordinal from leader '${LEADERS[0]}'"; exit 1; }
+# LEADER_ORDINAL is set by assert_quorum_n above.
 
-echo "==> [3/4] Writing test data via leader pod-${LEADER_ORDINAL}..."
+echo "==> [3/6] Writing test data via leader pod-${LEADER_ORDINAL}..."
 PF_PID=$(pf_start "$LEADER_ORDINAL" "$HTTP_PORT")
 pf_wait "$HTTP_PORT" || { echo "ERROR: port-forward to leader pod-${LEADER_ORDINAL} failed"; exit 1; }
@@ -122,7 +165,7 @@ echo "   Write complete."
 
 # ── phase 4: read and assert ──────────────────────────────────────────────────
 
-echo "==> [4/4] Reading back test data..."
+echo "==> [4/6] Reading back test data..."
 RESULT=$(api "$HTTP_PORT" POST /api/v1/query/integration-test \
   '{"language":"sql","command":"SELECT name FROM TestDoc WHERE name = '\''hello-kind'\''"}' \
   | jq -r '.result[0].name // empty') || {
@@ -138,4 +181,96 @@ fi
 echo "   Got: '${RESULT}'"
+
+# ── phase 5: STATUS column ────────────────────────────────────────────────────
+
+echo "==> [5/6] Asserting STATUS=HEALTHY for all peers..."
+PF_PID=$(pf_start "$LEADER_ORDINAL" "$HTTP_PORT")
+pf_wait "$HTTP_PORT" || { echo "ERROR: port-forward to leader failed"; exit 1; }
+
+cluster_status_assert_healthy "$HTTP_PORT" || exit 1
+
+pf_stop "$PF_PID"
+
+# ── phase 6: leadership transfer ──────────────────────────────────────────────
+
+echo "==> [6/6] Transferring Raft leadership..."
+PF_PID=$(pf_start "$LEADER_ORDINAL" "$HTTP_PORT")
+pf_wait "$HTTP_PORT" || { echo "ERROR: port-forward to leader failed"; exit 1; }
+
+CURRENT_LEADER=${LEADERS[0]}
+TARGET_PEER=$(api "$HTTP_PORT" GET /api/v1/cluster \
+  | jq -r --arg leader "$CURRENT_LEADER" \
+      '.peers[] | select(.id != $leader) | .id' | head -n1)
+[[ -n "$TARGET_PEER" ]] || { echo "ERROR: no non-leader peer found"; exit 1; }
+echo "   Current leader:  $CURRENT_LEADER"
+echo "   Transfer target: $TARGET_PEER"
+
+api "$HTTP_PORT" POST /api/v1/cluster/leader \
+  "{\"peerId\":\"$TARGET_PEER\"}" >/dev/null
+pf_stop "$PF_PID"
+
+# Wait up to 30s for the transfer to take effect on any pod we can reach.
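+# Every pod is polled rather than just the transfer target, since the old
+# leader may be briefly unreachable mid-transfer; `break 2` exits both the
+# inner for loop and the outer while loop once the target reports itself.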
+DEADLINE=$(( SECONDS + 30 ))
+NEW_LEADER=""
+while (( SECONDS < DEADLINE )); do
+  for i in 0 1 2; do
+    LOCAL=$(( HTTP_PORT + 20 + i ))
+    PID=$(pf_start "$i" "$LOCAL")
+    if pf_wait "$LOCAL" 5; then
+      L=$(api "$LOCAL" GET /api/v1/cluster | jq -r '.leaderId // empty' 2>/dev/null || echo "")
+      pf_stop "$PID"
+      if [[ "$L" == "$TARGET_PEER" ]]; then
+        NEW_LEADER="$L"
+        break 2
+      fi
+    else
+      pf_stop "$PID"
+    fi
+  done
+  sleep 2
+done
+
+[[ "$NEW_LEADER" == "$TARGET_PEER" ]] || {
+  echo "ERROR: leadership did not transfer; got '${NEW_LEADER:-}'"
+  exit 1
+}
+echo "   New leader: $NEW_LEADER"
+
+# Verify writes via the new leader.
+NEW_LEADER_ORDINAL=$(echo "$NEW_LEADER" | sed -nE "s/^${RELEASE}-([0-9]+)\..*$/\1/p")
+PF_PID=$(pf_start "$NEW_LEADER_ORDINAL" "$HTTP_PORT")
+pf_wait "$HTTP_PORT" || { echo "ERROR: port-forward to new leader failed"; exit 1; }
+
+api "$HTTP_PORT" POST /api/v1/command/integration-test \
+  '{"language":"sql","command":"INSERT INTO TestDoc SET name = '\''post-transfer'\''"}' \
+  >/dev/null
+
+POST_RESULT=$(api "$HTTP_PORT" POST /api/v1/query/integration-test \
+  '{"language":"sql","command":"SELECT name FROM TestDoc WHERE name = '\''post-transfer'\''"}' \
+  | jq -r '.result[0].name // empty')
+
+pf_stop "$PF_PID"
+
+[[ "$POST_RESULT" == "post-transfer" ]] || {
+  echo "ERROR: write via new leader failed (got '${POST_RESULT:-}')"
+  exit 1
+}
+echo "   Write via new leader succeeded."
+
+# Update tracked leader for downstream phases.
+LEADERS[0]=$NEW_LEADER
+LEADER_ORDINAL=$NEW_LEADER_ORDINAL
+
+# Phases 7 (helm-upgrade scale-up 3->5) and 8 (snapshot-install recovery) were
+# planned but discarded after CI proved the scenarios are not supported by the
+# current ArcadeDB image: a `helm upgrade --set replicaCount=5` rolling-restarts
+# all existing StatefulSet pods AND adds two new ones, all with a serverList of
+# 5 entries, but Raft does not auto-vote in the new peers (the support email
+# confirms this requires an explicit POST /api/v1/cluster/peer call from the
+# leader). The cluster ends up unable to re-form quorum after the rolling
+# restart. The snapshot-install phase depended on the post-scale-up cluster, so
+# it was dropped with phase 7.
+# See docs/superpowers/specs/2026-05-09-ha-integration-tests-design.md for the
+# updated rationale.
 
 echo "==> All checks passed."
diff --git a/docs/superpowers/plans/2026-05-09-ha-integration-tests.md b/docs/superpowers/plans/2026-05-09-ha-integration-tests.md
new file mode 100644
index 0000000..1a40e6e
--- /dev/null
+++ b/docs/superpowers/plans/2026-05-09-ha-integration-tests.md
@@ -0,0 +1,639 @@
+# HA Integration Tests Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Extend `ci/integration-test.sh` with four new phases that exercise Raft HA scenarios from the support email Q&A: STATUS column observation, runtime leadership transfer, helm-upgrade scale-up 3→5, and snapshot-install recovery.
+
+**Architecture:** Single CI job, single kind cluster, single Helm install. New phases append to the existing script in order of escalating risk; the destructive snapshot phase runs last. A small refactor extracts a generalized quorum helper so that both the existing 3-pod check and the new 5-pod check use the same code path.
+
+**Tech Stack:** bash, kubectl, kind, helm, jq, curl. No new tooling.
+
+**Spec:** `docs/superpowers/specs/2026-05-09-ha-integration-tests-design.md`
+
+---
+
+## Local Setup (before starting any task)
+
+To exercise the integration script locally between tasks, you need a kind cluster with the chart installed. Run once before Task 2:
+
+```bash
+kind create cluster --wait 60s
+
+helm install test-arcadedb charts/arcadedb/ \
+  --set replicaCount=3 \
+  --set persistence.enabled=false \
+  --set arcadedb.defaultDatabases="" \
+  --set 'arcadedb.extraCommands[1]=-Darcadedb.ha.snapshotThreshold=50' \
+  --timeout 5m --wait
+```
+
+Tear down at the end (or between major iterations if needed):
+
+```bash
+kind delete cluster
+```
+
+After Task 6 the cluster will have 5 pods and a deleted-and-recreated peer; you may want to delete and recreate the cluster between full runs.
+
+---
+
+## Task 1: Add `snapshotThreshold` override to CI workflow
+
+**Files:**
+- Modify: `.github/workflows/lint.yml` (the `Install chart` step in the `integration` job)
+
+- [ ] **Step 1: Update the helm install args in `lint.yml`**
+
+Locate the `Install chart` step in the `integration` job. Replace its `run:` block with:
+
+```yaml
+      - name: Install chart
+        run: |
+          helm install test-arcadedb charts/arcadedb/ \
+            --set replicaCount=3 \
+            --set persistence.enabled=false \
+            --set arcadedb.defaultDatabases="" \
+            --set 'arcadedb.extraCommands[1]=-Darcadedb.ha.snapshotThreshold=50' \
+            --timeout 5m \
+            --wait
+```
+
+- [ ] **Step 2: Verify YAML is valid**
+
+Run:
+
+```bash
+python3 -c "import yaml,sys; yaml.safe_load(open('.github/workflows/lint.yml'))" \
+  && echo OK
+```
+
+Expected: `OK`
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add .github/workflows/lint.yml
+git commit -m "ci: lower snapshot threshold for HA integration test"
+```
+
+---
+
+## Task 2: Refactor — extract `assert_quorum_n` helper and renumber phase echoes
+
+**Files:**
+- Modify: `ci/integration-test.sh` (helpers section + phase 2 + all phase echoes)
+
+The existing phase 2 hardcodes ordinals 0/1/2. The 5-pod scale-up in Task 5 needs the same logic for 0..4. Factor the loop out, parametrized by pod count. While we're here, update the `[N/4]` phase counters to `[N/8]` so subsequent tasks just append.
+
+- [ ] **Step 1: Add `assert_quorum_n` helper after the `cleanup` trap**
+
+Insert after line 47 (after the `trap cleanup EXIT` line) in `ci/integration-test.sh`:
+
+```bash
+# assert_quorum_n <n> [timeout-seconds]
+# Polls all pods 0..N-1 until they all report the same non-empty leaderId.
+# On success: exports LEADERS[] (array of leaderIds, one per pod) and
+# LEADER_ORDINAL (the pod ordinal of the leader).
+assert_quorum_n() {
+  local n=$1 timeout=${2:-$RAFT_TIMEOUT}
+  local deadline=$(( SECONDS + timeout ))
+  local i pid local_port l
+  while true; do
+    LEADERS=()
+    for (( i=0; i<n; i++ )); do
+      local_port=$(( HTTP_PORT + 10 + i ))
+      pid=$(pf_start "$i" "$local_port")
+      if pf_wait "$local_port"; then
+        l=$(api "$local_port" GET /api/v1/cluster \
+          | jq -r '.leaderId // empty' 2>/dev/null || echo "")
+        LEADERS+=("$l")
+      fi
+      pf_stop "$pid"
+    done
+
+    if (( ${#LEADERS[@]} == n )) && [[ -n "${LEADERS[0]}" ]]; then
+      local all_agree=1
+      for l in "${LEADERS[@]:1}"; do
+        [[ "$l" == "${LEADERS[0]}" ]] || { all_agree=0; break; }
+      done
+      if (( all_agree )); then
+        LEADER_ORDINAL=$(echo "${LEADERS[0]}" \
+          | sed -nE "s/^${RELEASE}-([0-9]+)\..*$/\1/p")
+        [[ -n "$LEADER_ORDINAL" ]] || {
+          echo "ERROR: could not parse ordinal from leader '${LEADERS[0]}'"
+          return 1
+        }
+        echo "   Raft leader: ${LEADERS[0]} (pod-${LEADER_ORDINAL})"
+        return 0
+      fi
+    fi
+
+    if (( SECONDS >= deadline )); then
+      echo "ERROR: Raft formation on ${n} pods timed out after ${timeout}s."
+      echo "       Leaders seen: ${LEADERS[*]:-}"
+      return 1
+    fi
+    echo "   Not converged yet (${LEADERS[*]:-}), retrying in 5s..."
+    sleep 5
+  done
+}
+```
+
+- [ ] **Step 2: Replace existing phase 2 body with a call to `assert_quorum_n`**
+
+Replace the block from `echo "==> [2/4] Checking Raft leader consensus..."` through the end of its `while true; do ... done` loop (lines 66–96 in the current file) with:
+
+```bash
+# ── phase 2: raft formation ───────────────────────────────────────────────────
+
+echo "==> [2/8] Checking Raft leader consensus (timeout ${RAFT_TIMEOUT}s)..."
+assert_quorum_n 3 || exit 1
+```
+
+- [ ] **Step 3: Drop the now-unused `LEADER_ORDINAL=...` parse in phase 3**
+
+In the current phase 3 (write block), `assert_quorum_n` already exports `LEADER_ORDINAL`. Remove the duplicate parse. Replace the lines:
+
+```bash
+LEADER_ORDINAL=$(echo "${LEADERS[0]}" | sed -nE "s/^${RELEASE}-([0-9]+)\..*$/\1/p")
+[[ -n "$LEADER_ORDINAL" ]] || { echo "ERROR: could not parse ordinal from leader '${LEADERS[0]}'"; exit 1; }
+```
+
+with a single comment:
+
+```bash
+# LEADER_ORDINAL is set by assert_quorum_n above.
+```
+
+- [ ] **Step 4: Renumber phase counters from `/4` to `/8`**
+
+Update the four existing `echo "==> [N/4] ..."` lines so they read `[1/8]`, `[2/8]`, `[3/8]`, and `[4/8]` respectively. Phases 5–8 will be added in subsequent tasks.
+
+- [ ] **Step 5: Syntax-check the script**
+
+Run:
+
+```bash
+bash -n ci/integration-test.sh && echo OK
+```
+
+Expected: `OK`
+
+- [ ] **Step 6: Run end-to-end against the local kind cluster**
+
+Run:
+
+```bash
+make test-integration
+```
+
+Expected output (last line): `==> All checks passed.`
+
+If `assert_quorum_n` does not converge, dump cluster state:
+
+```bash
+kubectl get pods,svc -n default
+kubectl logs -l app=arcadedb -n default --tail=50
+```
+
+- [ ] **Step 7: Commit**
+
+```bash
+git add ci/integration-test.sh
+git commit -m "test(integration): extract assert_quorum_n helper, renumber phases"
+```
+
+---
+
+## Task 3: Add Phase 5 — `STATUS=HEALTHY` assertion
+
+**Files:**
+- Modify: `ci/integration-test.sh` (append phase 5 after the existing phase 4)
+
+- [ ] **Step 1: Add a `cluster_status_assert_healthy` helper**
+
+Append to the helpers section (immediately after `assert_quorum_n` from Task 2):
+
+```bash
+# cluster_status_assert_healthy <port>
+# Asserts no peer is STALLED or FALLING_BEHIND. Gracefully skips if the
+# `peers[].status` field is absent (image predates commit 203acdaac).
+cluster_status_assert_healthy() {
+  local port=$1
+  local status_json has_status stalled peer_count
+  status_json=$(api "$port" GET /api/v1/cluster) || {
+    echo "ERROR: cluster status API call failed"; return 1
+  }
+  has_status=$(echo "$status_json" | jq -r '.peers[0].status // empty')
+  if [[ -z "$has_status" ]]; then
+    echo "   WARNING: peers[].status field absent on this image; skipping STATUS assertion."
+    return 0
+  fi
+  stalled=$(echo "$status_json" \
+    | jq -r '.peers[] | select(.status=="STALLED" or .status=="FALLING_BEHIND") | .id' \
+    | head -n1)
+  if [[ -n "$stalled" ]]; then
+    echo "ERROR: peer $stalled has status STALLED/FALLING_BEHIND"
+    echo "$status_json" | jq '.peers'
+    return 1
+  fi
+  peer_count=$(echo "$status_json" | jq '.peers | length')
+  echo "   All ${peer_count} peers HEALTHY/CATCHING_UP."
+  return 0
+}
+```
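+
+For reference, the response shape these `jq` filters assume. This is an illustrative sketch pieced together from the fields this plan uses (`leaderId`, `peers[].id`, `peers[].status`), not a verbatim API response:
+
+```bash
+# Hypothetical shape of GET /api/v1/cluster as consumed by the helpers:
+# {
+#   "leaderId": "test-arcadedb-1.test-arcadedb.default...._2434",
+#   "peers": [
+#     { "id": "test-arcadedb-0....", "status": "HEALTHY" },
+#     { "id": "test-arcadedb-1....", "status": "CATCHING_UP" }
+#   ]
+# }
+api "$HTTP_PORT" GET /api/v1/cluster | jq '{leaderId, peers: [.peers[] | {id, status}]}'
+```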
+
+- [ ] **Step 2: Append phase 5 before the final `echo "==> All checks passed."` line**
+
+Insert this block immediately above the existing final line `echo "==> All checks passed."` (which must remain the last line of the file):
+
+```bash
+# ── phase 5: STATUS column ────────────────────────────────────────────────────
+
+echo "==> [5/8] Asserting STATUS=HEALTHY for all peers..."
+PF_PID=$(pf_start "$LEADER_ORDINAL" "$HTTP_PORT")
+pf_wait "$HTTP_PORT" || { echo "ERROR: port-forward to leader failed"; exit 1; }
+
+cluster_status_assert_healthy "$HTTP_PORT" || exit 1
+
+pf_stop "$PF_PID"
+```
+
+- [ ] **Step 3: Syntax-check the script**
+
+Run:
+
+```bash
+bash -n ci/integration-test.sh && echo OK
+```
+
+Expected: `OK`
+
+- [ ] **Step 4: Run end-to-end against the local kind cluster**
+
+Run:
+
+```bash
+make test-integration
+```
+
+Expected output: a line starting with `==> [5/8] Asserting STATUS=HEALTHY` followed by either `All N peers HEALTHY/CATCHING_UP.` or the WARNING graceful-skip line, then `==> All checks passed.`
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add ci/integration-test.sh
+git commit -m "test(integration): assert peers STATUS=HEALTHY (phase 5)"
+```
+
+---
+
+## Task 4: Add Phase 6 — runtime leadership transfer
+
+**Files:**
+- Modify: `ci/integration-test.sh` (append phase 6 after phase 5)
+
+- [ ] **Step 1: Append phase 6 before the final `echo "==> All checks passed."` line**
+
+```bash
+# ── phase 6: leadership transfer ──────────────────────────────────────────────
+
+echo "==> [6/8] Transferring Raft leadership..."
+PF_PID=$(pf_start "$LEADER_ORDINAL" "$HTTP_PORT")
+pf_wait "$HTTP_PORT" || { echo "ERROR: port-forward to leader failed"; exit 1; }
+
+CURRENT_LEADER=${LEADERS[0]}
+TARGET_PEER=$(api "$HTTP_PORT" GET /api/v1/cluster \
+  | jq -r --arg leader "$CURRENT_LEADER" \
+      '.peers[] | select(.id != $leader) | .id' | head -n1)
+[[ -n "$TARGET_PEER" ]] || { echo "ERROR: no non-leader peer found"; exit 1; }
+echo "   Current leader:  $CURRENT_LEADER"
+echo "   Transfer target: $TARGET_PEER"
+
+api "$HTTP_PORT" POST /api/v1/cluster/leader \
+  "{\"peerId\":\"$TARGET_PEER\"}" >/dev/null
+pf_stop "$PF_PID"
+
+# Wait up to 30s for the transfer to take effect on any pod we can reach.
+DEADLINE=$(( SECONDS + 30 ))
+NEW_LEADER=""
+while (( SECONDS < DEADLINE )); do
+  for i in 0 1 2; do
+    LOCAL=$(( HTTP_PORT + 20 + i ))
+    PID=$(pf_start "$i" "$LOCAL")
+    if pf_wait "$LOCAL" 5; then
+      L=$(api "$LOCAL" GET /api/v1/cluster | jq -r '.leaderId // empty' 2>/dev/null || echo "")
+      pf_stop "$PID"
+      if [[ "$L" == "$TARGET_PEER" ]]; then
+        NEW_LEADER="$L"
+        break 2
+      fi
+    else
+      pf_stop "$PID"
+    fi
+  done
+  sleep 2
+done
+
+[[ "$NEW_LEADER" == "$TARGET_PEER" ]] || {
+  echo "ERROR: leadership did not transfer; got '${NEW_LEADER:-}'"
+  exit 1
+}
+echo "   New leader: $NEW_LEADER"
+
+# Verify writes via the new leader.
+NEW_LEADER_ORDINAL=$(echo "$NEW_LEADER" | sed -nE "s/^${RELEASE}-([0-9]+)\..*$/\1/p")
+PF_PID=$(pf_start "$NEW_LEADER_ORDINAL" "$HTTP_PORT")
+pf_wait "$HTTP_PORT" || { echo "ERROR: port-forward to new leader failed"; exit 1; }
+
+api "$HTTP_PORT" POST /api/v1/command/integration-test \
+  '{"language":"sql","command":"INSERT INTO TestDoc SET name = '\''post-transfer'\''"}' \
+  >/dev/null
+
+POST_RESULT=$(api "$HTTP_PORT" POST /api/v1/query/integration-test \
+  '{"language":"sql","command":"SELECT name FROM TestDoc WHERE name = '\''post-transfer'\''"}' \
+  | jq -r '.result[0].name // empty')
+
+pf_stop "$PF_PID"
+
+[[ "$POST_RESULT" == "post-transfer" ]] || {
+  echo "ERROR: write via new leader failed (got '${POST_RESULT:-}')"
+  exit 1
+}
+echo "   Write via new leader succeeded."
+
+# Update tracked leader for downstream phases.
+LEADERS[0]=$NEW_LEADER
+LEADER_ORDINAL=$NEW_LEADER_ORDINAL
+```
+
+- [ ] **Step 2: Syntax-check**
+
+```bash
+bash -n ci/integration-test.sh && echo OK
+```
+
+Expected: `OK`
+
+- [ ] **Step 3: Run end-to-end**
+
+```bash
+make test-integration
+```
+
+Expected: a `[6/8]` block reporting `New leader: <id>` (different from the original leader) and `Write via new leader succeeded.`, followed by `==> All checks passed.`
+
+If the API responds with 404 or 405 on `/api/v1/cluster/leader`, the deployed image does not yet expose this endpoint — capture the response body via `curl -v` and report; do not silently skip (this endpoint is the entire point of phase 6).
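+
+A hedged sketch of that capture, assuming a port-forward is already up on `$HTTP_PORT` and that the script's `api` helper uses basic auth as `root` with the `PASSWORD` fetched earlier; adjust to match the actual helper:
+
+```bash
+curl -v -u "root:${PASSWORD}" \
+  -X POST "http://localhost:${HTTP_PORT}/api/v1/cluster/leader" \
+  -H 'Content-Type: application/json' \
+  -d "{\"peerId\":\"${TARGET_PEER}\"}"
+```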
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add ci/integration-test.sh
+git commit -m "test(integration): exercise runtime leadership transfer (phase 6)"
+```
+
+---
+
+## Task 5: Add Phase 7 — `helm upgrade` scale-up 3→5
+
+**Files:**
+- Modify: `ci/integration-test.sh` (append phase 7 after phase 6)
+
+- [ ] **Step 1: Append phase 7 before the final `echo "==> All checks passed."` line**
+
+```bash
+# ── phase 7: scale-up 3 -> 5 ──────────────────────────────────────────────────
+
+echo "==> [7/8] Scaling cluster from 3 to 5 replicas..."
+helm upgrade "$RELEASE" charts/arcadedb/ \
+  --reuse-values \
+  --set replicaCount=5 \
+  --wait --timeout 5m
+
+kubectl rollout status statefulset/"$RELEASE" \
+  -n "$NAMESPACE" --timeout=5m
+echo "   Rollout complete (5 pods Ready)."
+
+echo "   Re-checking quorum across 5 pods..."
+assert_quorum_n 5 || exit 1
+
+echo "   Re-asserting STATUS across all peers..."
+PF_PID=$(pf_start "$LEADER_ORDINAL" "$HTTP_PORT")
+pf_wait "$HTTP_PORT" || { echo "ERROR: port-forward to leader failed"; exit 1; }
+
+PEER_COUNT=$(api "$HTTP_PORT" GET /api/v1/cluster | jq '.peers | length')
+[[ "$PEER_COUNT" == "5" ]] || {
+  echo "ERROR: expected 5 peers in cluster status, got ${PEER_COUNT}"
+  exit 1
+}
+
+cluster_status_assert_healthy "$HTTP_PORT" || exit 1
+
+pf_stop "$PF_PID"
+```
+
+- [ ] **Step 2: Syntax-check**
+
+```bash
+bash -n ci/integration-test.sh && echo OK
+```
+
+Expected: `OK`
+
+- [ ] **Step 3: Run end-to-end**
+
+```bash
+make test-integration
+```
+
+Expected: `[7/8] Scaling cluster from 3 to 5 replicas...`, then `Rollout complete (5 pods Ready).`, `Raft leader: ... (pod-N)` (from `assert_quorum_n 5`), the peer-count check passes, the STATUS check passes, and finally `==> All checks passed.`
+
+If the rolling restart times out: increase `--timeout` to 10m and re-run; also
+check `kubectl describe pod test-arcadedb-3 -n default` for scheduling failures
+(kind clusters have limited resources).
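+
+A quick triage sequence for a stalled rollout (a sketch; pod and namespace names assume the CI defaults used throughout this plan):
+
+```bash
+kubectl get pods -n default -o wide
+kubectl describe pod test-arcadedb-3 -n default | sed -n '/Events:/,$p'
+kubectl describe pod test-arcadedb-4 -n default | sed -n '/Events:/,$p'
+```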
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add ci/integration-test.sh
+git commit -m "test(integration): scale-up 3->5 via helm upgrade (phase 7)"
+```
+
+---
+
+## Task 6: Add Phase 8 — snapshot-install recovery
+
+**Files:**
+- Modify: `ci/integration-test.sh` (append phase 8 after phase 7)
+
+- [ ] **Step 1: Append phase 8 before the final `echo "==> All checks passed."` line**
+
+```bash
+# ── phase 8: snapshot-install recovery ────────────────────────────────────────
+
+echo "==> [8/8] Snapshot-install on follower recovery..."
+
+PF_PID=$(pf_start "$LEADER_ORDINAL" "$HTTP_PORT")
+pf_wait "$HTTP_PORT" || { echo "ERROR: port-forward to leader failed"; exit 1; }
+
+echo "   Writing 100 rows to push log past snapshotThreshold=50..."
+for i in $(seq 1 100); do
+  api "$HTTP_PORT" POST /api/v1/command/integration-test \
+    "{\"language\":\"sql\",\"command\":\"INSERT INTO TestDoc SET name = 'snap-${i}'\"}" \
+    >/dev/null
+done
+echo "   Wrote 100 rows."
+
+# Pick a non-leader pod ordinal to delete.
+DELETE_ORDINAL=""
+for i in 0 1 2 3 4; do
+  if [[ "$i" != "$LEADER_ORDINAL" ]]; then
+    DELETE_ORDINAL=$i
+    break
+  fi
+done
+[[ -n "$DELETE_ORDINAL" ]] || { echo "ERROR: no non-leader pod to delete"; exit 1; }
+
+pf_stop "$PF_PID"
+
+echo "   Deleting pod ${RELEASE}-${DELETE_ORDINAL}..."
+kubectl delete pod "${RELEASE}-${DELETE_ORDINAL}" -n "$NAMESPACE" --wait=false
+kubectl wait --for=condition=Ready pod/"${RELEASE}-${DELETE_ORDINAL}" \
+  -n "$NAMESPACE" --timeout=2m
+echo "   Pod recreated and Ready."
+
+PF_PID=$(pf_start "$LEADER_ORDINAL" "$HTTP_PORT")
+pf_wait "$HTTP_PORT" || { echo "ERROR: port-forward to leader failed"; exit 1; }
+
+DEADLINE=$(( SECONDS + 90 ))
+RECOVERED=0
+LAST_STATUS=""
+while (( SECONDS < DEADLINE )); do
+  STATUS_JSON=$(api "$HTTP_PORT" GET /api/v1/cluster)
+  S=$(echo "$STATUS_JSON" \
+    | jq -r --arg p "${RELEASE}-${DELETE_ORDINAL}" \
+        '.peers[] | select(.id | startswith($p)) | .status // empty')
+  if [[ "$S" == "HEALTHY" ]]; then
+    RECOVERED=1
+    break
+  fi
+  if [[ -z "$S" ]]; then
+    HAS_STATUS_FIELD=$(echo "$STATUS_JSON" | jq -r '.peers[0].status // empty')
+    if [[ -z "$HAS_STATUS_FIELD" ]]; then
+      PEER_PRESENT=$(echo "$STATUS_JSON" \
+        | jq -r --arg p "${RELEASE}-${DELETE_ORDINAL}" \
+            '.peers[] | select(.id | startswith($p)) | .id' | head -n1)
+      if [[ -n "$PEER_PRESENT" ]]; then
+        echo "   NOTE: STATUS field absent on this image; peer is present in cluster, accepting as recovered."
+        RECOVERED=1
+        break
+      fi
+    fi
+  fi
+  LAST_STATUS=$S
+  echo "   peer ${RELEASE}-${DELETE_ORDINAL} status=${S:-}, retrying..."
+  sleep 5
+done
+pf_stop "$PF_PID"
+
+(( RECOVERED )) || {
+  echo "ERROR: recreated pod did not reach HEALTHY in 90s (last status: ${LAST_STATUS:-})"
+  exit 1
+}
+echo "   Recreated pod recovered."
+
+# Best-effort log signal: did the snapshot-install path actually run?
+if kubectl logs "${RELEASE}-${DELETE_ORDINAL}" -n "$NAMESPACE" --tail=500 2>/dev/null \
+  | grep -q SnapshotInstaller; then
+  echo "   Confirmed snapshot-install path in logs."
+else
+  echo "   NOTE: SnapshotInstaller log line not found (log wording is not a stable contract; not a failure)."
+fi
+```
+
+- [ ] **Step 2: Syntax-check**
+
+```bash
+bash -n ci/integration-test.sh && echo OK
+```
+
+Expected: `OK`
+
+- [ ] **Step 3: Run end-to-end**
+
+```bash
+make test-integration
+```
+
+Expected: an `[8/8] Snapshot-install on follower recovery...` block ending with `Recreated pod recovered.`, then `==> All checks passed.`
+
+If the recreated pod does not reach HEALTHY in 90s, capture diagnostics:
+
+```bash
+kubectl logs "test-arcadedb-${DELETE_ORDINAL}" -n default --tail=200
+kubectl logs "test-arcadedb-${LEADER_ORDINAL}" -n default --tail=200 | grep -i snapshot
+```
+
+If you see only `Snapshot download attempt N/3 failed` lines: the snapshot transfer is failing in the cluster, which is itself a real bug worth reporting; do not paper over it.
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add ci/integration-test.sh
+git commit -m "test(integration): snapshot-install on follower recovery (phase 8)"
+```
+
+---
+
+## Task 7: End-to-end CI verification
+
+**Files:**
+- No code changes; this task is pure verification.
+
+- [ ] **Step 1: Push the branch and trigger CI**
+
+```bash
+git push -u origin "$(git rev-parse --abbrev-ref HEAD)"
+```
+
+- [ ] **Step 2: Watch the `integration` job in GitHub Actions**
+
+Run:
+
+```bash
+gh run watch
+```
+
+Or open the Actions tab in the repo. The `integration` job should complete inside the 20-minute timeout, and the log should contain all eight `[N/8]` phase headers, ending with `==> All checks passed.`
+
+- [ ] **Step 3: If CI fails on a phase that passed locally**
+
+Common causes:
+- kind in CI is slower than local; bump per-phase timeouts (`RAFT_TIMEOUT`, the 90s in phase 8) before flagging it as a real bug.
+- The image tag pulled in CI may differ from the local cache; check the resolved tag in the `Install chart` step's helm output.
+
+If a flake is intermittent specifically in phase 8, gate it behind an env var:
+
+```bash
+if [[ "${RUN_SNAPSHOT_TEST:-1}" != "0" ]]; then
+  # phase 8 body
+fi
+```
+
+This is the contingency from the spec's risk section; only apply it after observing a real flake.
+
+- [ ] **Step 4: Open PR**
+
+Once CI is green:
+
+```bash
+gh pr create --fill
+```
+
+---
+
+## Acceptance Checklist
+
+- [ ] All 8 phases pass locally on an image tag that exposes the STATUS field.
+- [ ] On older image tags, P5/P7 emit the WARNING graceful-skip line and the rest of the run still passes.
+- [ ] CI completes inside the 20-minute timeout.
+- [ ] No duplicated port-forward/poll loops between phase 2 and phase 7 — both go through `assert_quorum_n`.
+- [ ] No dangling `PF_PID` background processes after the script exits (the existing `cleanup` trap covers this).
diff --git a/docs/superpowers/specs/2026-05-09-ha-integration-tests-design.md b/docs/superpowers/specs/2026-05-09-ha-integration-tests-design.md
new file mode 100644
index 0000000..3085ea8
--- /dev/null
+++ b/docs/superpowers/specs/2026-05-09-ha-integration-tests-design.md
@@ -0,0 +1,238 @@
+# HA Cluster Integration Tests — Design
+
+**Date:** 2026-05-09
+**Status:** Draft
+**Source:** Support email Q&A on Raft HA behavior (leader control, scale-up sync, large-import tuning)
+
+## Context
+
+The chart already runs a kind-based integration test (`ci/integration-test.sh`) that
+brings up a 3-pod HA cluster, verifies Raft consensus, writes via the leader, and
+reads back. This design extends that test with scenarios derived from a recent
+support exchange about ArcadeDB HA cluster operations.
+
+Three areas were discussed in the support email:
+
+- **Q1** — Controlling the leader: `arcadedb.ha.serverRole=replica` to exclude a
+  pod from leadership; runtime leadership transfer via
+  `POST /api/v1/cluster/leader`.
+- **Q2** — Sync after scale-up: the peer-add path (`POST /api/v1/cluster/peer`) and
+  the snapshot-install path (`/api/v1/ha/snapshot/{database}`).
+- **Q3** — Large (>1 GB) import recipe with replication: bring the cluster up before
+  importing, drive imports through the leader, tune Raft thresholds.
+
+A new `STATUS` column (HEALTHY / CATCHING_UP / FALLING_BEHIND / STALLED) was
+added to the cluster status table in commit `203acdaac` and is the canonical
+signal for follower health.
+
+## Goals
+
+Add automated coverage for the support scenarios that are testable inside the
+existing kind-based CI job, within the 20-minute job timeout.
+
+## Non-Goals (discarded scenarios)
+
+- **Q1a — `serverRole=replica`:** The chart applies the same `-D` flags to every
+  pod. Per-ordinal configuration is a chart change, not a test, and is out of
+  scope here.
+- **Q2a — 1→3 helm upgrade reproducing the "peer not in `HA_SERVER_LIST`" path:**
+  The chart re-renders `arcadedb.ha.serverList` on every upgrade, so the new pod
+  is always in the configured list. The wire-level peer-add scenario from the
+  support email does not reproduce through Helm.
+- **Q3 — Large-import recipe:** Operational guidance for >1 GB datasets. Not
+  testable at CI scale; the data volume is too high.
+
+## In-Scope Scenarios
+
+| ID | Scenario | Confidence | Approx. cost |
+|----|----------|------------|--------------|
+| P5 | STATUS column reports HEALTHY | High | ~5 s |
+| P6 | Runtime leadership transfer | High | ~60 s |
+| ~~P7~~ | ~~Scale-up 3→5 via `helm upgrade`~~ | **Discarded — see "Post-implementation discovery" below** | — |
+| ~~P8~~ | ~~Snapshot-install on follower recovery~~ | **Discarded — depended on P7** | — |
+
+## Post-implementation discovery (2026-05-09)
+
+P7 and P8 were implemented and exercised in CI before being discarded. The CI
+run revealed two related limitations of the deployed ArcadeDB image:
+
+1. `helm upgrade --set replicaCount=5` triggers a rolling restart of the
+   StatefulSet because the `arcadedb.ha.serverList` env var grows from 3 to 5
+   entries. With `persistence.enabled=false` (the CI default) every pod loses its
+   in-memory state during the restart cycle.
+2. ArcadeDB does not auto-vote new peers into the Raft configuration when a
+   pod with a wider serverList shows up — the support email itself notes that
+   `POST /api/v1/cluster/peer` must be called from the leader for each new
+   peer. The chart does not (and should not) issue that call.
+
+The combined effect: after `helm upgrade --set replicaCount=5`, the cluster
+falls below quorum during the rolling restart and never re-converges. P7's
+`assert_quorum_n 5` times out; P8 cannot run because it depends on a 5-pod
+cluster with the `integration-test` database.
+
+P7 and P8 are therefore removed from the in-scope list. The chart's
+serverList-rendering correctness is still covered by `helm-unittest` template
+unit tests, which assert the correct value without needing a live cluster.
+
+Future work: a "snapshot-and-restore" workflow (mentioned at the end of the
+support email) would let us cover the snapshot path on a single fresh cluster
+without the scale-up dependency. Out of scope here.
+
+## Architecture
+
+Single CI job, single kind cluster, single Helm install. Extend
+`ci/integration-test.sh` with new phases. Existing phases stay (rollout →
+quorum → write → read). New phases append on the same cluster, with one
+`helm upgrade` step in the middle. The order is chosen so that destructive
+scenarios (pod delete) run last and cannot mask earlier signals.
+
+Why one cluster instead of one per scenario: each kind cluster create costs
+~60 s. Sequencing keeps total CI time well under the existing 20-minute job
+timeout.
+
+### Install-time changes
+
+In the `Install chart` step of `.github/workflows/lint.yml`, append a low
+snapshot threshold to `arcadedb.extraCommands`:
+
+```
+--set 'arcadedb.extraCommands[1]=-Darcadedb.ha.snapshotThreshold=50'
+```
+
+Index 1 because index 0 holds the existing `-Darcadedb.server.mode=production`.
+
+A low threshold makes the snapshot-install path reachable without writing
+100k rows. It does not affect the other scenarios — they each generate fewer
+than 50 entries.
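+
+To confirm the override landed on a live release (a sketch; release name as used in CI):
+
+```bash
+helm get values test-arcadedb --all | grep -A3 extraCommands
+```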
+
+### Helpers (additions to `ci/integration-test.sh`)
+
+The existing script already has `pf_start`, `pf_stop`, `pf_wait`, `api`. Add the
+following; a sketch of the two status helpers appears after the list:
+
+- `cluster_status <port>` — fetches `GET /api/v1/cluster` and returns the
+  parsed JSON via stdout. Callers extract `.leaderId`, `.peers[]`, etc.
+- `peer_status <port> <peer-id>` — extracts a single peer's `.status`
+  field from cluster status. Returns an empty string if the field is absent.
+- `wait_status_healthy <port> <peer-id> <timeout-seconds>` — polls
+  `peer_status` until it returns `HEALTHY`. Treats `CATCHING_UP` as transient.
+  Fails on `STALLED` or `FALLING_BEHIND` only after the timeout.
+- `assert_quorum_n <n> [timeout-seconds]` — generalizes the existing
+  hardcoded 0/1/2 loop. Iterates ordinals 0..N-1, port-forwards each, reads
+  `leaderId` from each, and asserts they all agree.
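+
+A minimal sketch of the two status helpers, assuming the cluster-status JSON exposes `peers[].id` and `peers[].status` as described above; the final script may inline this logic instead:
+
+```bash
+# peer_status <port> <peer-id>: print one peer's status ("" if absent).
+peer_status() {
+  api "$1" GET /api/v1/cluster \
+    | jq -r --arg p "$2" '.peers[] | select(.id == $p) | .status // empty'
+}
+
+# wait_status_healthy <port> <peer-id> <timeout-seconds>: poll until HEALTHY.
+wait_status_healthy() {
+  local deadline=$(( SECONDS + $3 )) s
+  while (( SECONDS < deadline )); do
+    s=$(peer_status "$1" "$2")
+    [[ "$s" == "HEALTHY" ]] && return 0
+    sleep 5   # non-HEALTHY states are treated as transient until the timeout
+  done
+  echo "ERROR: peer $2 not HEALTHY after $3s (last: ${s:-})"
+  return 1
+}
+```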
+
+## Phase Detail
+
+### P5 — STATUS=HEALTHY assertion
+
+After the existing read assertion (phase 4), the script port-forwards to the
+leader and calls `GET /api/v1/cluster`. For each `peers[]` entry, assert that
+`status` is `HEALTHY` (or absent — see the graceful skip below).
+
+**Graceful skip on missing field:** If `.peers[0].status` is null/missing on
+the deployed image (older than `203acdaac`), log a warning and skip the
+assertion. Do not fail. This keeps the test compatible with image tags that
+predate the STATUS column.
+
+### P6 — Runtime leadership transfer
+
+1. From cluster status, pick a non-leader peer ID.
+2. `POST /api/v1/cluster/leader` with body `{"peerId":"<peer-id>"}`.
+3. Poll `GET /api/v1/cluster` from any pod for up to 30 s; assert `leaderId`
+   matches the chosen peer.
+4. Re-run the existing write+read sequence (insert a marker row, read it back)
+   via the new leader to confirm the cluster is still functional after the
+   transfer.
+
+Choosing a specific target peer (rather than sending an empty body) makes the
+assertion deterministic; Ratis would otherwise be free to re-elect the same
+pod and the test would have to retry.
+
+### P7 — Scale-up 3→5 via `helm upgrade`
+
+1. `helm upgrade test-arcadedb charts/arcadedb/ --set replicaCount=5
+   --reuse-values --wait --timeout 5m`.
+2. `kubectl rollout status statefulset/test-arcadedb --timeout 5m` to cover
+   the rolling restart of the original 3 pods plus the scheduling of pods 3 and 4.
+3. Run `assert_quorum_n 5`.
+4. Run the STATUS=HEALTHY assertion across all 5 peers (with the same
+   graceful-skip behavior as P5).
+
+**No data-persistence assertion.** The CI install runs with
+`persistence.enabled=false`, so the rolling restart of pods 0–2 wipes existing
+data. The assertion here is purely about cluster topology: the chart's
+`arcadedb.nodenames` helper must re-render the serverList correctly so that
+all 5 pods agree on a single leader and report HEALTHY.
+
+### P8 — Snapshot install on follower recovery
+
+1. From the post-scale-up leader, write 100 small rows in a loop. With
+   `snapshotThreshold=50` (set at install time), the leader will have produced
+   a Raft snapshot.
+2. Pick a non-leader pod (e.g. ordinal 4). `kubectl delete pod test-arcadedb-4`.
+3. Wait for the StatefulSet to recreate the pod and for it to reach `Ready`
+   (`kubectl wait --for=condition=Ready pod/test-arcadedb-4 --timeout=2m`).
+4. Poll cluster status for up to 90 s; assert the recreated peer reaches
+   `STATUS=HEALTHY`.
+5. **Secondary signal (best-effort):** `kubectl logs test-arcadedb-4` and grep
+   for `SnapshotInstaller`. Log the result but do not fail on a miss — log-line
+   wording is not a stable contract.
+
+Without persistence enabled, deleting the pod wipes its state, so the
+recreated pod's Raft log restarts at index 0. With the leader past 50 entries,
+the follower's gap exceeds the snapshot threshold, so the leader will install
+a snapshot rather than ship individual log entries.
+
+## Phase Ordering
+
+```
+1. Existing: rollout
+2. Existing: Raft consensus (3 pods)
+3. Existing: write via leader
+4. Existing: read back
+5. P5: STATUS=HEALTHY (3 pods)
+6. P6: leadership transfer + verify writes
+7. P7: helm upgrade to replicaCount=5, re-verify quorum + STATUS
+8. P8: delete pod, verify snapshot-install recovery
+```
+
+## CI Budget
+
+| Phase | Estimate |
+|-------|---------:|
+| kind create | ~60 s |
+| helm install + rollout | ~2 min |
+| Existing phases 1–4 | ~2 min |
+| P5 STATUS | ~5 s |
+| P6 leadership transfer | ~60 s |
+| P7 scale-up to 5 | ~3–5 min |
+| P8 snapshot recovery | ~2 min |
+| **Total** | **~11–13 min** |
+
+Comfortably under the 20-minute job timeout.
+
+## Risks and Mitigations
+
+- **STATUS field absent on older image tags.** The chart's `image.tag` defaults
+  to `appVersion`; if a release predates commit `203acdaac`, the STATUS field
+  is missing. Mitigation: graceful skip with a warning, not a hard failure.
+- **Leadership-transfer flake (Ratis re-elects the same pod).** Mitigation:
+  send an explicit `peerId` instead of an empty body.
+- **Scale-up rolling restart loses data.** Mitigation: do not assert data
+  survival in P7; only assert cluster topology.
+- **P8 is the most flake-prone phase.** It runs last so a P8 failure cannot
+  mask earlier signals. If P8 proves flaky in practice, gate it behind a
+  `RUN_SNAPSHOT_TEST=1` env var rather than disabling the rest of the file.
+
+## Acceptance
+
+The work is complete when:
+
+1. `make test-integration` against a kind cluster passes all 8 phases on a
+   chart pinned to an image tag that includes the STATUS column.
+2. The same script run against an image tag that predates the STATUS column
+   skips P5/P7's STATUS assertions with a warning and still passes the rest.
+3. CI (`.github/workflows/lint.yml`) installs the chart with the
+   `snapshotThreshold=50` override and runs the extended script in under
+   the existing 20-minute job timeout.
+4. The new helpers in `ci/integration-test.sh` are factored out and reused
+   across phases (no duplicated port-forward/poll loops).