diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index c4183ef..421483f 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -231,8 +231,8 @@ }, { "name": "che-telegram-mcp", - "version": "1.3.1", - "description": "Telegram MCP Server Plugin — Bot API + 個人帳號 TDLib 全功能存取,28+ 工具,Keychain 密鑰管理", + "version": "1.3.2", + "description": "Telegram MCP Server Plugin — Bot API + 個人帳號 TDLib 全功能存取,28+ 工具,Keychain 密鑰管理。v1.3.2: lock-refused 分支 emit MCP JSON-RPC error envelope,取代通用 -32000", "author": { "name": "Che Cheng" }, @@ -410,4 +410,4 @@ "category": "development" } ] -} \ No newline at end of file +} diff --git a/plugins/che-telegram-mcp/.claude-plugin/plugin.json b/plugins/che-telegram-mcp/.claude-plugin/plugin.json index 9fb8c9a..910ce1b 100644 --- a/plugins/che-telegram-mcp/.claude-plugin/plugin.json +++ b/plugins/che-telegram-mcp/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "che-telegram-mcp", - "version": "1.3.1", - "description": "Telegram MCP Server Plugin — Bot API + 個人帳號 TDLib 全功能存取,28+ 工具,Keychain 密鑰管理", + "version": "1.3.2", + "description": "Telegram MCP Server Plugin — Bot API + 個人帳號 TDLib 全功能存取,28+ 工具,Keychain 密鑰管理。v1.3.2: lock-refused 分支 emit MCP JSON-RPC error envelope,取代通用 -32000", "author": { "name": "Che Cheng" }, "license": "MIT", "keywords": ["mcp", "telegram", "messaging", "tdlib", "bot", "chat", "macos", "keychain"] diff --git a/plugins/che-telegram-mcp/CHANGELOG.md b/plugins/che-telegram-mcp/CHANGELOG.md index 117277f..3d77d68 100644 --- a/plugins/che-telegram-mcp/CHANGELOG.md +++ b/plugins/che-telegram-mcp/CHANGELOG.md @@ -11,6 +11,29 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [1.3.2] - 2026-05-22 + +### Fixed +- `che-telegram-all-mcp-wrapper.sh`: lock-refused branch now emits a JSON-RPC 2.0 error envelope to stdout before exiting, so Claude Code's MCP client parses the human-readable message + structured data instead 
of seeing only `-32000 Server error`. Envelope carries: + - `error.code: -32000` (JSON-RPC server-defined errors range) + - `error.message: "Another instance of CheTelegramAllMCP is already running (lock held by PID NNNN). Use the existing Claude Code window, or kill the previous wrapper first."` + - `error.data.lockHolderPid: <pid>` (machine-readable lock holder; `null` when the holder is unknown) + - `error.data.recoveryCommand: "pkill CheTelegramAllMCP 2>/dev/null; rm -rf ~/.cache/che-telegram-all-mcp.lock ~/.cache/che-telegram-all-mcp.lock.flock"` (semicolon, not `&&`, so cleanup runs even when no process exists) + - `error.data.docsUrl: https://.../README.md#multi-session-limitation` + + The original stderr message is retained for direct-shell debug. + + **PR-1b id matching (added 2026-05-22 after empirical verification)**: wrapper reads the first line of stdin (with 2s timeout) to extract the JSON-RPC `initialize` request's `id` field, then emits the response envelope with **matching id**. This was required because empirical two-session reproduction in Claude Code v2.1.148 showed that `id: null` responses (the v1.3.2 first attempt) are not matched to pending `initialize` requests and don't surface in Claude Code's MCP error state. With matching-id (PR-1b), debug-log capture confirmed Claude Code's MCP client correctly parses the envelope + stores the full `error.message` internally. + + Stdin extraction uses `jq` when available (preferred) and a bash regex fallback for environments without jq. Handles the JSON-RPC 2.0 id forms: integer, quoted string, or null. + + New `test-wrapper-mcp-error.sh` covers 6 cases: happy path / lock refused emits valid JSON / stale-lock self-recovery / recoveryCommand validation / id-matching with initialize request / empty-stdin fallback to null. Resolves [#31](https://github.com/PsychQuant/che-msg/issues/31). + + **Known UX gap** (out of plugin scope): Claude Code's `/mcp` short-list UI may display only `-32000` (truncated form) instead of the full message. 
The full message IS captured in Claude Code's internal MCP error state (verified via `--debug mcp` debug logs) and is available to downstream tool consumers. The display truncation is a Claude Code UI policy concern, not a plugin issue. + +### Documentation +- `README.md`: added `## Multi-session limitation` section explaining the TDLib single-instance constraint, the v1.3.2+ human-readable error message, and a recovery cookbook (`pkill CheTelegramAllMCP 2>/dev/null; rm -rf ~/.cache/che-telegram-all-mcp.lock ~/.cache/che-telegram-all-mcp.lock.flock`). Documents the pre-v1.3.2 generic `-32000` symptom for users upgrading. + ## [1.3.1] - 2026-05-07 ### Fixed diff --git a/plugins/che-telegram-mcp/README.md b/plugins/che-telegram-mcp/README.md index 848184e..375fe2e 100644 --- a/plugins/che-telegram-mcp/README.md +++ b/plugins/che-telegram-mcp/README.md @@ -178,6 +178,54 @@ Or just ask naturally: `get_me`, `get_updates`, `send_message`, `forward_message`, `get_chat`, `get_chat_administrators`, `get_chat_member_count`, `get_chat_member`, `set_chat_title`, `set_chat_description`, `pin_chat_message`, `unpin_chat_message`, `unpin_all_chat_messages`, `ban_chat_member`, `unban_chat_member`, `restrict_chat_member`, `promote_chat_member`, `leave_chat`, `delete_message`, `edit_message_text`, `copy_message`, `send_photo`, `send_document`, `send_video`, `send_audio`, `send_sticker`, `send_location`, `send_poll`, `set_my_commands`, `get_my_commands`, `delete_my_commands` +## Multi-session limitation + +`telegram-all` uses [TDLib](https://core.telegram.org/tdlib), which keeps the session in a SQLite database with an exclusive WAL lock — **only one process can hold it at a time**. `telegram-bot` is not affected (Bot API is HTTP-based and stateless, so any number of Claude Code sessions can run it in parallel). + +If you have **two or more Claude Code sessions open simultaneously**, only the first session can spawn `telegram-all`. 
The second session's wrapper detects the lock + refuses to start (preventing TDLib database corruption from concurrent writes). + +### What you'll see in v1.3.2+ + +Claude Code's MCP client receives a human-readable error such as the following (the `/mcp` short-list UI may truncate it to `-32000`; run with `--debug mcp` to see the full message): + +``` +mcp__plugin_che-telegram-mcp_telegram-all: Another instance of CheTelegramAllMCP is already running (lock held by PID 11252). Use the existing Claude Code window, or kill the previous wrapper first. +``` + +The error envelope also carries `data.recoveryCommand` and `data.docsUrl` (this section) for clients that show structured error data. + +### Recovery cookbook + +When you need to free the lock for the current session: + +```bash +# 1. Kill any running telegram-all binary (single instance assumption — safe). +# If you chain steps 1 and 2 on one line, use `2>/dev/null` and `; ` +# (not `&&`) — the orphan-lock case (where you most need recovery) has +# no process to kill, and `&&` would skip the cleanup below. `; ` +# ensures the lock removal always runs. +pkill CheTelegramAllMCP 2>/dev/null + +# 2. Remove BOTH lock variants. macOS without flock uses `.lock` directory; +# Linux with flock uses `.lock.flock` file. The wrapper picks one at +# runtime — recovery should clean both so it works on any platform. +rm -rf ~/.cache/che-telegram-all-mcp.lock ~/.cache/che-telegram-all-mcp.lock.flock + +# 3. (Optional) Confirm no stale process holds TDLib DB files. +lsof ~/Library/Application\ Support/che-telegram-all-mcp/tdlib/db.sqlite 2>/dev/null + +# 4. Restart Claude Code or run /mcp to reconnect. +``` + +Step 1 is graceful — the binary handles `SIGTERM` and `wait`s for TDLib to checkpoint the WAL before exiting. Step 2 removes the wrapper's atomic-claim guard (both `.lock` directory for mkdir mode and `.lock.flock` file for flock mode). After both, the next Claude Code session that spawns `telegram-all` will succeed. 
+ +### Pre-v1.3.2 symptom + +If you're on `che-telegram-mcp` plugin **v1.3.1 or earlier**, the lock-refused branch only wrote to stderr (which Claude Code's MCP transport doesn't surface), so users would just see a generic `-32000 Server error` with no recovery hint. Upgrade to **v1.3.2+** for the human-readable message described above. See [#31](https://github.com/PsychQuant/che-msg/issues/31) for the diagnosis. + +### Why we don't auto-clean stale binaries + +Killing a TDLib process mid-write can corrupt the database (WAL checkpoint mid-flight). The wrapper deliberately requires manual intervention so the user — who knows whether the other Claude Code session is genuinely abandoned or just backgrounded — makes the destructive call. + ## Permissions This plugin requires: @@ -188,10 +236,20 @@ This plugin requires: ## Version -Plugin version: 1.3.0 (currently pins `che-telegram-all-mcp` v0.5.0 + `che-telegram-bot-mcp` v0.5.0 binaries; wrapper auto-upgrades on version mismatch) +Plugin version: 1.3.2 (currently pins `che-telegram-all-mcp` v0.5.0 + `che-telegram-bot-mcp` v0.5.0 binaries; wrapper auto-upgrades on version mismatch) ### Changelog +**1.3.2** (2026-05-22) + +- **Lock-refused branch emits MCP JSON-RPC error envelope to stdout** (refs [che-msg#31](https://github.com/PsychQuant/che-msg/issues/31)). When a second Claude Code session tries to spawn `telegram-all` while another session still holds the TDLib lock, the wrapper now writes a `{"jsonrpc":"2.0","id":<request-id>,"error":{...}}` envelope to stdout before exit. The wrapper reads the first line of stdin (2s timeout) to extract the JSON-RPC `initialize` request's `id` and responds with the matching id, so Claude Code's MCP client surfaces `error.message` (e.g. `"Another instance of CheTelegramAllMCP is already running (lock held by PID 11252). Use the existing Claude Code window, or kill the previous wrapper first."`) instead of generic `-32000 Server error`. 
Falls back to `id: null` only when stdin is empty (direct-shell debug). `error.data` carries `lockHolderPid`, `recoveryCommand`, and `docsUrl`. +- **Multi-session limitation README section** documents the TDLib upstream constraint + recovery cookbook + Strategy B/C explicit non-decisions. +- **Recovery cookbook hardened**: `pkill ... ; rm -rf ...` (semicolon, not `&&`) so cleanup runs even when no process exists to kill — the orphan-lock case is exactly when cleanup matters most. Also covers both `.lock` (mkdir mode) and `.lock.flock` (flock mode) paths. + +**1.3.1** (2026-05-07) + +- **Atomic-claim lock**: wrapper now uses `flock` (Linux) or `mkdir` (macOS fallback) to prevent two simultaneous wrappers from racing the TDLib lock. Second wrapper fail-fast with stderr message instead of silent SIGTERM cross-fire. Stale-lock cleanup removes orphaned locks whose owner PID is dead. New regression test (`test-wrapper-pid.sh` test 9). Resolves [#10](https://github.com/PsychQuant/psychquant-claude-plugins/issues/10). + **1.3.0** (2026-04-28) - **Auto-upgrade wrappers**: each wrapper now pins a `DESIRED_VERSION` and writes a `~/bin/.${BINARY_NAME}.version` sidecar on install. When the plugin bumps the desired version, the wrapper detects the sidecar mismatch and atomically re-downloads (`.tmp` → `mv`) on next spawn. Source builds in `~/Developer/...` are never auto-replaced. Falls back to the existing binary on network failure (no brick). 
diff --git a/plugins/che-telegram-mcp/bin/che-telegram-all-mcp-wrapper.sh b/plugins/che-telegram-mcp/bin/che-telegram-all-mcp-wrapper.sh index 3fa8d1b..c2f55b3 100755 --- a/plugins/che-telegram-mcp/bin/che-telegram-all-mcp-wrapper.sh +++ b/plugins/che-telegram-mcp/bin/che-telegram-all-mcp-wrapper.sh @@ -99,6 +99,90 @@ if [[ -z "$TELEGRAM_API_ID" || -z "$TELEGRAM_API_HASH" ]]; then exit 1 fi +# --- MCP-shaped error envelope helpers (#31) --- +# When the atomic-claim lock below refuses startup, Claude Code's MCP +# transport otherwise sees the wrapper exit non-zero with no stdout and +# surfaces a generic "-32000 Server error" to the user. By emitting a +# JSON-RPC 2.0 error envelope to stdout BEFORE exit, Claude Code's MCP +# client can render error.message — turning the opaque -32000 into a +# human-readable instruction. +# +# PR-1b (empirical-driven, 2026-05-22): the v1.3.2 first attempt used +# `id: null` per JSON-RPC 2.0 § 5 ("If there was an error in detecting +# the id... it MUST be Null"). Empirical two-session reproduction in +# Claude Code showed the client drops null-id responses as unmatched +# transport noise and still surfaces generic -32000. Fix: read stdin +# briefly to capture the initialize request's id and respond with +# matching id so the MCP client recognizes the response. +# +# The functions are JSON-safe by construction: the only dynamic values +# (lock holder PID, request id) are either gated by numeric regex +# upstream or extracted via jq / strict bash regex. + +# Read first line of stdin (expected: JSON-RPC initialize request) with a +# short timeout, extract the request id. Falls back to "null" if stdin is +# empty, times out, or doesn't contain valid JSON. +# +# Output format mirrors JSON literal: numeric id printed unquoted (e.g. +# `42`), string id wrapped in JSON quotes (e.g. `"abc"`), missing/invalid +# id returns `null`. Caller substitutes this directly into the JSON +# envelope's `"id":` slot. 
# --- MCP-shaped error envelope helpers (#31) ---
# When the atomic-claim lock refuses startup, the wrapper writes a JSON-RPC
# 2.0 error envelope to stdout before exiting so the MCP client can render
# error.message instead of a generic "-32000 Server error". The envelope
# must echo the id of the pending `initialize` request, so that id is read
# from stdin first.
#
# Both functions are deliberately self-contained (no shared helper
# functions): the test harness extracts each one individually with
# `sed -n '/^name()/,/^}$/p'`.

# read_initialize_id
#
# Reads the first line of stdin (expected: the JSON-RPC `initialize`
# request), waiting at most 2 seconds, and prints the request id as a JSON
# literal with no trailing newline:
#   - integer id  -> printed bare            (e.g. 42)
#   - string id   -> printed with its quotes (e.g. "abc")
#   - anything else (no input, timeout, invalid JSON, missing id, or an id
#     that is not a number/string) -> null
#
# The output is meant to be substituted directly into the `"id":` slot of
# the error envelope.
read_initialize_id() {
    local line=""
    local id="null"
    local extracted=""
    # Fallback patterns, kept in variables so they behave the same on every
    # bash version. The numeric pattern requires the closing `,` or `}` so a
    # truncated or fractional number (e.g. `1.5`) is never half-matched.
    # The string pattern tolerates backslash-escaped characters and captures
    # the surrounding quotes, so the match is already a JSON string literal.
    local num_re='"id"[[:space:]]*:[[:space:]]*(-?[0-9]+)[[:space:]]*[,}]'
    local str_re='"id"[[:space:]]*:[[:space:]]*("([^"\\]|\\.)*")'

    # `read` returns non-zero on timeout AND when EOF arrives before a
    # newline; in the latter case `line` still holds the request. Ignore the
    # status and decide on the content instead.
    IFS= read -r -t 2 line 2>/dev/null || true

    if [ -n "$line" ]; then
        if command -v jq >/dev/null 2>&1; then
            # Accept only number/string ids; everything else collapses to
            # null. `head -n 1` guards against several JSON documents on one
            # line producing a multi-line result.
            extracted=$(printf '%s' "$line" \
                | jq -c '.id | if type == "number" or type == "string" then . else null end' 2>/dev/null \
                | head -n 1) || true
            if [ -n "$extracted" ]; then
                id="$extracted"
            fi
        elif [[ "$line" =~ $num_re ]]; then
            # No jq available. Known limitation of this fallback: it takes
            # the first "id" key on the line, even if that key is nested.
            id="${BASH_REMATCH[1]}"
        elif [[ "$line" =~ $str_re ]]; then
            id="${BASH_REMATCH[1]}"
        fi
    fi

    printf '%s' "$id"
}

# emit_mcp_error_response OWNER_PID REQUEST_ID
#
# Writes one JSON-RPC 2.0 error envelope (single line, newline-terminated)
# to stdout.
#   OWNER_PID   PID of the lock holder; 0, empty or non-numeric means
#               "unknown" (e.g. the flock branch) and yields
#               "lockHolderPid":null with no PID phrase in the message.
#   REQUEST_ID  JSON literal for the envelope id, normally the output of
#               read_initialize_id. Anything that is not a JSON number,
#               JSON string or `null` is replaced by null so the envelope
#               always stays valid JSON.
emit_mcp_error_response() {
    local owner_pid="${1:-0}"
    local request_id="${2:-null}"
    local pid_phrase=""
    local pid_field="null"
    local id_re='^(null|-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][+-]?[0-9]+)?|"([^"\\]|\\.)*")$'

    if [[ "$owner_pid" =~ ^[0-9]+$ ]]; then
        # JSON numbers may not carry leading zeros ("007" is invalid JSON),
        # so normalise before substituting.
        while [[ "$owner_pid" == 0?* ]]; do
            owner_pid="${owner_pid#0}"
        done
        if [ "$owner_pid" != "0" ]; then
            pid_phrase=" (lock held by PID ${owner_pid})"
            pid_field="${owner_pid}"
        fi
    fi

    if ! [[ "$request_id" =~ $id_re ]]; then
        request_id="null"
    fi

    # recoveryCommand joins its two steps with `;` rather than `&&`: when no
    # process exists pkill exits non-zero, and `&&` would then skip the lock
    # cleanup. Both lock paths are removed (`.lock` directory for mkdir
    # mode, `.lock.flock` file for flock mode).
    printf '{"jsonrpc":"2.0","id":%s,"error":{"code":-32000,"message":"Another instance of CheTelegramAllMCP is already running%s. Use the existing Claude Code window, or kill the previous wrapper first.","data":{"lockHolderPid":%s,"recoveryCommand":"pkill CheTelegramAllMCP 2>/dev/null; rm -rf ~/.cache/che-telegram-all-mcp.lock ~/.cache/che-telegram-all-mcp.lock.flock","docsUrl":"https://github.com/PsychQuant/psychquant-claude-plugins/blob/main/plugins/che-telegram-mcp/README.md#multi-session-limitation"}}}\n' \
        "$request_id" "$pid_phrase" "$pid_field"
}
#!/bin/bash
# Smoke test for wrapper MCP-shaped JSON-RPC error emission (#31).
#
# Verifies that the lock-refused branch of che-telegram-all-mcp-wrapper.sh
# emits a valid JSON-RPC 2.0 error envelope to stdout (in addition to the
# existing stderr message), so Claude Code's MCP client can surface a
# human-readable error message instead of generic -32000.
#
# Only the mkdir lock mode is exercised: the fake wrapper built below skips
# the flock branch so results are identical on machines with/without flock.
#
# Tests:
#   1. test_happy_path_no_lock
#        no lock -> wrapper forks fake binary, exits 0, empty stdout
#   2. test_lock_refused_emits_valid_json
#        alive-PID lock -> first stdout line is a JSON-RPC 2.0 error with
#        id=null, code=-32000 and a non-trivial message
#   3. test_stale_lock_self_recovery
#        dead-PID lock -> stale cleanup runs and the wrapper proceeds
#   4. test_json_data_includes_recovery_command
#        .error.data.recoveryCommand starts with "pkill"
#   5. test_lock_refused_with_initialize_request_id_matches
#        initialize request with id=42 on stdin -> response id is 42
#   6. test_lock_refused_no_stdin_falls_back_to_null_id
#        empty stdin (immediate EOF) -> response id is null
#
# Usage:
#   ./test-wrapper-mcp-error.sh
#
# Exit: 0 on all pass, 1 on any failure.

set -u

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
WRAPPER="$SCRIPT_DIR/che-telegram-all-mcp-wrapper.sh"
FAIL=0
TOTAL=0

# Reporting helpers: pass/fail print one result line, test_case opens a test.
pass() { echo "  ✓ $1"; }
fail() { echo "  ✗ $1"; FAIL=$((FAIL + 1)); }
test_case() { TOTAL=$((TOTAL + 1)); echo "Test: $1"; }

# Verify jq is available — required for JSON validation.
if ! command -v jq >/dev/null 2>&1; then
    echo "ERROR: jq is required for these tests. Install via 'brew install jq'." >&2
    exit 1
fi

# make_fake_wrapper OUT LOCK_DIR PID_FILE BINARY_NAME
#
# Writes a standalone executable to OUT that mirrors the real wrapper's
# atomic-claim lock block, so lock-refusal behaviour can be tested without
# spawning the real binary or touching real Telegram credentials. The two
# envelope helpers are pulled from the real wrapper at run time with sed, so
# the test always exercises the shipped implementation.
#
# The heredoc is unquoted on purpose: the function arguments and WRAPPER are
# expanded while generating the file, everything escaped with a backslash is
# evaluated when the fake wrapper runs.
make_fake_wrapper() {
    local out=$1
    local lock_dir=$2
    local pid_file=$3
    local binary_name=$4

    # NOTE(review): the heredoc opener and the variable preamble (through
    # the first grep line) were unreadable in the reviewed source and are
    # reconstructed from how the rest of the template uses them. In
    # particular BINARY="sleep" is inferred from the template invoking the
    # binary with a sleep-duration argument. Confirm against the original.
    cat > "$out" <<FAKE_EOF
#!/bin/bash
LOCK_DIR="$lock_dir"
PID_FILE="$pid_file"
BINARY_NAME="$binary_name"
BINARY="sleep"

if grep -q '^read_initialize_id' "$WRAPPER" 2>/dev/null; then
    # shellcheck disable=SC1090
    eval "\$(sed -n '/^read_initialize_id()/,/^}\$/p' "$WRAPPER")"
fi
if grep -q '^emit_mcp_error_response' "$WRAPPER" 2>/dev/null; then
    # shellcheck disable=SC1090
    eval "\$(sed -n '/^emit_mcp_error_response()/,/^}\$/p' "$WRAPPER")"
fi

# Mirror the real wrapper's atomic-claim block. Force mkdir mode (skip flock
# branch) so tests are deterministic across machines with/without flock.
if ! mkdir "\$LOCK_DIR" 2>/dev/null; then
    OWNER_PID=
    [ -f "\$LOCK_DIR/owner.pid" ] && read -r OWNER_PID < "\$LOCK_DIR/owner.pid" 2>/dev/null
    if [[ "\$OWNER_PID" =~ ^[0-9]+\$ ]] && ! kill -0 "\$OWNER_PID" 2>/dev/null; then
        rm -rf "\$LOCK_DIR"
        mkdir "\$LOCK_DIR" 2>/dev/null || {
            echo "\$BINARY_NAME: Failed to claim lock (stale-cleanup race). Retry shortly." >&2
            exit 1
        }
    else
        # PR-1b: read initialize id from stdin so MCP client matches the response.
        REQ_ID=\$(read_initialize_id)
        emit_mcp_error_response "\${OWNER_PID:-0}" "\$REQ_ID"
        echo "\$BINARY_NAME: Another instance is already running (lock held by PID \${OWNER_PID:-?}). Use the existing Claude Code window, or kill the previous wrapper first." >&2
        exit 1
    fi
fi
echo \$\$ > "\$LOCK_DIR/owner.pid"

# Mimic the real wrapper: fork BINARY with sleep duration arg.
"\$BINARY" 1 <&0 &
BIN_PID=\$!
echo "\$BIN_PID" > "\$PID_FILE"

cleanup() {
    if [[ -n "\$BIN_PID" ]] && kill -0 "\$BIN_PID" 2>/dev/null; then
        kill -TERM "\$BIN_PID" 2>/dev/null
        wait "\$BIN_PID" 2>/dev/null
    fi
    [ -d "\$LOCK_DIR" ] && {
        OWNER_PID=
        [ -f "\$LOCK_DIR/owner.pid" ] && read -r OWNER_PID < "\$LOCK_DIR/owner.pid" 2>/dev/null
        [[ "\$OWNER_PID" == "\$\$" ]] && rm -rf "\$LOCK_DIR"
    }
    [ -f "\$PID_FILE" ] && {
        CURRENT_PID=
        read -r CURRENT_PID < "\$PID_FILE" 2>/dev/null || true
        [[ "\$CURRENT_PID" == "\$BIN_PID" ]] && rm -f "\$PID_FILE"
    }
}
trap cleanup EXIT INT TERM

wait "\$BIN_PID"
exit \$?
FAKE_EOF
    chmod +x "$out"
}

# Private scratch directory for this run. Deliberately NOT named TMPDIR:
# that variable is consulted by mktemp and other tools, and overwriting it
# would silently redirect every child process's temp files.
WORK_DIR=$(mktemp -d -t test-wrapper-mcp-error.XXXXXX)
trap 'rm -rf "$WORK_DIR"' EXIT

# Build a single fake wrapper used by all tests (lock + pid paths injected).
LOCK_DIR="$WORK_DIR/test-lock"
PID_FILE="$WORK_DIR/test.pid"
FAKE_WRAPPER="$WORK_DIR/fake-wrapper.sh"
# Captured stderr lives inside WORK_DIR instead of a predictable /tmp path.
STDERR_LOG="$WORK_DIR/stderr.log"
make_fake_wrapper "$FAKE_WRAPPER" "$LOCK_DIR" "$PID_FILE" "CheTelegramAllMCP"

# hold_lock_as PID — (re)create the lock directory and mark PID as its owner.
hold_lock_as() {
    rm -rf "$LOCK_DIR"
    mkdir -p "$LOCK_DIR"
    echo "$1" > "$LOCK_DIR/owner.pid"
}

# first_line TEXT — print only the first line of TEXT.
first_line() {
    printf '%s\n' "$1" | head -n 1
}

echo "=================================="
echo "test-wrapper-mcp-error.sh (#31)"
echo "=================================="

# ---------------------------------------------------------------
test_case "test_happy_path_no_lock"
rm -rf "$LOCK_DIR" "$PID_FILE"
STDOUT=$("$FAKE_WRAPPER" < /dev/null 2>"$STDERR_LOG")
EXIT=$?
if [ "$EXIT" -eq 0 ]; then
    pass "exits 0 on happy path"
else
    fail "expected exit 0, got $EXIT (stderr: $(cat "$STDERR_LOG" 2>/dev/null))"
fi
if [ -z "$STDOUT" ]; then
    pass "no stdout on happy path"
else
    fail "expected empty stdout, got: $STDOUT"
fi

# ---------------------------------------------------------------
test_case "test_lock_refused_emits_valid_json"
# Use our own PID as the "alive holder" — guaranteed alive during this script.
hold_lock_as "$$"

STDOUT=$("$FAKE_WRAPPER" < /dev/null 2>/dev/null)
EXIT=$?

if [ "$EXIT" -eq 1 ]; then
    pass "exits 1 when lock refused"
else
    fail "expected exit 1, got $EXIT"
fi

FIRST_LINE=$(first_line "$STDOUT")
if [ -z "$FIRST_LINE" ]; then
    fail "expected JSON-RPC error envelope on stdout, got empty"
elif echo "$FIRST_LINE" | jq -e '.jsonrpc == "2.0" and .id == null and .error.code == -32000 and (.error.message | length > 50)' >/dev/null 2>&1; then
    pass "stdout first line is valid JSON-RPC 2.0 error envelope"
else
    fail "stdout first line failed envelope validation: $FIRST_LINE"
fi
rm -rf "$LOCK_DIR"

# ---------------------------------------------------------------
test_case "test_stale_lock_self_recovery"
rm -rf "$PID_FILE"
# Use a PID that has already exited: the subshell prints its own PID and ends
# before the command substitution returns.
DEAD_PID=$(bash -c 'echo $$; exit 0')
hold_lock_as "$DEAD_PID"

STDOUT=$("$FAKE_WRAPPER" < /dev/null 2>"$STDERR_LOG")
EXIT=$?

if [ "$EXIT" -eq 0 ]; then
    pass "stale-cleanup path exits 0 (wrapper proceeded)"
else
    fail "expected exit 0 after stale cleanup, got $EXIT (stderr: $(cat "$STDERR_LOG" 2>/dev/null))"
fi
if [ -z "$STDOUT" ]; then
    pass "stale-cleanup path emits no stdout (proceeded normally)"
else
    fail "expected empty stdout, got: $STDOUT"
fi
rm -rf "$LOCK_DIR"

# ---------------------------------------------------------------
test_case "test_json_data_includes_recovery_command"
hold_lock_as "$$"

STDOUT=$("$FAKE_WRAPPER" < /dev/null 2>/dev/null)
FIRST_LINE=$(first_line "$STDOUT")

if [ -z "$FIRST_LINE" ]; then
    fail "expected JSON on stdout (cannot validate recoveryCommand)"
else
    RECOVERY=$(echo "$FIRST_LINE" | jq -r '.error.data.recoveryCommand // empty' 2>/dev/null)
    if [[ "$RECOVERY" == pkill* ]]; then
        pass "error.data.recoveryCommand starts with 'pkill'"
    else
        fail "expected recoveryCommand starting with 'pkill', got: '$RECOVERY'"
    fi
fi
rm -rf "$LOCK_DIR"

# ---------------------------------------------------------------
# PR-1b: wrapper must read initialize id from stdin and respond with matching id
# so Claude Code's MCP client surfaces error.message instead of dropping the
# response as unmatched.
test_case "test_lock_refused_with_initialize_request_id_matches"
hold_lock_as "$$"

# Feed a JSON-RPC initialize request to stdin; wrapper should respond with id=42
INIT_REQ='{"jsonrpc":"2.0","id":42,"method":"initialize","params":{"protocolVersion":"2024-11-05","capabilities":{}}}'
STDOUT=$(printf '%s\n' "$INIT_REQ" | "$FAKE_WRAPPER" 2>/dev/null)
FIRST_LINE=$(first_line "$STDOUT")

if [ -z "$FIRST_LINE" ]; then
    fail "expected JSON envelope on stdout, got empty"
else
    RESP_ID=$(echo "$FIRST_LINE" | jq -c '.id' 2>/dev/null)
    if [ "$RESP_ID" = "42" ]; then
        pass "response.id matches request.id (42)"
    else
        fail "expected response.id == 42, got: $RESP_ID (full: $FIRST_LINE)"
    fi
fi
rm -rf "$LOCK_DIR"

# ---------------------------------------------------------------
# Empty-stdin fallback: when no initialize arrives (e.g. direct shell debug),
# read_initialize_id should yield null and the envelope should carry id:null.
# Stdin is /dev/null, so this exercises the immediate-EOF path; the 2s
# timeout path itself is not covered here (it would slow the suite down).
test_case "test_lock_refused_no_stdin_falls_back_to_null_id"
hold_lock_as "$$"

STDOUT=$("$FAKE_WRAPPER" < /dev/null 2>/dev/null)
FIRST_LINE=$(first_line "$STDOUT")

if [ -z "$FIRST_LINE" ]; then
    fail "expected JSON envelope even on empty stdin, got empty"
else
    RESP_ID=$(echo "$FIRST_LINE" | jq -c '.id' 2>/dev/null)
    if [ "$RESP_ID" = "null" ]; then
        pass "response.id falls back to null when stdin has no initialize"
    else
        fail "expected response.id == null on empty stdin, got: $RESP_ID"
    fi
fi
rm -rf "$LOCK_DIR"

# ---------------------------------------------------------------
echo ""
echo "=================================="
if [ "$FAIL" -eq 0 ]; then
    echo "✓ All $TOTAL tests passed"
    exit 0
else
    echo "✗ $FAIL / $TOTAL tests failed"
    exit 1
fi