From 8baeb4743a11cd1d62b006241b226541ac7341e6 Mon Sep 17 00:00:00 2001 From: Clay Good Date: Tue, 5 May 2026 10:55:06 -0500 Subject: [PATCH] post-merge bugfixes from PR #11 review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Found via deep review of the v29/v30 merge. Seven real issues fixed: 1. **Branch-exists checks always returned False.** `_run_cmd` returns the stripped stdout *string*, but `_branch_exists_locally` / `_branch_exists_remotely` were calling `getattr(result, "stdout", "")` on that string — which always yields `""`. The disambiguation tests passed only because they mocked the predicates directly. Fixed both helpers. 2. **`_run_lock` was misused as a non-context-manager.** `main()` called `cm.__enter__()` without ever calling `__exit__`, so the generator's `try: yield ... finally: _release()` block never fired on the exception path. Re-wired through `contextlib.ExitStack` and registered `stack.close` with `atexit`. The `_release` helper is now also idempotent (guarded by a flag) so the atexit + finally double-fire can't close a reused fd. 3. **Engine-fallover stale reference.** The rate-limit `while` loop updated `self.engine` only inside the loop body, leaving the verify/fix path on the stale engine when the loop exited. Now `self.engine` is re-bound from `self._engines[0]` before each `execute_chunk` and after each pop. 4. **Postmortem markdown allowed log-fence injection.** Log lines and ledger-supplied chunk titles were embedded raw into the markdown body. Now backticks in the log tail are split with a zero-width joiner and chunk titles are stripped of newlines / backticks before rendering. 5. **`enforce_token_budget` was O(n²) and ordered sub-chunks wrong.** `list.pop(0)` is O(n); a recursion that splits 50 chunks could spend most of its time shifting the queue. Switched to `collections.deque` with `popleft` / `appendleft` and clarified the comment about dependency ordering. 6. **AuditLogger could hang on a peer process's slow rotation.** The cross-process flock used the blocking `LOCK_EX` mode. Replaced with a non-blocking acquire + bounded retry (3 attempts × 10 ms); on timeout we log a one-shot warning and proceed with intra-process locking only. 7. **`verify_paths` ran pytest as bare `python -m pytest`.** That picks up whatever `python` is first on PATH — potentially Python 2 or a different venv. Now uses `sys.executable`, matching the existing `check_tests` convention. Quality: - pytest 1928 passed - ruff check / format clean - No public API changes; no test-fixture rewrites needed Co-Authored-By: Claude Opus 4.7 (1M context) --- src/codelicious/chunker.py | 18 ++++++++++----- src/codelicious/cli.py | 30 +++++++++++++++++++++---- src/codelicious/git/git_orchestrator.py | 20 +++++++++-------- src/codelicious/orchestrator.py | 14 +++++++----- src/codelicious/tools/audit_logger.py | 24 ++++++++++++++++++-- src/codelicious/verifier.py | 2 +- 6 files changed, 80 insertions(+), 28 deletions(-) diff --git a/src/codelicious/chunker.py b/src/codelicious/chunker.py index 83a61ecb..93cf9057 100644 --- a/src/codelicious/chunker.py +++ b/src/codelicious/chunker.py @@ -418,13 +418,17 @@ def enforce_token_budget( that depth a WARNING is logged and the chunk is dispatched anyway — failing fast at the engine boundary is preferable to dropping work. """ + import collections + budget = _resolve_token_budget(engines) out: list[WorkChunk] = [] # Each entry: (chunk, depth, suffix_seed). suffix_seed cycles ``b → c → ...``. - stack: list[tuple[WorkChunk, int, int]] = [(c, 0, 0) for c in chunks] + # ``deque.popleft`` is O(1); list.pop(0) was O(n) and could quadratic on + # 100 chunks. + queue: collections.deque[tuple[WorkChunk, int, int]] = collections.deque((c, 0, 0) for c in chunks) suffix_alphabet = "bcdefghij" - while stack: - chunk, depth, seed = stack.pop(0) + while queue: + chunk, depth, seed = queue.popleft() tokens = _estimate_chunk_tokens(chunk, repo) if tokens <= budget: out.append(chunk) @@ -440,9 +444,11 @@ def enforce_token_budget( continue suffix = suffix_alphabet[min(seed, len(suffix_alphabet) - 1)] head, tail = _split_chunk_in_half(chunk, suffix) - # Push back onto the front so dependent ordering is preserved. - stack.insert(0, (head, depth + 1, seed + 1)) - stack.insert(1, (tail, depth + 1, seed + 1)) + # Re-process the split halves before any other unsplit chunks so the + # tail of an over-budget chunk is examined before the next original + # chunk's first half — preserves dependency order across recursion. + queue.appendleft((tail, depth + 1, seed + 1)) + queue.appendleft((head, depth + 1, seed + 1)) return out diff --git a/src/codelicious/cli.py b/src/codelicious/cli.py index 0b97e75a..150b42f6 100644 --- a/src/codelicious/cli.py +++ b/src/codelicious/cli.py @@ -78,7 +78,11 @@ def _write_postmortem( if log_path and log_path.is_file(): try: lines = log_path.read_text(encoding="utf-8", errors="replace").splitlines() - log_tail = "\n".join(lines[-50:]) + # Defuse any backtick fences that would prematurely close our + # rendered code block in the postmortem markdown. Zero-width + # joiner between the backticks renders identically in most + # markdown viewers but no longer matches the closing-fence regex. + log_tail = "\n".join(line.replace("```", "`‍``") for line in lines[-50:]) except OSError: log_tail = "" @@ -104,7 +108,10 @@ def _write_postmortem( if failed_titles: body.append("### Failed chunks") for title in failed_titles[:25]: - body.append(f"- {title}") + # Strip newlines and backticks so a hostile ledger entry can't + # break out of the markdown list or inject code fences. + safe = title.replace("\n", " ").replace("\r", " ").replace("`", "'") + body.append(f"- {safe}") body.append("") if log_tail: body.append("## Log tail (last 50 lines)") @@ -197,7 +204,18 @@ def _run_lock(repo_root: Path): os.write(fd, f"{os.getpid()}\n".encode()) os.fsync(fd) + # Idempotent release: ``main()`` enters the context manager and never + # calls ``__exit__`` until the process is exiting, so atexit handles the + # cleanup on the SystemExit path. The ``finally`` below catches the + # generator-exit / GeneratorExit case. Both paths must be safe to call + # multiple times — otherwise we close already-closed fds (potentially + # closing an unrelated reused fd on a busy process). + released = {"done": False} + def _release() -> None: + if released["done"]: + return + released["done"] = True try: fcntl.flock(fd, fcntl.LOCK_UN) except OSError: @@ -1030,8 +1048,12 @@ def main(): # spec v30 Step 1: per-repo advisory lock — second concurrent invocation # exits 75 (EX_TEMPFAIL) before any git, sandbox, or LLM call happens. - _run_lock_cm = _run_lock(repo_path) - _run_lock_cm.__enter__() + # Use ExitStack so the lock is released on *every* exit path (clean exit, + # uncaught exception, or SystemExit) — and only released once because + # ``_release`` itself is idempotent. + _run_lock_stack = contextlib.ExitStack() + _run_lock_stack.enter_context(_run_lock(repo_path)) + atexit.register(_run_lock_stack.close) _attach_file_log_handler(repo_path) diff --git a/src/codelicious/git/git_orchestrator.py b/src/codelicious/git/git_orchestrator.py index 657b1984..ee073997 100644 --- a/src/codelicious/git/git_orchestrator.py +++ b/src/codelicious/git/git_orchestrator.py @@ -1187,27 +1187,29 @@ def revert_chunk_changes(self) -> bool: return False def _branch_exists_locally(self, branch: str) -> bool: - """Return True iff ``branch`` is a local ref.""" + """Return True iff ``branch`` is a local ref. + + ``_run_cmd`` returns the stripped stdout string; a non-empty result + means git printed the branch name, an empty result means it did not. + """ try: - result = self._run_cmd(["git", "branch", "--list", branch], check=False) - except RuntimeError: + stdout = self._run_cmd(["git", "branch", "--list", branch], check=False) + except (RuntimeError, GitOperationError): return False - # `git branch --list ` prints the branch (with optional `*` prefix) - # when present, empty output otherwise. - return bool((getattr(result, "stdout", "") or "").strip()) + return bool(stdout) def _branch_exists_remotely(self, branch: str) -> bool: """Return True iff ``branch`` exists on ``origin``. Network failures treated as "unknown / assume not present" — disambiguation is best-effort.""" try: - result = self._run_cmd( + stdout = self._run_cmd( ["git", "ls-remote", "--heads", "origin", branch], check=False, timeout=15, ) - except (RuntimeError, TypeError): + except (RuntimeError, GitOperationError, TypeError): return False - return bool((getattr(result, "stdout", "") or "").strip()) + return bool(stdout) def _disambiguate_branch(self, candidate: str, *, suffix_hint: str = "") -> str: """Resolve branch-name collisions (spec v30 Step 10). diff --git a/src/codelicious/orchestrator.py b/src/codelicious/orchestrator.py index cf26973e..e21864c3 100644 --- a/src/codelicious/orchestrator.py +++ b/src/codelicious/orchestrator.py @@ -391,18 +391,20 @@ def run( ) # ── Execute ─────────────────────────────────────── - # spec v30 Step 5: try the primary engine, then fail over to - # any remaining engines on a rate-limit signal. + # spec v30 Step 5: try the head of the engine list, fail over + # to the next on a rate-limit. ``self.engine`` always tracks + # ``self._engines[0]`` so verify/fix paths below use the same + # engine that just executed the chunk. + self.engine = self._engines[0] result = self.engine.execute_chunk(chunk, self.repo_path, context) - while result.message and "Rate limited" in (result.message or "") and len(self._engines) > 1: + while result.message and "Rate limited" in result.message and len(self._engines) > 1: rate_limited = self._engines.pop(0) - next_engine = self._engines[0] + self.engine = self._engines[0] logger.warning( "%s rate-limited; failing over to %s for the remainder of this spec.", getattr(rate_limited, "name", "engine"), - getattr(next_engine, "name", "engine"), + getattr(self.engine, "name", "engine"), ) - self.engine = next_engine result = self.engine.execute_chunk(chunk, self.repo_path, context) # ── Verify ──────────────────────────────────────── diff --git a/src/codelicious/tools/audit_logger.py b/src/codelicious/tools/audit_logger.py index 1a62a999..b7774e71 100644 --- a/src/codelicious/tools/audit_logger.py +++ b/src/codelicious/tools/audit_logger.py @@ -146,8 +146,28 @@ def _cross_process_lock(self): try: import fcntl - - fcntl.flock(self._lock_fd, fcntl.LOCK_EX) + import time as _time + + # Non-blocking with bounded retry: if a peer process holds the lock + # during a slow rotation, we don't want the orchestrator's main + # loop to block indefinitely. After ~30 ms of contention give up + # and proceed with intra-process locking only. + acquired = False + for _ in range(3): + try: + fcntl.flock(self._lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + acquired = True + break + except OSError: + _time.sleep(0.01) + if not acquired: + if not self._cross_process_lock_warned: + console_logger.warning( + "AuditLogger: could not acquire cross-process audit lock; proceeding without it" + ) + self._cross_process_lock_warned = True + yield + return try: yield finally: diff --git a/src/codelicious/verifier.py b/src/codelicious/verifier.py index 5143060d..056a0c7f 100644 --- a/src/codelicious/verifier.py +++ b/src/codelicious/verifier.py @@ -1341,7 +1341,7 @@ def verify_paths( # ── pytest scoped to mapped tests ──────────────────────────────── try: proc = _run_with_pgroup_kill( - ["python", "-m", "pytest", "-q", "--no-cov", *[str(p) for p in test_paths]], + [sys.executable, "-m", "pytest", "-q", "--no-cov", *[str(p) for p in test_paths]], cwd=str(repo), capture_output=True, text=True,