From 0839b8302145036115fcab7cce06d83faa94930e Mon Sep 17 00:00:00 2001 From: dudegladiator Date: Fri, 12 Jun 2026 12:14:17 +0530 Subject: [PATCH] feat(v1.0.0): fork-on-delete + heatmap + true token counts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Major rewrite. Every `delete` now produces a NEW session file with a fresh UUID instead of mutating the source. Output prints a `claude --resume ` command. The new id forces a fresh prefix- cache slot in Claude Code, so /context immediately reflects the smaller size — no stale cache. - delete: forks instead of mutating; output adds new_session_id, new_path, resume_command. Source file is never touched. - heatmap: new subcommand. Ranks conversational turns by true token count (tiktoken on whole JSONL line — text + tool args + tool stdout + metadata). Drop the heaviest first. - info / show: token counts now reflect wire size (typically 2-3x the old text-only estimate). - list / search: forked files are tagged with a cc-session-fork sentinel line and surface as is_fork=true with an [edited] title prefix. - Removed: --force flag, lsof concurrent-open detection, .bak backup, restore subcommand. None are needed when the source is never mutated. - AGENTS.md: agent guide moved out of source into a real markdown file at the repo root. `cc-session agent-guide` reads it via include_str! so GitHub renders it natively and the binary stays self-contained. - v1.0.0 bump. 
--- AGENTS.md | 286 ++++++++++++++++++++++++ Cargo.lock | 14 +- Cargo.toml | 3 +- README.md | 108 ++++++--- src/app.rs | 3 - src/cli.rs | 533 +++++++++++++++----------------------------- src/cli/fork.rs | 179 +++++++++++++++ src/io/atomic.rs | 69 +----- src/io/lsof.rs | 26 --- src/io/mod.rs | 1 - src/main.rs | 47 ++-- src/scan.rs | 100 +++++++-- src/screens/edit.rs | 49 ++-- src/screens/list.rs | 2 + src/screens/mod.rs | 2 +- src/search.rs | 2 + src/session.rs | 1 + src/tokens.rs | 149 +++++++------ 18 files changed, 962 insertions(+), 612 deletions(-) create mode 100644 AGENTS.md create mode 100644 src/cli/fork.rs delete mode 100644 src/io/lsof.rs diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..c7ef218 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,286 @@ +# cc-session agent guide + +You are an LLM driving cc-session non-interactively. This guide is the +single source of truth for how to use it. Read it once, then operate. + +## What this CLI does + +Edits Claude Code session JSONL files at `~/.claude/projects//.jsonl`. +It can browse, search, inspect, and surgically delete messages from any session +while keeping `tool_use` / `tool_result` pairs and conversational turns intact. + +**Important behavioral note (v1+):** `delete` NEVER mutates the source file. +It always writes a NEW session file with a fresh UUID and prints a +`claude --resume ` command. The original is never touched. There is +no `--force` flag, no lsof check, no `.bak` file — none of those are needed +when forking. + +## Standard workflow + +1. Discover sessions: + ```sh + cc-session list --json --limit 20 + cc-session search "" --json --limit 10 + ``` +2. Inspect one session: + ```sh + cc-session info --json + cc-session show --json + ``` +3. Find heaviest turns to drop: + ```sh + cc-session heatmap --json --limit 10 + ``` +4. Plan an edit (always dry-run first): + ```sh + cc-session delete --indices 4,6 --dry-run --json + ``` +5. 
Apply (writes a new session file, original untouched): + ```sh + cc-session delete <target> --indices 4,6 --json + ``` + Output includes `new_session_id`, `new_path`, `resume_command`. +6. (Optional) self-update: + ```sh + cc-session update [--version v1.0.0] + ``` + +## Target argument (`<target>`) + +For `show` / `info` / `heatmap` / `delete` the first positional arg accepts: + +- a full filesystem path to a `.jsonl` file +- a full session UUID (preferred — unambiguous) +- any unique substring of a session UUID (8+ chars usually fine) + +If a substring matches multiple sessions, the command errors and lists the +candidates. Pass a longer substring to disambiguate. + +## Index semantics + +Indices are 0-based positions in the raw JSONL (one per line). Use +`cc-session show <target> --json` to map message text → index. Note: + +- "Visible" messages (user / assistant text) are a subset; system messages, + `tool_use` blocks, `tool_result` blocks, attachments, and harness wrappers + (``, ``, etc.) are hidden by default. Pass + `--include-hidden` to see them in `show`. +- Indices in the SOURCE session are stable across deletes (because deletes + fork instead of mutating). Each new fork has its own index space — if you + chain edits, re-run `show` against the new session id. + +## Auto-pair (always on) + +Two safety extensions run on every delete request: + +1. `tool_use` ↔ `tool_result` blocks always travel together. Marking either + side pulls the other. +2. Turn-level pairing: a "turn" = visible user msg + every message that + follows it until the next visible user msg. Marking ANY message in a + turn marks the whole turn (user prompt + assistant reply + intermediate + tool calls). + +The delete output reports `requested` (what you asked) and `paired_added` +(what auto-pair added). Always inspect both before applying. 
+ +## Resume safety: parentUuid auto-relink + +Every fork rewrites surviving messages whose `parentUuid` would point to +a deleted ancestor, walking up to the nearest surviving ancestor (or null +at the root). Reported as `parent_uuid_relinked`. Foreign parent uuids +(referring to messages not in the file) are preserved verbatim. + +## `delete` output JSON + +```json +{ + "source_path": "", + "new_session_id": "", + "new_path": "", + "resume_command": "claude --resume ", + "parent_uuid_relinked": 0, + "requested": [], + "after_auto_pair": [], + "paired_added": [], + "total_messages_before": 0, + "total_messages_after": 0, + "dry_run": false, + "saved": true, + "warnings": [] +} +``` + +`new_session_id`, `new_path`, `resume_command` are populated even in dry-run +(preview values). `saved` is `true` when the fork file was actually written. + +## `show` output JSON (per message) + +```json +{ + "index": 0, + "role": "user", + "type": "user", + "timestamp": "2026-06-12T00:00:00Z", + "tokens": 0, + "visible": true, + "has_tool_use": false, + "has_tool_result": false, + "tool_use_ids": [], + "tool_result_ids": [], + "text": "...", + "truncated": false +} +``` + +`tokens` is tiktoken `cl100k_base` counted on the WHOLE raw JSONL line +(text + `tool_use` input + `tool_result` content + metadata). `text` is a +400-char preview by default; pass `--full` to get the full body. + +## `info` output JSON + +```json +{ + "path": "...", "project": "...", "session_id": "...", "title": "...", + "modified": "...", "size": 0, + "is_fork": false, + "fork_origin": null, + "total_messages": 0, "visible_messages": 0, + "user_messages": 0, "assistant_messages": 0, + "tool_use_count": 0, "tool_result_count": 0, + "orphan_result_indices": [], + "estimated_tokens": 0 +} +``` + +`estimated_tokens` is the sum of true per-msg counts. 
+ +## `heatmap` output JSON + +```json +{ + "path": "", + "session_id": "", + "total_messages": 0, + "total_tokens": 0, + "turns": [ + { + "anchor_idx": 0, + "start_idx": 0, + "end_idx": 0, + "msg_count": 0, + "tokens": 0, + "has_tool_use": false, + "preview": "..." + } + ] +} +``` + +`turns` is sorted by `tokens` descending. Drop the heaviest first. + +## `list` / `search` output JSON (per entry) + +```json +{ + "project": "...", "session_id": "...", "title": "...", + "modified": "...", "size": 0, "path": "...", + "is_fork": false, + "fork_origin": null +} +``` + +`title` carries an `[edited] ` prefix when `is_fork` is true. + +## Selection flags for `delete` + +You may combine any/all; the union is taken before auto-pair runs. + +``` +--indices 3,5,7 # exact indices (comma-separated) +--range lo..hi # inclusive range, both ints +--from-top N # first N messages +--from-bottom N # last N messages +``` + +At least one selection flag is required. + +## Exit codes + +| Code | Meaning | +|------|---------| +| `0` | success | +| `1` | generic error (parse failure, IO error, ambiguous target, ...) | +| `2+` | reserved for future structured errors | + +Always inspect stderr on non-zero exit for the human-readable cause. + +## Environment overrides + +| Var | Effect | +|-----|--------| +| `CC_SESSION_VERSION` | pin a specific release (used by `update`) | +| `CC_SESSION_INSTALL_DIR` | where `install.sh` drops the binary | +| `CC_SESSION_INSTALLER_URL` | override installer URL for `update` (testing) | + +## End-to-end recipe (real run, copy this shape) + +```sh +# 1. Locate the session id. Inside Claude Code: /status -> Session ID. +cc-session list --json --limit 10 + +# 2. See the wire size. estimated_tokens reflects whole-line tiktoken +# (text + tool_use args + tool_result content + metadata) — usually +# 2-3x larger than what plain message text would suggest. +cc-session info --json + +# 3. Find the heaviest CONVERSATIONAL TURNS. 
A turn rolls up the user +# prompt + every assistant/tool message it triggered up to the next +# visible user prompt. This matches what auto-pair will delete. +cc-session heatmap <target> --json --limit 10 + +# 4. Pick a contiguous block of turns that are clearly noise (long +# iteration loops, exploratory tool dumps, repeated re-reviews of +# the same doc, etc). Prefer one --range over many --indices: it's +# less likely to leave parentUuid orphans, and even when it does, +# auto-relink fixes them (and reports parent_uuid_relinked). +cc-session delete <target> --range <lo>..<hi> --dry-run --json + +# 5. Apply. The output's `resume_command` is ready to paste. +cc-session delete <target> --range <lo>..<hi> --json + +# 6. Resume the NEW id in Claude Code: +# claude --resume <new_session_id> +# +# The new id forces a fresh prefix-cache slot, so Claude Code's +# /context immediately reflects the smaller size — no stale cache. +``` + +## Useful examples (one-liners) + +```sh +# find the heaviest turns and drop the worst three +cc-session heatmap <target> --json --limit 5 +cc-session delete <target> --indices <a>,<b>,<c> --dry-run --json +cc-session delete <target> --indices <a>,<b>,<c> --json + +# delete the first 50 messages of a long session, dry run first +cc-session delete <target> --from-top 50 --dry-run --json + +# purge messages 200..280 inclusive +cc-session delete <target> --range 200..280 --dry-run --json + +# find a session about "auth middleware" and inspect +cc-session search "auth middleware" --json --limit 1 +cc-session show <target> --json + +# chain edits: each delete produces a new id; pass that id back to +# cc-session for the next trim. Forks are marked is_fork=true in list. +cc-session delete <new_session_id> --range ... --json +``` + +## Things this CLI will NOT do + +- Edit message contents in place. +- Reorder messages. +- Merge or split sessions. +- Mutate the source session (every delete forks). 
diff --git a/Cargo.lock b/Cargo.lock index 913fc84..e5ac63a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -173,7 +173,7 @@ dependencies = [ [[package]] name = "cc-session" -version = "0.3.0" +version = "1.0.0" dependencies = [ "anyhow", "assert_fs", @@ -189,6 +189,7 @@ dependencies = [ "tempfile", "thiserror 2.0.18", "tiktoken-rs", + "uuid", ] [[package]] @@ -1276,6 +1277,17 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "uuid" +version = "1.23.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "144d6b123cef80b301b8f72a9e2ca4370ddec21950d0a103dd22c437006d2db7" +dependencies = [ + "getrandom 0.4.2", + "js-sys", + "wasm-bindgen", +] + [[package]] name = "walkdir" version = "2.5.0" diff --git a/Cargo.toml b/Cargo.toml index 66face6..b08b77d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cc-session" -version = "0.4.0" +version = "1.0.0" edition = "2021" rust-version = "1.75" description = "Interactive TUI editor for Claude Code session JSONL files. Browse, search, and surgically delete messages while preserving tool_use/tool_result pairing." @@ -28,6 +28,7 @@ thiserror = "2" clap = { version = "4", features = ["derive"] } chrono = { version = "0.4", default-features = false, features = ["std", "clock", "serde"] } nucleo-matcher = "0.3" +uuid = { version = "1", features = ["v4"] } [dev-dependencies] assert_fs = "1" diff --git a/README.md b/README.md index 9755d49..3139d67 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,11 @@ Claude Code persists conversations as JSONL under `~/.claude/projects//` command. The original session file is never modified, so you can revert at any time by resuming the old id. +- **Heatmap** — `cc-session heatmap ` ranks conversational turns by true token count (whole-JSONL-line tiktoken — text + tool args + tool stdout + metadata). 
Drop the heaviest first. +- **Safe deletes** — turn-level auto-pair removes a user prompt with its assistant reply; tool_use ↔ tool_result blocks always travel together; surviving `parentUuid` references re-link to the nearest surviving ancestor so Claude Code's resume renderer never breaks. +- **TUI + scriptable CLI** — interactive ratatui browser with fuzzy search, plus `list / search / show / info / heatmap / delete / update / agent-guide` subcommands with `--json` output so other agents (Claude Code, Codex, scripts) can drive every action. ## Install @@ -18,7 +20,7 @@ Claude Code persists conversations as JSONL under `~/.claude/projects//] [--limit N] -cc-session search [--json] [--limit N] -cc-session show [--json] [--full] [--include-hidden] -cc-session info [--json] -cc-session delete --indices 3,5,7 [--from-top N] [--from-bottom N] [--range lo..hi] [--dry-run] [--force] [--json] -cc-session update [--version v0.2.0] -cc-session restore [--list] [--json] +cc-session list [--json] [--project ] [--limit N] +cc-session search [--json] [--limit N] +cc-session show [--json] [--full] [--include-hidden] +cc-session info [--json] +cc-session heatmap [--json] [--limit N] +cc-session delete --indices 3,5,7 [--from-top N] [--from-bottom N] [--range lo..hi] [--dry-run] [--json] +cc-session update [--version v1.0.0] +cc-session agent-guide ``` -`` accepts a full path, a session UUID, or a unique substring of one. Indices are 0-based positions in the raw JSONL (use `cc-session show --json` to map text → index). Auto-pair always extends the delete set to keep `tool_use`/`tool_result` blocks together; `paired_added` in the output reports what was added. -### Example LLM workflow +`` accepts a full path, a session UUID, or a unique substring of one. Indices are 0-based positions in the raw JSONL (use `cc-session show --json` to map text → index). -```sh -# 1. Pick a session. 
-cc-session list --json --limit 5 +`delete` always **forks**: it writes a new session file with a new UUID and leaves the original untouched. The output includes `new_session_id`, `new_path`, and a ready-to-paste `resume_command` like `claude --resume `. There is no `--force`, no lsof check, no `.bak` file — none of those are needed when the source is never mutated. -# 2. Inspect messages. -cc-session show 20042ea8 --json +## How to actually shrink a session -# 3. Preview the edit. -cc-session delete 20042ea8 --indices 4,6 --dry-run --json +The pattern that works in practice: -# 4. Apply. -cc-session delete 20042ea8 --indices 4,6 --json +```sh +# 1. Find the session id. Inside Claude Code: /status -> Session ID. +# Or list everything: +cc-session list --json --limit 10 + +# 2. See the damage. `info` now reports TRUE wire size (text + tool args +# + tool stdout + metadata) via tiktoken on whole JSONL lines. Expect +# numbers 2-3x larger than the old text-only "estimated_tokens". +cc-session info 91e440c0 --json +# -> total_messages: 639 estimated_tokens: 662251 + +# 3. Locate the heaviest conversational turns. A "turn" = visible user +# message + every assistant/tool message it triggered until the next +# visible user message. Heatmap rolls up tool I/O into the parent turn, +# so you see the real cost of each exchange. +cc-session heatmap 91e440c0 --json --limit 8 +# anchor range msgs tokens preview +# 27 27..65 39 115756 let document the life cycle of each of rpcs of ai agent... +# 128 128..242 115 110922 Base directory for this skill: /Users/harsh/.claude/... +# 66 66..124 59 79176 can you read other ai agent folder... +# 265 265..346 82 62891 please correct the doc + +# 4. Pick the contiguous worst block. The four turns above happen to be +# adjacent (27..346) and are all replaceable by their final saved +# artifacts. Dry-run first to see what auto-pair pulls in. 
+cc-session delete 91e440c0 --range 27..346 --dry-run --json +# -> messages: 639 -> 319, parent_uuid_relinked: 1, warnings: [] + +# 5. Apply. This writes a NEW session file with a fresh UUID. The +# original file is untouched on disk. +cc-session delete 91e440c0 --range 27..346 --json +# -> { +# "new_session_id": "1d9a021d-609f-4d0d-9591-baea02f13195", +# "new_path": ".../1d9a021d-...jsonl", +# "resume_command": "claude --resume 1d9a021d-...", +# "total_messages_after": 319, +# "parent_uuid_relinked": 1 +# } + +# 6. Resume the new id in Claude Code. +claude --resume 1d9a021d-609f-4d0d-9591-baea02f13195 ``` -The `delete` JSON output names every key an agent needs: `requested`, `paired_added`, `after_auto_pair`, `total_messages_before`, `total_messages_after`, `dry_run`, `saved`, `backup`, `warnings`. Pair this with `--dry-run` to plan, then drop the flag to apply. +Real run from this repo's own session: `/context` reported **433k → 230k context** (47% drop, 53% drop on the messages bucket). Cost on the next API call dropped proportionally — the new session id forces a fresh prefix-cache slot, so Claude Code can't accidentally serve the old fat prefix. + +Things to know: + +- **Why fork instead of mutate?** Two reasons. First, deleting in place leaves Claude Code's prefix cache holding the OLD prefix — your `/context` keeps showing the pre-edit size until you start a new session. Second, if anything goes sideways, you just resume the original id; the source file was never touched. +- **Indices are 0-based positions in the raw JSONL**, not visible-message positions. Use `cc-session show --json` to map text → index. `heatmap` already gives you raw indices in `anchor_idx` / `start_idx` / `end_idx`. +- **Auto-pair always runs.** Marking any message in a turn marks the whole turn (user prompt + assistant reply + tool I/O). tool_use ↔ tool_result blocks always travel together. 
Surviving messages whose `parentUuid` would point into the deleted set are re-linked to the nearest surviving ancestor (or null at root) — the count surfaces as `parent_uuid_relinked`. +- **You can chain edits.** Each `delete` produces a new session id; pass that id back to `cc-session` to trim further. Forks show `[edited]` in `list` and carry `is_fork: true`, `fork_origin: ` in JSON. + +`cc-session agent-guide` prints the full machine-readable doc (workflow, JSON shapes, env vars, exit codes) — the canonical contract for other agents. ## Safety -- Always closes Claude Code first. `cc-session` detects open file handles via `lsof` and refuses to save unless `--force` is passed. -- Every save writes `.bak` first. -- Saves are atomic: write to `.tmp`, fsync, rename. +- Source session is **never modified** — every delete writes a new file. +- Atomic writes: `.tmp` → fsync → rename. - Tool_use and tool_result blocks always delete together. +- Surviving messages whose `parentUuid` would reference a deleted ancestor are re-linked automatically (count surfaced as `parent_uuid_relinked`). +- Forks are tagged with a `cc-session-fork` sentinel line so `list` can show an `[edited]` badge. ## License diff --git a/src/app.rs b/src/app.rs index c692dfa..7ba1915 100644 --- a/src/app.rs +++ b/src/app.rs @@ -3,7 +3,6 @@ use std::path::PathBuf; use crate::screens; pub struct Config { - pub force: bool, pub projects_dir: Option, } @@ -14,7 +13,6 @@ pub enum Screen { pub struct App { pub screen: Screen, - pub force: bool, pub projects_dir: PathBuf, pub should_quit: bool, } @@ -30,7 +28,6 @@ pub fn run(cfg: Config) -> anyhow::Result<()> { let mut app = App { screen: Screen::List(list_state), - force: cfg.force, projects_dir, should_quit: false, }; diff --git a/src/cli.rs b/src/cli.rs index 25fcf97..a04fce9 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -2,6 +2,8 @@ //! agents (Claude Code, Codex, scripts) — every `--json` mode emits a //! deterministic shape with stable keys. 
+pub mod fork; + use std::collections::HashSet; use std::path::{Path, PathBuf}; @@ -10,7 +12,6 @@ use chrono::{DateTime, Local}; use serde::Serialize; use serde_json::Value; -use crate::io::atomic; use crate::pairing::PairIndex; use crate::scan::{self, SessionEntry}; use crate::session::{Message, Session}; @@ -34,6 +35,8 @@ struct ListItem { modified: String, size: u64, path: String, + is_fork: bool, + fork_origin: Option, } pub fn list( @@ -55,19 +58,9 @@ pub fn list( let out: Vec = entries.iter().map(list_item).collect(); println!("{}", serde_json::to_string_pretty(&out)?); } else { - println!( - "{:<40} {:<50} {:<17} {:<10} id", - "project", "title", "modified", "size" - ); + print_table_header(); for e in &entries { - println!( - "{:<40} {:<50} {:<17} {:<10} {}", - truncate(&e.project_slug, 40), - truncate(&e.title, 50), - format_mtime(e), - human_size(e.size), - e.session_id - ); + print_table_row(e); } println!("\n{} session(s)", entries.len()); } @@ -84,19 +77,9 @@ pub fn search(projects_dir: &Path, query: &str, limit: Option, json: bool let out: Vec = hits.iter().map(|e| list_item(e)).collect(); println!("{}", serde_json::to_string_pretty(&out)?); } else { - println!( - "{:<40} {:<50} {:<17} {:<10} id", - "project", "title", "modified", "size" - ); + print_table_header(); for e in &hits { - println!( - "{:<40} {:<50} {:<17} {:<10} {}", - truncate(&e.project_slug, 40), - truncate(&e.title, 50), - format_mtime(e), - human_size(e.size), - e.session_id - ); + print_table_row(e); } println!("\n{} match(es)", hits.len()); } @@ -111,9 +94,29 @@ fn list_item(e: &SessionEntry) -> ListItem { modified: format_mtime(e), size: e.size, path: e.path.display().to_string(), + is_fork: e.is_fork, + fork_origin: e.fork_origin.clone(), } } +fn print_table_header() { + println!( + "{:<40} {:<50} {:<17} {:<10} id", + "project", "title", "modified", "size" + ); +} + +fn print_table_row(e: &SessionEntry) { + println!( + "{:<40} {:<50} {:<17} {:<10} {}", + truncate(&e.project_slug, 
40), + truncate(&e.title, 50), + format_mtime(e), + human_size(e.size), + e.session_id + ); +} + // ---------- show ---------- #[derive(Serialize)] @@ -188,7 +191,7 @@ pub fn show( truncated, }); } - let _ = pairing; // pairing index could be exposed too; skip for now. + let _ = pairing; if json { let out = ShowOutput { @@ -243,13 +246,15 @@ pub fn show( Ok(()) } -// ---------- delete ---------- +// ---------- delete (always forks) ---------- #[derive(Serialize)] struct DeleteOutput { - path: String, + source_path: String, + new_session_id: Option, + new_path: Option, + resume_command: Option, parent_uuid_relinked: usize, - backup: Option, requested: Vec, after_auto_pair: Vec, paired_added: Vec, @@ -265,7 +270,6 @@ pub fn delete( target: &str, spec: DeleteSpec, dry_run: bool, - force: bool, json: bool, ) -> Result<()> { let entry = resolve_target(projects_dir, target)?; @@ -305,7 +309,7 @@ pub fn delete( } let mut marked = requested.clone(); - let added_count = pairing.auto_pair(&mut marked); + pairing.auto_pair(&mut marked); let mut requested_sorted: Vec = requested.into_iter().collect(); requested_sorted.sort_unstable(); @@ -317,7 +321,6 @@ pub fn delete( .copied() .collect(); paired_added.sort_unstable(); - let _ = added_count; let mut warnings = Vec::new(); if !pairing.orphan_results.is_empty() { @@ -328,25 +331,43 @@ pub fn delete( )); } - let (content, relinked) = session.render_with_relink(&marked)?; + // Render the would-be content (relink runs here too) just to compute + // relinked count and validate the plan. + let (_content, relinked) = session.render_with_relink(&marked)?; let after = total - marked.len(); - let (saved, backup) = if dry_run { - (false, None) + let (saved, new_session_id, new_path, resume_command) = if dry_run { + // Even in dry-run, surface the new id we would mint so the agent can + // pre-write a resume command in its plan. 
+ let preview_id = uuid::Uuid::new_v4().to_string(); + let preview_path = entry + .path + .with_file_name(format!("{preview_id}.jsonl")) + .display() + .to_string(); + ( + false, + Some(preview_id.clone()), + Some(preview_path), + Some(format!("claude --resume {preview_id}")), + ) } else { - match atomic::save(&entry.path, &content, force) { - Ok(out) => (true, Some(out.backup.display().to_string())), - Err(atomic::SaveError::Conflict) => { - bail!("file is open by another process; close Claude Code or pass --force"); - } - Err(atomic::SaveError::Io(e)) => return Err(e.into()), - } + let outcome = fork::fork_session(&session, &marked)?; + let resume = format!("claude --resume {}", outcome.new_session_id); + ( + true, + Some(outcome.new_session_id.clone()), + Some(outcome.new_path.display().to_string()), + Some(resume), + ) }; let out = DeleteOutput { - path: entry.path.display().to_string(), + source_path: entry.path.display().to_string(), + new_session_id, + new_path, + resume_command, parent_uuid_relinked: relinked, - backup, requested: requested_sorted, after_auto_pair: all_sorted, paired_added, @@ -360,7 +381,7 @@ pub fn delete( if json { println!("{}", serde_json::to_string_pretty(&out)?); } else { - println!("path: {}", out.path); + println!("source: {}", out.source_path); println!("requested: {:?}", out.requested); println!("auto-paired added: {:?}", out.paired_added); println!("final delete set: {:?}", out.after_auto_pair); @@ -373,8 +394,11 @@ pub fn delete( println!("parent_uuid relinked: {}", out.parent_uuid_relinked); println!("dry_run: {}", out.dry_run); println!("saved: {}", out.saved); - if let Some(b) = &out.backup { - println!("backup: {b}"); + if let Some(p) = &out.new_path { + println!("forked: {p}"); + } + if let Some(r) = &out.resume_command { + println!("resume: {r}"); } for w in &out.warnings { println!("warning: {w}"); @@ -393,6 +417,8 @@ struct InfoOutput { title: String, modified: String, size: u64, + is_fork: bool, + fork_origin: Option, 
total_messages: usize, visible_messages: usize, user_messages: usize, @@ -442,6 +468,8 @@ pub fn info(projects_dir: &Path, target: &str, json: bool) -> Result<()> { title: entry.title.clone(), modified: format_mtime(&entry), size: entry.size, + is_fork: entry.is_fork, + fork_origin: entry.fork_origin.clone(), total_messages: session.messages.len(), visible_messages: visible, user_messages: users, @@ -461,6 +489,12 @@ pub fn info(projects_dir: &Path, target: &str, json: bool) -> Result<()> { println!("title: {}", out.title); println!("modified: {}", out.modified); println!("size: {}", human_size(out.size)); + if out.is_fork { + println!( + "fork: yes (origin: {})", + out.fork_origin.as_deref().unwrap_or("unknown") + ); + } println!( "messages: {} total, {} visible ({} user, {} assistant)", out.total_messages, out.visible_messages, out.user_messages, out.assistant_messages @@ -477,341 +511,132 @@ pub fn info(projects_dir: &Path, target: &str, json: bool) -> Result<()> { Ok(()) } -// ---------- restore ---------- +// ---------- heatmap ---------- + +#[derive(Serialize)] +struct HeatmapTurn { + anchor_idx: usize, + start_idx: usize, + end_idx: usize, + msg_count: usize, + tokens: usize, + has_tool_use: bool, + preview: String, +} #[derive(Serialize)] -struct RestoreOutput { +struct HeatmapOutput { path: String, - backup: String, - pre_restore_snapshot: Option, - backup_messages: usize, - current_messages: Option, - backup_size: u64, - backup_modified: String, - listed_only: bool, - restored: bool, + session_id: String, + total_messages: usize, + total_tokens: usize, + turns: Vec, } -pub fn restore( - projects_dir: &Path, - target: &str, - list_only: bool, - force: bool, - json: bool, -) -> Result<()> { +pub fn heatmap(projects_dir: &Path, target: &str, limit: Option, json: bool) -> Result<()> { let entry = resolve_target(projects_dir, target)?; - let bak_path = bak_path_for(&entry.path); - if !bak_path.exists() { - bail!( - "no backup found at {} — cc-session writes 
.bak on every save", - bak_path.display() - ); - } - - // Sanity-check the backup parses; we don't want to restore a corrupt file. - let backup_session = Session::load(&bak_path)?; - let backup_messages = backup_session.messages.len(); - - let bak_meta = std::fs::metadata(&bak_path)?; - let bak_size = bak_meta.len(); - let bak_mtime: DateTime = bak_meta - .modified() - .unwrap_or(std::time::SystemTime::UNIX_EPOCH) - .into(); - let bak_modified = bak_mtime.format("%Y-%m-%d %H:%M:%S").to_string(); - - let current_messages = if entry.path.exists() { - Session::load(&entry.path).ok().map(|s| s.messages.len()) - } else { - None - }; + let session = Session::load(&entry.path)?; + let pairing = PairIndex::build(&session.messages); + let tokens = TokenCounter::new(); - if list_only { - let out = RestoreOutput { - path: entry.path.display().to_string(), - backup: bak_path.display().to_string(), - pre_restore_snapshot: None, - backup_messages, - current_messages, - backup_size: bak_size, - backup_modified: bak_modified, - listed_only: true, - restored: false, - }; - if json { - println!("{}", serde_json::to_string_pretty(&out)?); - } else { - println!("path: {}", out.path); - println!("backup: {}", out.backup); - println!("backup msgs: {}", out.backup_messages); - if let Some(c) = out.current_messages { - println!("current msgs: {c}"); - } else { - println!("current msgs: (file missing)"); - } - println!("backup size: {}", human_size(out.backup_size)); - println!("backup mtime: {}", out.backup_modified); + // Group message indices by their turn anchor (visible-user idx). + let mut by_anchor: std::collections::BTreeMap> = + std::collections::BTreeMap::new(); + for (idx, anchor) in pairing.turn_of.iter().enumerate() { + if *anchor == usize::MAX { + continue; } - return Ok(()); + by_anchor.entry(*anchor).or_default().push(idx); } - if !force && entry.path.exists() && super::io::lsof::is_open(&entry.path)? 
{ - bail!("file is open by another process; close Claude Code or pass --force"); - } + let mut total_tokens = 0usize; + let mut turns: Vec = by_anchor + .into_iter() + .map(|(anchor, idxs)| { + let start = *idxs.first().unwrap_or(&anchor); + let end = *idxs.last().unwrap_or(&anchor); + let mut t = 0usize; + let mut has_tool = false; + for &i in &idxs { + t += tokens.count(i, &session.messages[i]); + let (u, _) = collect_tool_ids(&session.messages[i]); + if !u.is_empty() { + has_tool = true; + } + } + total_tokens += t; + let preview = preview_for(&session.messages[anchor]); + HeatmapTurn { + anchor_idx: anchor, + start_idx: start, + end_idx: end, + msg_count: idxs.len(), + tokens: t, + has_tool_use: has_tool, + preview, + } + }) + .collect(); - // If a current file exists, snapshot it aside before overwriting so the - // restore itself is reversible. Use a sibling path that does NOT match - // *.bak (which we'd clobber on next save). - let snapshot = if entry.path.exists() { - let snap = pre_restore_snapshot_path(&entry.path); - std::fs::copy(&entry.path, &snap)?; - Some(snap) + turns.sort_by_key(|t| std::cmp::Reverse(t.tokens)); + if let Some(n) = limit { + turns.truncate(n); } else { - None - }; - - // Atomic restore: copy bak -> .tmp, fsync, rename. 
- let tmp = with_extension_appended(&entry.path, "tmp"); - std::fs::copy(&bak_path, &tmp)?; - { - let f = std::fs::OpenOptions::new().write(true).open(&tmp)?; - f.sync_all()?; + turns.truncate(20); } - std::fs::rename(&tmp, &entry.path)?; - let out = RestoreOutput { + let out = HeatmapOutput { path: entry.path.display().to_string(), - backup: bak_path.display().to_string(), - pre_restore_snapshot: snapshot.map(|p| p.display().to_string()), - backup_messages, - current_messages, - backup_size: bak_size, - backup_modified: bak_modified, - listed_only: false, - restored: true, + session_id: entry.session_id.clone(), + total_messages: session.messages.len(), + total_tokens, + turns, }; if json { println!("{}", serde_json::to_string_pretty(&out)?); } else { - println!("restored: {}", out.path); - println!("from: {}", out.backup); - if let Some(s) = &out.pre_restore_snapshot { - println!("prev: {s} (snapshot of state before restore)"); - } + println!("path: {}", out.path); println!( - "messages: {} (was {})", - out.backup_messages, - out.current_messages - .map(|n| n.to_string()) - .unwrap_or_else(|| "missing".into()) + "{} turns shown out of session total {} tokens", + out.turns.len(), + out.total_tokens ); + println!(); + println!( + "{:>5} {:>9} {:>5} {:>5} preview", + "idx", "tokens", "msgs", "tool" + ); + println!("{}", "-".repeat(80)); + for t in &out.turns { + println!( + "{:>5} {:>9} {:>5} {:>5} {}", + t.anchor_idx, + t.tokens, + t.msg_count, + if t.has_tool_use { "yes" } else { "" }, + truncate(&t.preview, 80), + ); + } } Ok(()) } -fn bak_path_for(path: &Path) -> PathBuf { - with_extension_appended(path, "bak") -} - -fn pre_restore_snapshot_path(path: &Path) -> PathBuf { - let stamp = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_secs()) - .unwrap_or(0); - with_extension_appended(path, &format!("pre-restore.{stamp}")) -} - -fn with_extension_appended(path: &Path, suffix: &str) -> PathBuf { - let mut s = 
path.as_os_str().to_owned(); - s.push("."); - s.push(suffix); - PathBuf::from(s) +fn preview_for(msg: &Message) -> String { + extract_plain_text(msg) + .unwrap_or_default() + .replace('\n', " ") + .trim() + .to_string() } // ---------- agent guide ---------- +// +// Source of truth lives at AGENTS.md in the repo root so GitHub renders it +// nicely AND `cc-session agent-guide` prints the same content. include_str! +// inlines the file at compile time, so the binary stays self-contained. -pub const AGENT_GUIDE: &str = r#"# cc-session agent guide - -You are an LLM driving cc-session non-interactively. This guide is the -single source of truth for how to use it. Read it once, then operate. - -## What this CLI does - -Edits Claude Code session JSONL files at ~/.claude/projects//.jsonl. -It can browse, search, inspect, and surgically delete messages from any session -while keeping tool_use/tool_result pairs and conversational turns intact. - -## Standard workflow - -1. Discover sessions: - cc-session list --json --limit 20 - cc-session search "" --json --limit 10 -2. Inspect one session: - cc-session info --json - cc-session show --json -3. Plan an edit (always dry-run first): - cc-session delete --indices 4,6 --dry-run --json -4. Apply: - cc-session delete --indices 4,6 --json - Pass --force only if the session is currently open in Claude Code; this - bypasses the lsof safety check. -5. (Optional) self-update: - cc-session update [--version v0.2.0] -6. If a delete breaks resume in Claude Code, restore from backup: - cc-session restore --list # inspect first - cc-session restore # apply (snapshots current - # to .pre-restore.) - -## Target argument () - -For show / info / delete the first positional arg accepts: - - a full filesystem path to a .jsonl file - - a full session UUID (preferred — unambiguous) - - any unique substring of a session UUID (8+ chars usually fine) -If a substring matches multiple sessions, the command errors and lists the -candidates. 
Pass a longer prefix to disambiguate. - -## Index semantics - -Indices are 0-based positions in the raw JSONL (one per line). Use -`cc-session show --json` to map message text -> index. Note: - - "Visible" messages (user / assistant text) are a subset; system messages, - tool_use blocks, tool_result blocks, attachments, and harness wrappers - (, , etc.) are hidden by default. Pass - --include-hidden to see them in `show`. - - Indices DO shift after a successful delete. Always re-run `show` between - deletes if you are picking by index. - -## Auto-pair (always on) - -Two safety extensions run on every delete request: - 1. tool_use <-> tool_result blocks always travel together. Marking either - side pulls the other. - 2. Turn-level pairing: a "turn" = visible user msg + every message that - follows it until the next visible user msg. Marking ANY message in a - turn marks the whole turn (user prompt + assistant reply + intermediate - tool calls). - -The delete output reports `requested` (what you asked) and `paired_added` -(what auto-pair added). Always inspect both before applying. - -## delete output JSON - - { - "path": "", - "parent_uuid_relinked": int, // survivors whose parentUuid - // was rewritten to skip - // deleted ancestors - "backup": ".bak | null when --dry-run", - "requested": [int, ...], // sorted, what you asked - "after_auto_pair": [int, ...], // sorted, final delete set - "paired_added": [int, ...], // sorted, set diff - "total_messages_before": int, - "total_messages_after": int, - "dry_run": bool, - "saved": bool, - "warnings": [str, ...] // e.g. 
orphan tool_results - } - -## show output JSON (per message) - - { - "index": int, - "role": "user" | "assistant" | "system" | ..., - "type": "", - "timestamp": ISO8601 | null, - "tokens": int, // tiktoken cl100k_base - "visible": bool, - "has_tool_use": bool, - "has_tool_result": bool, - "tool_use_ids": [str, ...], - "tool_result_ids": [str, ...], - "text": str, // 400-char preview by default - "truncated": bool // true when text was clipped - } - -## info output JSON - - { - "path", "project", "session_id", "title", "modified", "size", - "total_messages", "visible_messages", "user_messages", "assistant_messages", - "tool_use_count", "tool_result_count", - "orphan_result_indices": [int, ...], - "estimated_tokens": int - } - -## list / search output JSON (per entry) - - { "project", "session_id", "title", "modified", "size", "path" } - -## Selection flags for delete - -You may combine any/all; the union is taken before auto-pair runs. - --indices 3,5,7 // exact indices (comma-separated) - --range lo..hi // inclusive range, both ints - --from-top N // first N messages - --from-bottom N // last N messages - -At least one selection flag is required. - -## Safety guarantees - - - Atomic save: writes .tmp, fsync, rename to . - - Backup: every save first writes .bak (overwriting any prior bak). - - Concurrent-open: if `lsof` reports the file is open by another process, - save returns SaveError::Conflict ("file is open by another process; close - Claude Code or pass --force"). On non-unix or when lsof is missing, this - check is skipped with a stderr warning. - - Round-trip: untouched messages save byte-equal — unknown JSONL fields - are preserved verbatim via `serde(flatten)`. - -## Exit codes - - 0 success - 1 generic error (parse failure, conflict, IO error, ambiguous target, ...) - 2+ reserved for future structured errors -Always inspect stderr on non-zero exit for the human-readable cause. 
- -## Environment overrides - - CC_SESSION_VERSION pin a specific release (used by `update`). - CC_SESSION_INSTALL_DIR where install.sh drops the binary. - CC_SESSION_INSTALLER_URL override installer URL for `update` (testing). - -## Useful examples (one-liners an agent can paste) - - # delete top 50 messages of a long session, dry run first - cc-session delete --from-top 50 --dry-run --json - cc-session delete --from-top 50 --json - - # purge messages 200..280 inclusive - cc-session delete --range 200..280 --dry-run --json - - # remove a single off-topic exchange (turn-pair pulls the assistant reply) - cc-session delete --indices 14 --dry-run --json - - # find a session about "auth middleware" and inspect - cc-session search "auth middleware" --json --limit 1 - cc-session show --json - -## Resume safety: parentUuid auto-relink - -Every save scans surviving messages and rewrites any `parentUuid` that -points to a now-deleted ancestor, walking up the chain to the nearest -surviving ancestor (or null if the chain reaches the root). The count is -reported in `parent_uuid_relinked`. This keeps Claude Code's resume -renderer happy after scattered deletes; if it ever fails anyway, -`cc-session restore ` rolls back to the .bak snapshot. - -## Things this CLI will NOT do - - - Edit message contents in place. - - Reorder messages. - - Merge or split sessions. - - Apply changes while Claude Code is actively writing to the file - (refuses unless --force). -"#; +pub const AGENT_GUIDE: &str = include_str!("../AGENTS.md"); // ---------- update ---------- @@ -857,7 +682,6 @@ fn resolve_target(projects_dir: &Path, target: &str) -> Result { // Direct path? let p = PathBuf::from(target); if p.is_file() { - // Build a minimal SessionEntry from the path itself. 
let meta = std::fs::metadata(&p)?; let session_id = p .file_stem() @@ -875,6 +699,8 @@ fn resolve_target(projects_dir: &Path, target: &str) -> Result { mtime: meta.modified().unwrap_or(std::time::SystemTime::UNIX_EPOCH), size: meta.len(), path: p, + is_fork: false, + fork_origin: None, }); } @@ -892,7 +718,6 @@ fn resolve_target(projects_dir: &Path, target: &str) -> Result { return Err(anyhow!("no session matched '{target}'")); } if matches.len() > 1 { - // Prefer exact id match. let exact: Vec<&&SessionEntry> = matches .iter() .filter(|e| e.session_id.to_lowercase() == needle) diff --git a/src/cli/fork.rs b/src/cli/fork.rs new file mode 100644 index 0000000..bb116ea --- /dev/null +++ b/src/cli/fork.rs @@ -0,0 +1,179 @@ +//! Fork-on-delete: produces a new session JSONL file with a fresh UUID, +//! leaving the source untouched. Used by both the CLI `delete` command and +//! the TUI edit screen's save flow. + +use std::collections::HashSet; +use std::path::PathBuf; + +use anyhow::Result; +use chrono::Utc; +use serde_json::Value; +use uuid::Uuid; + +use crate::io::atomic; +use crate::scan::FORK_SENTINEL_TYPE; +use crate::session::Session; + +#[derive(Debug)] +pub struct ForkOutcome { + pub new_session_id: String, + pub new_path: PathBuf, + #[allow(dead_code)] + pub origin_session_id: String, + #[allow(dead_code)] + pub relinked: usize, +} + +/// Write a new session JSONL file under the same project directory, +/// prefixed with a `cc-session-fork` sentinel line and with all top-level +/// `sessionId` fields rewritten to the new UUID. 
+pub fn fork_session(session: &Session, omit: &HashSet) -> Result { + let new_id = Uuid::new_v4().to_string(); + let parent = session + .path + .parent() + .ok_or_else(|| anyhow::anyhow!("source session has no parent directory"))?; + let new_path = parent.join(format!("{new_id}.jsonl")); + + let origin_session_id = session + .path + .file_stem() + .map(|s| s.to_string_lossy().to_string()) + .unwrap_or_default(); + + let (rendered, relinked) = session.render_with_relink(omit)?; + + // Rewrite top-level `sessionId` per line. Any line that is not parseable + // JSON or has no sessionId field passes through unchanged. Prepend a + // sentinel line so `scan` can mark this file as a fork. + let sentinel = serde_json::json!({ + "type": FORK_SENTINEL_TYPE, + "origin": origin_session_id, + "forked_at": Utc::now().to_rfc3339(), + "cc_session_version": env!("CARGO_PKG_VERSION"), + }); + let mut out = String::new(); + out.push_str(&serde_json::to_string(&sentinel)?); + out.push('\n'); + + for line in rendered.split_inclusive('\n') { + let trimmed = line.trim_end_matches('\n'); + if trimmed.is_empty() { + out.push_str(line); + continue; + } + match serde_json::from_str::(trimmed) { + Ok(mut v) => { + if let Value::Object(map) = &mut v { + if let Some(sid) = map.get_mut("sessionId") { + if sid.is_string() { + *sid = Value::String(new_id.clone()); + } + } + } + out.push_str(&serde_json::to_string(&v)?); + if line.ends_with('\n') { + out.push('\n'); + } + } + Err(_) => out.push_str(line), + } + } + + atomic::write_atomic(&new_path, &out)?; + + Ok(ForkOutcome { + new_session_id: new_id, + new_path, + origin_session_id, + relinked, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::session::Session; + use std::fs; + use std::io::Write; + + fn write_tmp_session(content: &str) -> (tempfile::TempDir, PathBuf) { + let dir = tempfile::tempdir().unwrap(); + let path = dir + .path() + .join("11111111-1111-1111-1111-111111111111.jsonl"); + let mut f = 
fs::File::create(&path).unwrap(); + f.write_all(content.as_bytes()).unwrap(); + (dir, path) + } + + #[test] + fn fork_creates_new_file_with_sentinel() { + let content = "{\"type\":\"user\",\"uuid\":\"a\",\"sessionId\":\"11111111-1111-1111-1111-111111111111\",\"message\":{\"role\":\"user\",\"content\":\"hi\"}}\n"; + let (_dir, path) = write_tmp_session(content); + let session = Session::load(&path).unwrap(); + + let omit = HashSet::new(); + let out = fork_session(&session, &omit).unwrap(); + + assert!(out.new_path.exists()); + assert_ne!(out.new_session_id, "11111111-1111-1111-1111-111111111111"); + + // Original untouched. + assert_eq!(fs::read_to_string(&path).unwrap(), content); + + // New file: first line is sentinel; second line has rewritten sessionId. + let new_content = fs::read_to_string(&out.new_path).unwrap(); + let mut lines = new_content.lines(); + let sentinel: Value = serde_json::from_str(lines.next().unwrap()).unwrap(); + assert_eq!(sentinel["type"], "cc-session-fork"); + assert_eq!(sentinel["origin"], "11111111-1111-1111-1111-111111111111"); + + let user_line: Value = serde_json::from_str(lines.next().unwrap()).unwrap(); + assert_eq!(user_line["sessionId"], out.new_session_id); + assert_eq!(user_line["uuid"], "a"); // intra-session uuids untouched + } + + #[test] + fn fork_with_delete_drops_marked_messages() { + let content = concat!( + "{\"type\":\"user\",\"uuid\":\"a\",\"sessionId\":\"old\"}\n", + "{\"type\":\"assistant\",\"uuid\":\"b\",\"parentUuid\":\"a\",\"sessionId\":\"old\"}\n", + "{\"type\":\"user\",\"uuid\":\"c\",\"parentUuid\":\"b\",\"sessionId\":\"old\"}\n", + ); + let (_dir, path) = write_tmp_session(content); + let session = Session::load(&path).unwrap(); + + let mut omit = HashSet::new(); + omit.insert(1); // drop b + let out = fork_session(&session, &omit).unwrap(); + + let new_content = fs::read_to_string(&out.new_path).unwrap(); + // sentinel + 2 surviving lines + assert_eq!(new_content.lines().count(), 3); + // c.parentUuid 
relinked from b -> a + assert!(new_content.contains("\"parentUuid\":\"a\"")); + assert_eq!(out.relinked, 1); + } + + #[test] + fn fork_passes_through_lines_without_sessionid() { + // Lines lacking a top-level sessionId (the user line) gain none; a + // top-level sessionId on any line type (ai-title here) is rewritten. + let content = concat!( + "{\"type\":\"ai-title\",\"aiTitle\":\"Some Title\",\"sessionId\":\"old\"}\n", + "{\"type\":\"user\",\"uuid\":\"a\"}\n", + ); + let (_dir, path) = write_tmp_session(content); + let session = Session::load(&path).unwrap(); + + let omit = HashSet::new(); + let out = fork_session(&session, &omit).unwrap(); + let new_content = fs::read_to_string(&out.new_path).unwrap(); + + // ai-title's sessionId rewritten too (still a top-level sessionId). + assert!(new_content.contains(&out.new_session_id)); + // aiTitle preserved. + assert!(new_content.contains("Some Title")); + } +} diff --git a/src/io/atomic.rs b/src/io/atomic.rs index 024d4fa..8bd191e 100644 --- a/src/io/atomic.rs +++ b/src/io/atomic.rs @@ -2,37 +2,9 @@ use std::fs; use std::io::Write; use std::path::{Path, PathBuf}; -use thiserror::Error; - -#[derive(Debug, Error)] -pub enum SaveError { - #[error("file is open by another process; close Claude Code first or pass --force")] - Conflict, - #[error("io error: {0}")] - Io(#[from] std::io::Error), -} - -pub struct SaveOutcome { - pub backup: PathBuf, -} - -/// Save `content` to `path` atomically: -/// 1. If `!force`, abort when lsof reports the file open. -/// 2. Copy current file to `.bak` (overwrite). -/// 3. Write `.tmp`, fsync. -/// 4. Rename `.tmp` -> ``. -/// -/// If `path` does not yet exist, the backup step is skipped. -pub fn save(path: &Path, content: &str, force: bool) -> Result { - if !force && super::lsof::is_open(path)? { - return Err(SaveError::Conflict); - } - - let backup = with_extension_appended(path, "bak"); - if path.exists() { - fs::copy(path, &backup)?; - } - +/// Write `content` to `path` atomically: write `.tmp`, fsync, then +/// rename.
Used by fork so partial writes never appear at the destination. +pub fn write_atomic(path: &Path, content: &str) -> std::io::Result<()> { let tmp = with_extension_appended(path, "tmp"); { let mut f = fs::File::create(&tmp)?; @@ -40,8 +12,7 @@ pub fn save(path: &Path, content: &str, force: bool) -> Result PathBuf { @@ -57,37 +28,19 @@ mod tests { use std::fs; #[test] - fn save_creates_backup_and_replaces() { + fn write_atomic_creates_file() { let dir = tempfile::tempdir().unwrap(); let path = dir.path().join("session.jsonl"); - fs::write(&path, "old").unwrap(); - - let outcome = save(&path, "new", true).unwrap(); - - assert_eq!(fs::read_to_string(&path).unwrap(), "new"); - assert_eq!(fs::read_to_string(&outcome.backup).unwrap(), "old"); + write_atomic(&path, "hello").unwrap(); + assert_eq!(fs::read_to_string(&path).unwrap(), "hello"); } #[test] - fn save_overwrites_existing_backup() { + fn write_atomic_overwrites() { let dir = tempfile::tempdir().unwrap(); let path = dir.path().join("session.jsonl"); - fs::write(&path, "v1").unwrap(); - save(&path, "v2", true).unwrap(); - save(&path, "v3", true).unwrap(); - assert_eq!(fs::read_to_string(&path).unwrap(), "v3"); - assert_eq!( - fs::read_to_string(path.with_file_name("session.jsonl.bak")).unwrap(), - "v2" - ); - } - - #[test] - fn save_to_new_file_skips_backup() { - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("fresh.jsonl"); - let outcome = save(&path, "content", true).unwrap(); - assert!(!outcome.backup.exists()); - assert_eq!(fs::read_to_string(&path).unwrap(), "content"); + write_atomic(&path, "v1").unwrap(); + write_atomic(&path, "v2").unwrap(); + assert_eq!(fs::read_to_string(&path).unwrap(), "v2"); } } diff --git a/src/io/lsof.rs b/src/io/lsof.rs deleted file mode 100644 index 8940199..0000000 --- a/src/io/lsof.rs +++ /dev/null @@ -1,26 +0,0 @@ -use std::path::Path; -use std::process::Command; - -/// Returns true if some process currently has the file open. 
-/// -/// Uses `lsof -t -- ` on unix. On non-unix or if `lsof` is unavailable, -/// returns `Ok(false)` and emits a warning to stderr (best-effort detection). -pub fn is_open(path: &Path) -> std::io::Result { - if !cfg!(unix) { - return Ok(false); - } - let out = match Command::new("lsof").arg("-t").arg("--").arg(path).output() { - Ok(o) => o, - Err(e) => { - eprintln!("warning: lsof unavailable ({e}); skipping concurrent-open check"); - return Ok(false); - } - }; - // lsof exit codes: 0 with stdout = open by some pid; 1 = not open. - if out.status.success() { - let s = String::from_utf8_lossy(&out.stdout); - Ok(!s.trim().is_empty()) - } else { - Ok(false) - } -} diff --git a/src/io/mod.rs b/src/io/mod.rs index 6c2627c..652223f 100644 --- a/src/io/mod.rs +++ b/src/io/mod.rs @@ -1,2 +1 @@ pub mod atomic; -pub mod lsof; diff --git a/src/main.rs b/src/main.rs index f334925..351517a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -19,10 +19,6 @@ struct Cli { #[arg(long, value_name = "DIR", global = true)] projects_dir: Option, - /// Bypass concurrent-open detection when saving. - #[arg(long, global = true)] - force: bool, - #[command(subcommand)] command: Option, } @@ -66,7 +62,10 @@ enum Command { #[arg(long)] json: bool, }, - /// Delete messages from a session by index. Auto-pairs tool_use/tool_result. + /// Delete messages from a session by index. Always forks: writes a NEW + /// session file with a new UUID and leaves the original untouched. + /// Output prints a `claude --resume ` command to continue. + /// Auto-pairs tool_use/tool_result and re-links parentUuids. Delete { /// Session id, file path, or substring of either. target: String, @@ -82,7 +81,7 @@ enum Command { /// Inclusive range "lo..hi" (0-based). #[arg(long)] range: Option, - /// Show what would be removed without writing. + /// Show what would be removed without writing the fork file. #[arg(long)] dry_run: bool, /// Output JSON. 
@@ -95,9 +94,21 @@ enum Command { #[arg(long)] json: bool, }, + /// Show the heaviest conversational turns by token count, descending. + /// Use this to pick which turns to drop with `delete`. + Heatmap { + /// Session id, file path, or substring of either. + target: String, + /// Limit output to N turns. Default 20. + #[arg(long)] + limit: Option, + /// Output JSON. + #[arg(long)] + json: bool, + }, /// Self-update to the latest release (or a specific version). Update { - /// Install a specific tag (e.g. `v0.2.0`). Default: latest. + /// Install a specific tag (e.g. `v1.0.0`). Default: latest. #[arg(long)] version: Option, }, @@ -105,18 +116,6 @@ enum Command { /// exit codes. Designed for LLMs and scripts to read once and operate /// autonomously. AgentGuide, - /// Restore a session from its .bak backup. Refuses to overwrite - /// while Claude Code holds the file open unless --force. - Restore { - /// Session id, file path, or substring of either. - target: String, - /// Just print the backup path and metadata; don't restore. - #[arg(long)] - list: bool, - /// Output JSON. 
- #[arg(long)] - json: bool, - }, } fn main() -> anyhow::Result<()> { @@ -128,7 +127,6 @@ fn main() -> anyhow::Result<()> { match cli.command { None => app::run(app::Config { - force: cli.force, projects_dir: Some(projects_dir), }), Some(Command::List { @@ -163,18 +161,19 @@ fn main() -> anyhow::Result<()> { range, }, dry_run, - cli.force, json, ), Some(Command::Info { target, json }) => cli::info(&projects_dir, &target, json), + Some(Command::Heatmap { + target, + limit, + json, + }) => cli::heatmap(&projects_dir, &target, limit, json), Some(Command::Update { version }) => cli::update(version.as_deref()), Some(Command::AgentGuide) => { print!("{}", cli::AGENT_GUIDE); Ok(()) } - Some(Command::Restore { target, list, json }) => { - cli::restore(&projects_dir, &target, list, cli.force, json) - } } } diff --git a/src/scan.rs b/src/scan.rs index 493588d..ee947fa 100644 --- a/src/scan.rs +++ b/src/scan.rs @@ -13,8 +13,19 @@ pub struct SessionEntry { pub mtime: SystemTime, pub size: u64, pub path: PathBuf, + /// True when the session file carries the cc-session-fork sentinel, + /// meaning this file was produced by `cc-session delete` and the title + /// should be displayed with an `[edited]` prefix. + pub is_fork: bool, + /// When `is_fork`, the original session id this file was forked from + /// (best-effort, may be empty). + pub fork_origin: Option, } +/// Public marker line type used to tag forked session files. Kept here as a +/// constant so scan / fork agree. 
+pub const FORK_SENTINEL_TYPE: &str = "cc-session-fork"; + const TITLE_LIMIT: usize = 60; pub fn scan(projects_dir: &Path) -> anyhow::Result> { @@ -67,9 +78,15 @@ pub fn scan(projects_dir: &Path) -> anyhow::Result> { .file_stem() .map(|s| s.to_string_lossy().to_string()) .unwrap_or_default(); - let title = derive_title(&path); + let scanned = scan_one(&path); let mtime = meta.modified().unwrap_or(SystemTime::UNIX_EPOCH); + let title = if scanned.is_fork { + format!("[edited] {}", scanned.title) + } else { + scanned.title + }; + entries.push(SessionEntry { project_slug: project_slug.clone(), session_id, @@ -77,6 +94,8 @@ pub fn scan(projects_dir: &Path) -> anyhow::Result> { mtime, size: meta.len(), path, + is_fork: scanned.is_fork, + fork_origin: scanned.fork_origin, }); } } @@ -85,18 +104,28 @@ pub fn scan(projects_dir: &Path) -> anyhow::Result> { Ok(entries) } -fn derive_title(path: &Path) -> String { - // Prefer Claude Code's auto-generated `aiTitle` (a single - // `{"type":"ai-title","aiTitle":"..."}` line that can appear anywhere - // in the file). Fall back to first user message text. Final fallback: - // a placeholder. Single pass, capped at 1000 lines to keep scan fast on - // very large sessions. 
+struct ScannedMeta { + title: String, + is_fork: bool, + fork_origin: Option, +} + +fn scan_one(path: &Path) -> ScannedMeta { let file = match fs::File::open(path) { Ok(f) => f, - Err(_) => return "".into(), + Err(_) => { + return ScannedMeta { + title: "".into(), + is_fork: false, + fork_origin: None, + }; + } }; let reader = BufReader::new(file); let mut first_user_text: Option = None; + let mut ai_title: Option = None; + let mut is_fork = false; + let mut fork_origin: Option = None; for (peeked, line) in reader.lines().enumerate() { if peeked >= 1000 { @@ -115,13 +144,24 @@ fn derive_title(path: &Path) -> String { }; let entry_type = v.get("type").and_then(Value::as_str); - if entry_type == Some("ai-title") { + if entry_type == Some(FORK_SENTINEL_TYPE) { + is_fork = true; + if let Some(o) = v.get("origin").and_then(Value::as_str) { + if !o.is_empty() { + fork_origin = Some(o.to_string()); + } + } + continue; + } + + if ai_title.is_none() && entry_type == Some("ai-title") { if let Some(t) = v.get("aiTitle").and_then(Value::as_str) { let t = t.trim(); if !t.is_empty() { - return clamp_title(t); + ai_title = Some(clamp_title(t)); } } + continue; } if first_user_text.is_none() && entry_type == Some("user") { @@ -145,10 +185,15 @@ fn derive_title(path: &Path) -> String { } } - if let Some(text) = first_user_text { - return clamp_title(&text); + let title = ai_title + .or_else(|| first_user_text.as_deref().map(clamp_title)) + .unwrap_or_else(|| "".into()); + + ScannedMeta { + title, + is_fork, + fork_origin, } - "".into() } fn clamp_title(s: &str) -> String { @@ -287,6 +332,35 @@ mod tests { assert_eq!(entries[0].title, "fallback text"); } + #[test] + fn fork_sentinel_marks_entry() { + let dir = tempfile::tempdir().unwrap(); + make_session( + dir.path(), + "p", + "s", + "{\"type\":\"cc-session-fork\",\"origin\":\"old-id\",\"forked_at\":\"2026-06-11T00:00:00Z\"}\n{\"type\":\"user\",\"message\":{\"role\":\"user\",\"content\":\"hi\"}}\n", + ); + let entries = 
scan(dir.path()).unwrap(); + assert!(entries[0].is_fork); + assert_eq!(entries[0].fork_origin.as_deref(), Some("old-id")); + assert!(entries[0].title.starts_with("[edited] ")); + } + + #[test] + fn no_sentinel_no_badge() { + let dir = tempfile::tempdir().unwrap(); + make_session( + dir.path(), + "p", + "s", + "{\"type\":\"user\",\"message\":{\"role\":\"user\",\"content\":\"hi\"}}\n", + ); + let entries = scan(dir.path()).unwrap(); + assert!(!entries[0].is_fork); + assert!(!entries[0].title.starts_with("[edited]")); + } + #[test] fn malformed_first_line_recovers() { let dir = tempfile::tempdir().unwrap(); diff --git a/src/screens/edit.rs b/src/screens/edit.rs index fdd8e30..a07c120 100644 --- a/src/screens/edit.rs +++ b/src/screens/edit.rs @@ -9,7 +9,7 @@ use ratatui::widgets::{Block, Borders, Clear, List, ListItem, ListState, Paragra use ratatui::Frame; use serde_json::Value; -use crate::io::atomic::{self, SaveError}; +use crate::cli::fork; use crate::pairing::PairIndex; use crate::session::{Message, Session}; use crate::tokens::TokenCounter; @@ -20,7 +20,6 @@ use super::Transition; pub enum Modal { None, ConfirmSave { count: usize }, - ConfirmForce, SaveResult(String), } @@ -174,31 +173,28 @@ impl EditState { } } - fn perform_save(&mut self, force: bool) -> Result<()> { - let content = self.session.render(&self.marked)?; - match atomic::save(&self.session.path, &content, force) { + fn perform_save(&mut self) -> Result<()> { + let removed = self.marked.len(); + match fork::fork_session(&self.session, &self.marked) { Ok(out) => { self.modal = Modal::SaveResult(format!( - "Saved. {} message(s) removed. Backup: {}", - self.marked.len(), - out.backup.display() + "Forked. 
{} message(s) removed.\nresume: claude --resume {}\nfile: {}", + removed, + out.new_session_id, + out.new_path.display() )); self.marked.clear(); Ok(()) } - Err(SaveError::Conflict) => { - self.modal = Modal::ConfirmForce; - Ok(()) - } - Err(SaveError::Io(e)) => { - self.modal = Modal::SaveResult(format!("Save failed: {e}")); + Err(e) => { + self.modal = Modal::SaveResult(format!("Fork failed: {e}")); Ok(()) } } } } -pub fn handle_key(state: &mut EditState, key: KeyEvent, force_flag: bool) -> Result { +pub fn handle_key(state: &mut EditState, key: KeyEvent) -> Result { match &state.modal { Modal::ConfirmSave { count } => { let count = *count; @@ -208,24 +204,13 @@ pub fn handle_key(state: &mut EditState, key: KeyEvent, force_flag: bool) -> Res if count == 0 { state.modal = Modal::SaveResult("no changes to save".into()); } else { - state.perform_save(force_flag)?; + state.perform_save()?; } } _ => state.modal = Modal::None, } return Ok(Transition::None); } - Modal::ConfirmForce => match key.code { - KeyCode::Char('f') => { - state.modal = Modal::None; - state.perform_save(true)?; - return Ok(Transition::None); - } - _ => { - state.modal = Modal::None; - return Ok(Transition::None); - } - }, Modal::SaveResult(_) => { state.modal = Modal::None; return Ok(Transition::None); @@ -469,17 +454,11 @@ fn render_modal(frame: &mut Frame, state: &EditState, area: Rect) { let (title, body) = match &state.modal { Modal::None => return, Modal::ConfirmSave { count } => ( - "save?", + "fork?", format!( - "delete {} message(s) and save to {}?\nbackup at .bak.\n[y] confirm any other key cancels", - count, - state.session.path.display() + "drop {count} message(s) and write a new session file?\noriginal stays untouched.\n[y] confirm any other key cancels" ), ), - Modal::ConfirmForce => ( - "file is open", - "claude code (or another process) has this file open.\npress [f] to force save anyway, any other key to cancel.".to_string(), - ), Modal::SaveResult(msg) => ("result", msg.clone()), 
}; let popup = centered_rect(60, 30, area); diff --git a/src/screens/list.rs b/src/screens/list.rs index 9c9ab15..545594e 100644 --- a/src/screens/list.rs +++ b/src/screens/list.rs @@ -201,6 +201,8 @@ mod tests { mtime: SystemTime::UNIX_EPOCH, size: 1024, path: PathBuf::from(format!("/tmp/{project}/{title}.jsonl")), + is_fork: false, + fork_origin: None, } } diff --git a/src/screens/mod.rs b/src/screens/mod.rs index 6db64fa..bfadc8c 100644 --- a/src/screens/mod.rs +++ b/src/screens/mod.rs @@ -52,7 +52,7 @@ fn main_loop( fn handle_key(app: &mut App, key: KeyEvent) -> Result<()> { let transition = match &mut app.screen { Screen::List(state) => list::handle_key(state, key), - Screen::Edit(state) => edit::handle_key(state.as_mut(), key, app.force), + Screen::Edit(state) => edit::handle_key(state.as_mut(), key), }?; apply_transition(app, transition) } diff --git a/src/search.rs b/src/search.rs index d81c507..cf0030b 100644 --- a/src/search.rs +++ b/src/search.rs @@ -50,6 +50,8 @@ mod tests { mtime: SystemTime::UNIX_EPOCH, size: 0, path: PathBuf::from("/tmp/x"), + is_fork: false, + fork_origin: None, } } diff --git a/src/session.rs b/src/session.rs index 40cc838..cb319be 100644 --- a/src/session.rs +++ b/src/session.rs @@ -208,6 +208,7 @@ impl Session { } /// Backwards-compatible render that discards the relink count. + #[cfg(test)] pub fn render(&self, omit: &std::collections::HashSet) -> Result { Ok(self.render_with_relink(omit)?.0) } diff --git a/src/tokens.rs b/src/tokens.rs index 2b297d4..cb540ce 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -1,11 +1,17 @@ use std::cell::RefCell; use std::collections::HashMap; -use serde_json::Value; use tiktoken_rs::CoreBPE; use crate::session::Message; +/// Tokenizes whole JSONL lines so the count reflects what Claude Code actually +/// loads (text + tool_use args + tool_result content + metadata). Uses +/// `cl100k_base` as a stable, well-known approximation. 
+/// +/// For messages that came off disk, this measures `original_line` directly — +/// no re-serialization, no shape drift. For in-memory messages (forks, +/// re-linked parents), it falls back to `serde_json::to_string`. pub struct TokenCounter { encoder: RefCell>, cache: RefCell>, @@ -25,39 +31,27 @@ impl TokenCounter { } } - /// Returns approximate token count for a message. - /// Uses pre-computed `usage.input_tokens` / `usage.output_tokens` when present. pub fn count(&self, msg_idx: usize, msg: &Message) -> usize { if let Some(cached) = self.cache.borrow().get(&msg_idx) { return *cached; } - let count = self.compute(msg); + let count = self.encode(&render_for_count(msg)); self.cache.borrow_mut().insert(msg_idx, count); count } - fn compute(&self, msg: &Message) -> usize { - if let Some(body) = &msg.message { - if let Some(usage) = body.extra.get("usage").and_then(Value::as_object) { - let input = usage - .get("input_tokens") - .and_then(Value::as_u64) - .unwrap_or(0); - let output = usage - .get("output_tokens") - .and_then(Value::as_u64) - .unwrap_or(0); - let total = (input + output) as usize; - if total > 0 { - return total; - } - } - } - let text = collect_text(msg); - if text.is_empty() { + /// Count an arbitrary string with the same encoder. Useful for sentinel + /// lines or summary text that isn't a Message. + #[allow(dead_code)] + pub fn count_str(&self, s: &str) -> usize { + self.encode(s) + } + + fn encode(&self, s: &str) -> usize { + if s.is_empty() { return 0; } - self.with_encoder(|enc| enc.encode_with_special_tokens(&text).len()) + self.with_encoder(|enc| enc.encode_with_special_tokens(s).len()) } fn with_encoder(&self, f: F) -> R @@ -66,42 +60,24 @@ impl TokenCounter { { let mut slot = self.encoder.borrow_mut(); if slot.is_none() { - // cl100k_base covers GPT-4 / Claude approximation. tiktoken-rs provides - // a constructor that returns CoreBPE directly. 
*slot = Some(tiktoken_rs::cl100k_base().expect("cl100k_base init")); } f(slot.as_ref().unwrap()) } } -fn collect_text(msg: &Message) -> String { - let body = match &msg.message { - Some(b) => b, - None => return String::new(), - }; - match &body.content { - Some(Value::String(s)) => s.clone(), - Some(Value::Array(blocks)) => blocks - .iter() - .filter_map(|b| { - let obj = b.as_object()?; - if obj.get("type").and_then(Value::as_str) == Some("text") { - obj.get("text").and_then(Value::as_str).map(str::to_string) - } else { - None - } - }) - .collect::>() - .join("\n"), - _ => String::new(), +fn render_for_count(msg: &Message) -> String { + if let Some(line) = &msg.original_line { + return line.clone(); } + serde_json::to_string(msg).unwrap_or_default() } #[cfg(test)] mod tests { use super::*; - use crate::session::MessageBody; - use serde_json::json; + use crate::session::{Message, MessageBody}; + use serde_json::{json, Value}; fn msg_with_text(text: &str) -> Message { Message { @@ -121,6 +97,8 @@ mod tests { #[test] fn empty_message_zero() { + // Even an "empty" message has structural JSON; just verify it counts + // the bytes of the serialized struct (more than zero). let m = Message { r#type: None, uuid: None, @@ -131,39 +109,75 @@ mod tests { original_line: None, }; let c = TokenCounter::new(); - assert_eq!(c.count(0, &m), 0); + // serialized as `{}` -> at least 1 token. + assert!(c.count(0, &m) > 0); } #[test] - fn plain_text_counts_positive() { - let m = msg_with_text("hello world this is a test"); + fn whole_line_used_when_original_present() { + // If original_line is set, it wins over serde shape — even when the + // line carries fields the struct doesn't model. 
+ let mut m = msg_with_text("hi"); + m.original_line = Some( + "{\"type\":\"user\",\"experimental\":\"\",\"message\":{\"role\":\"user\",\"content\":\"hi\",\"x\":\"y\"}}".into(), + ); let c = TokenCounter::new(); - let n = c.count(0, &m); - assert!(n > 0 && n < 20); + let with_orig = c.count(0, &m); + + let mut m2 = msg_with_text("hi"); + m2.original_line = None; + let c2 = TokenCounter::new(); + let without = c2.count(0, &m2); + + assert!( + with_orig >= without, + "original_line count ({with_orig}) should be >= struct-only count ({without})" + ); } #[test] - fn usage_metadata_takes_precedence() { - let mut body = MessageBody { - role: Some("assistant".into()), - content: Some(Value::String("a".repeat(10_000))), + fn tool_use_input_counted() { + // Big tool_use input must not vanish. + let big_arg = "x".repeat(2000); + let line = format!( + r#"{{"type":"assistant","message":{{"role":"assistant","content":[{{"type":"tool_use","id":"t","name":"R","input":{{"data":"{big_arg}"}}}}]}}}}"# + ); + let m = Message { + r#type: Some("assistant".into()), + uuid: None, + parent_uuid: None, + timestamp: None, + message: None, extra: Default::default(), + original_line: Some(line), }; - body.extra.insert( - "usage".into(), - json!({"input_tokens": 5, "output_tokens": 7}), + let c = TokenCounter::new(); + let n = c.count(0, &m); + assert!(n > 200, "expected sizeable count for 2KB input, got {n}"); + } + + #[test] + fn tool_result_content_counted() { + let stdout = "log line\n".repeat(200); + let line = format!( + r#"{{"type":"user","message":{{"role":"user","content":[{{"type":"tool_result","tool_use_id":"t","content":{}}}]}}}}"#, + serde_json::to_string(&stdout).unwrap() ); let m = Message { - r#type: Some("assistant".into()), + r#type: Some("user".into()), uuid: None, parent_uuid: None, timestamp: None, - message: Some(body), + message: None, extra: Default::default(), - original_line: None, + original_line: Some(line), }; let c = TokenCounter::new(); - assert_eq!(c.count(0, 
&m), 12); + let n = c.count(0, &m); + assert!( + n > 100, + "expected sizeable count for repeated stdout, got {n}" + ); } #[test] @@ -175,6 +189,13 @@ mod tests { assert_eq!(a, b); } + #[test] + fn count_str_works() { + let c = TokenCounter::new(); + assert_eq!(c.count_str(""), 0); + assert!(c.count_str("hello world") > 0); + } + #[test] fn block_array_counts_text_blocks() { let m = Message {