From 335f3f021e6b9104a79e5e9c877c7e0c3e2ccd2c Mon Sep 17 00:00:00 2001 From: lyx-tec Date: Mon, 8 Jun 2026 07:38:46 +0800 Subject: [PATCH 01/36] docs: add session daemon design docs (v1 + v2) --- docs/design/session-daemon-design-v1.md | 483 +++++++++++++++++++++++ docs/design/session-daemon-design-v2.md | 495 ++++++++++++++++++++++++ 2 files changed, 978 insertions(+) create mode 100644 docs/design/session-daemon-design-v1.md create mode 100644 docs/design/session-daemon-design-v2.md diff --git a/docs/design/session-daemon-design-v1.md b/docs/design/session-daemon-design-v1.md new file mode 100644 index 0000000000..160e0bf77b --- /dev/null +++ b/docs/design/session-daemon-design-v1.md @@ -0,0 +1,483 @@ +# Session Daemon — Design Document V1 + +## 1. Overview + +Session Daemon 是一个持久的远程终端 session 模型。与当前 "一个 block 对应一个远程 job" 的 1:1 架构不同,Session Daemon 将**远端连接**与**block 视图**解耦。Session Daemon 独立于任何 block 存在,多个 block 可以 attach/detach 到同一个 daemon,所有 block 共享同一份 raw 输出数据,各自独立渲染。 + +**核心目标**:持久化(跨重启保持)、多视图镜像、所有 block 均可输入。 + +## 2. 
Architecture + +``` +┌── Local WaveTerm ──────────────────────────────────────────────────┐ +│ │ +│ ┌── SessionDaemon ────────────────────────────────────────────┐ │ +│ │ id: "sd-abc" │ │ +│ │ name: "dev" │ │ +│ │ jobId: "job-xyz" │ │ +│ │ connName: "ssh:user@host" │ │ +│ │ status: "running" │ │ +│ │ │ │ +│ │ InputSessionId: "uuid-X" │ │ +│ │ seqNum: 42 │ │ +│ │ │ │ +│ │ StreamReader ──▶ runOutputLoop() ──▶ job:job-xyz/term │ │ +│ │ │ │ +│ │ AttachedBlocks: [block-A, block-B] │ │ +│ └──────────────────────────────┬───────────────────────────────┘ │ +│ │ │ +│ ┌────────────┴────────────┐ │ +│ │ │ │ +│ Block-A Block-B │ +│ ┌──────────────────┐ ┌──────────────────┐ │ +│ │ view: "term" │ │ view: "term" │ │ +│ │ meta: │ │ meta: │ │ +│ │ daemonid:sd-abc │ │ daemonid:sd-abc │ │ +│ │ │ │ │ │ +│ │ read job file │ │ read job file │ │ +│ │ sendInput ▶ D │ │ sendInput ▶ D │ │ +│ └──────────────────┘ └──────────────────┘ │ +│ │ │ │ +│ └────────┬────────────────┘ │ +│ ▼ │ +│ SessionDaemon.SendInput() │ +│ │ │ +│ ▼ │ +│ jobcontroller.SendInput() │ +│ │ │ +│ ▼ │ +│ Remote JobManager(job-xyz) │ +│ (single attached client, 不改动) │ +│ │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +## 3. 
Data Model + +### 3.1 SessionDaemon(DB 持久化) + +``` +SessionDaemon { + OID: string // "sd-abc",内部标识 + Name: string // "dev",用户别名,可选 + Connection: string // "ssh:user@host" + JobId: string // "job-xyz" + Status: string // "init" | "running" | "disconnected" | "done" + Cwd: string // 创建时的 CWD + CreatedAt: int64 + IdleTimeout: int64 // 超时回收(秒),默认 86400(24h) + Meta: MetaMapType +} +``` + +- **Name 唯一性**:全局唯一。创建时若冲突,自动追加时间后缀(`dev` → `dev-150623`),并提示用户实际名称。 +- **空闲回收**:无 block attach 超过 `IdleTimeout`(默认 24h)后自动回收(`TerminateAndDetachJob` + status=done)。 + +### 3.2 Status 状态机 + +``` + ┌─────────────────────────────────────────────────┐ + │ │ + ▼ │ + ┌──────┐ StartJob成功 ┌─────────┐ SSH断开 ┌───────────────┐ + │ init │ ───────────────▶ │ running │ ──────────▶ │ disconnected │ + └──┬───┘ └────┬─────┘ └───────┬───────┘ + │ │ │ + │ StartJob失败 │ 用户删除 (ssh在线) │ 用户删除、idle timeout + │ │ 或 shell退出 │ 或 shell退出 + ▼ ▼ │ + ┌──────┐ ┌──────┐ │ + │ done │ │ done │ │ + └──────┘ └──────┘ │ + ▲ │ + └────────────────────────────────────────────────────┘ +``` + +| 状态 | 含义 | 前端 block 显示 | +|------|------|----------------| +| `init` | 正在创建,job 尚未启动 | "Starting..." | +| `running` | SSH 在线,远端 shell 运行中 | 正常终端 | +| `disconnected` | SSH 断开但远端 shell 仍存活 | "Reconnecting..." | +| `done` | 已终止 | "Session Ended" | + +### 3.3 Block(扩展) + +``` +Block { + ... // 现有字段不变 + Meta: { + ... + "session:daemonid": string // 新增。为空 = 未 attach + } +} +``` + +### 3.4 Job(不变) + +现有结构完全保留。`AttachedBlockId` 仍为单值,指向 SessionDaemon(不直接指向 block)。 + +### 3.5 DurableShellController 移除 + +SessionDaemon 完全取代旧的 `DurableShellController`: + +- 移除 `pkg/blockcontroller/durableshellcontroller.go` +- 移除 `ResyncController` 中的 `DurableShellController` 分支 +- `IsBlockIdTermDurable` 不再需要 +- `handleAppendJobFile` 不再同时写 `block:blockId/term`,只写 `job:jobId/term` + +## 4. 
Backend Design + +### 4.1 Controller 调度(ResyncController) + +dispatch 只取决于 block 是否 attach 到 daemon,与 connection 无关: + +``` +if block.Meta["session:daemonid"] != "" { + → SessionDaemonController // 桥接到 daemon,无进程 +} else if controllerName == "shell" || controllerName == "cmd" { + → ShellController // 本地 shell +} else if controllerName == "tsunami" { + → TsunamiController +} +``` + +block 的 `connection` meta 在未 attach 时仅作为创建/attach daemon 时的提示信息,不影响 controller 类型。远端会话的概念完全由 SessionDaemon 承载。 + +block 三态: + +``` + create / detach + ShellController ◄─────────────────► SessionDaemonController + (本地 shell) (桥接到 daemon) + │ │ + │ block 创建时默认 │ attach + │ (或 detach 后恢复) │ + │ ▼ + └── 没有 attach 时跑本地 shell session 输出实时显示 + 行为与现有非 durable block 一致 所有 block 可输入 +``` + +### 4.2 SessionDaemonController + +``` +SessionDaemonController { + BlockId: string + ConnName: string + DaemonId: string +} + +Start(): + → SessionDaemonManager.AttachBlock(daemonId, blockId) + → 返回 daemon.JobId(前端据此读文件) + → 发 controllerstatus 事件 + +SendInput(input): + → SessionDaemonManager.SendInput(daemonId, input.InputData) + → 若 input.TermSize 非空,更新 daemon 的 PTY 尺寸 + (多个 block resize 时最后一个生效) + +Stop(): + → SessionDaemonManager.DetachBlock(daemonId, blockId) + +GetRuntimeStatus(): + → 返回 daemon 的连接状态 (running/disconnected/done) +``` + +### 4.3 职责分层 + +``` +SessionDaemon jobcontroller Remote +────────────── ───────────── ────── + 生命周期编排 原子操作 远端执行 + Start / Reconnect / StartJob() RemoteStartJob + Stop ReconnectJob() RemoteReconnect + runOutputLoop goroutine SendInput() RemoteTerminate + SendInput (入口) TerminateJob() + AttachBlock / DetachBlock RunOutputLoop() ← 函数保留,goroutine 由 daemon 启动 + handleAppendJobFile() +``` + +### 4.4 runOutputLoop 归属 + +当前 `RunOutputLoop` goroutine 由 `StartJob()` 和 `restartStreaming()` 内部启动。改为 **SessionDaemon 启动 goroutine,jobcontroller 提供函数**。 + +`StartJob()` 和 `restartStreaming()` 内部移除 `go runOutputLoop(...)`,改为返回 `(reader, streamMeta)`: + +```go +// SessionDaemon 组装生命周期 +func 
(sd *SessionDaemon) Start(ctx) error { + jobId, reader, streamMeta, err := jobcontroller.StartJob(ctx, params) + sd.jobId = jobId + go jobcontroller.RunOutputLoop(ctx, jobId, streamMeta.Id, reader) + return nil +} + +func (sd *SessionDaemon) Reconnect(ctx) error { + reader, streamMeta, err := jobcontroller.ReconnectJob(ctx, sd.jobId, rtOpts) + // jobStreamIds 已更新,旧 RunOutputLoop 因 currentStreamId != streamId 自动退出 + go jobcontroller.RunOutputLoop(ctx, sd.jobId, streamMeta.Id, reader) + return nil +} +``` + +`RunOutputLoop` 代码本身不动——自毁逻辑 `currentStreamId != streamId → break` 直接复用。 + +### 4.5 SessionDaemonManager(全局 in-memory) + +``` +SessionDaemonManager { + daemons: map[daemonId] *SessionDaemon + + // daemon 操作 + GetOrCreate(params) → (*SessionDaemon, error) + Get(daemonId) → (*SessionDaemon, error) + Remove(daemonId) + InitFromDB() // 启动时恢复所有 running / disconnected daemon(见 6.5) + + // block 操作 + AttachBlock(daemonId, blockId) + DetachBlock(daemonId, blockId) + GetBlocksForDaemon(daemonId) → []blockId + + // 输入 + SendInput(daemonId, data []byte) → error +} + +SessionDaemon (每个 daemon 一个实例) { + daemonId string + jobId string + InputSessionId string + seqNum int + reader *streamclient.Reader + cancel context.CancelFunc // 终止 runOutputLoop + blocks map[blockId] bool // attached blocks + + Start() // StartJob + runOutputLoop + Reconnect() // ReconnectJob + runOutputLoop + Stop(reason) // cancel loop, TerminateJob, notify blocks + Shutdown() // 进程退出时优雅断开 + + GetJobId() → string + GetStatus() → running | disconnected | done +} +``` + +## 5. 
Data Flow + +### 5.1 Output(只写 job 文件) + +当前 `handleAppendJobFile` 同时写 `job:jobId/term` 和 `block:blockId/term`。 +改为只写 `job:jobId/term`,所有 block 读同一份 raw 数据: + +``` +runOutputLoop(job-xyz) + │ + ▼ +handleAppendJobFile(jobId, "term", data) + │ + ├── doWFSAppend(job:jobId, "term", data) + └── WPS Publish "blockfile" scope=job:{jobId} +``` + +前端 TermWrap 根据 block 的 daemonId 找到 JobId,以 `jobId` 作为 zoneId 读取。 + +### 5.2 Input(单路复用) + +所有 attached block 的输入汇聚到同一个 SessionDaemon,使用同一 `InputSessionId`: + +``` +Block-A.sendInput("ls\n") Block-B.sendInput("grep\n") + │ │ + └────────────┬────────────────┘ + ▼ + SessionDaemon.SendInput() + │ + InputSessionId: uuid-X, seqNum: ++ + │ + ▼ + jobcontroller.SendInput() + │ + ▼ + Remote JobManager.InputQueue + (QuickReorderQueue, 按 sessionId 排序) +``` + +远程 JobManager 不改动——它仍只看到一个 attachedClient,一条输入流。 + +## 6. Lifecycle + +### 6.1 创建 + +``` +wsh session create --name "dev" --connection ssh:user@host + │ + ├── DB: Insert SessionDaemon{status:"init"} + ├── StartRemoteShellJob() → job-xyz + ├── DB: Update SessionDaemon{status:"running", jobId:"job-xyz"} + ├── 注册到 SessionDaemonManager,启动 runOutputLoop + └── ✅ Daemon 存活,AttachedBlocks:[](无 block 连接) +``` + +### 6.2 Attach + +``` +wsh session attach dev --block block-A + │ + ├── Block.Meta["session:daemonid"] = "sd-abc" + ├── SessionDaemonManager.AttachBlock("sd-abc", "block-A") + ├── 前端 TermWrap.attachToDaemon(jobId) + │ ├── unsubscribe WPS blockfile scope=block:{blockId} + │ ├── subscribe WPS blockfile scope=job:{jobId} + │ └── loadInitialTerminalData(jobId) // raw data,全量历史 + └── ✅ Block 显示 session 输出,可以输入 +``` + +### 6.3 Detach + +``` +wsh session detach --block block-A + │ + ├── 清除 Block.Meta["session:daemonid"] + ├── SessionDaemonManager.DetachBlock("sd-abc", "block-A") + ├── ControllerResync → 重建 ShellController + │ └── ShellController.Start() → 启动本地 shell + ├── 前端 TermWrap.detachFromDaemon() + │ ├── unsubscribe WPS blockfile scope=job:{jobId} + │ ├── subscribe WPS blockfile 
scope=block:{blockId} + │ └── loadInitialTerminalData(blockId) + └── ✅ Block 恢复为本地终端,daemon 继续运行 +``` + +### 6.4 删除 + +``` +wsh session delete dev + │ + ├── TerminateAndDetachJob(job-xyz) + ├── 遍历 AttachedBlocks: + │ Block.Meta["session:daemonid"] = "" + │ 通知前端 → 显示 "Session Ended" + ├── DB: SessionDaemon{status:"done"} + └── ✅ 从 SessionDaemonManager 移除 +``` + +### 6.5 WaveTerm 重启恢复 + +``` +WaveTerm 重启 + │ + ├── SessionDaemonManager.InitFromDB() + │ + ├── for each daemon (status = running | disconnected): + │ 1. 创建内存 daemon 对象 + │ 2. ReconnectJob(jobId) → 重连远端 JobManager + │ ├── PrepareConnect(seq = job/term 当前大小) + │ ├── 新 StreamReader + 新 runOutputLoop + │ └── 新 InputSessionId + │ + └── 有 daemonid 的 block 在渲染时自动读取 job 文件 + 显示 "reconnecting..." → 重连完成后正常显示 +``` + +### 6.6 远端意外终止 + +``` +远端 shell 退出 / 机器重启 + │ + ├── 本地 StreamReader 读到 EOF/error + ├── runOutputLoop 退出 + ├── DB: SessionDaemon{status:"done"} + └── 通知所有 attached block → 显示 "Session Ended" +``` + +## 7. Migration(一次性,启动时执行) + +### 7.1 旧模型 + +``` +Block { JobId: "job-xyz", Meta: { "term:durable": true } } +Job { OID: "job-xyz", AttachedBlockId: "block-A" } +``` + +输出同时写 `job:job-xyz/term` 和 `block:block-A/term`。 + +### 7.2 迁移目标 + +``` +Block { Meta: { "session:daemonid": "sd-abc" }, JobId: "" } +SessionDaemon { OID: "sd-abc", JobId: "job-xyz" } +Job { OID: "job-xyz", AttachedBlockId: "" } +``` + +### 7.3 流程 + +``` +WaveTerm 启动,SchemaVersion 检测到需要迁移 + │ + └── 扫描 DB 中所有 Block.JobId != "" 的记录 + │ + for each block: + ├── 创建 SessionDaemon 记录 + │ OID: uuid.new("sd-*") + │ Name: 自动生成("ssh:user@host:timestamp") + │ JobId: block.JobId(复用) + │ Status: 根据 Job.JobManagerStatus 映射 + │ Connection: block.Meta["connection"] + │ + ├── Block: Meta["session:daemonid"] = daemonId, JobId = "" + ├── Job: AttachedBlockId = "" + │ + └── 输出连续性:将 block:blockId/term 内容追加到 job:jobId/term + 完成后删除 block:blockId/term + │ + └── 迁移完成,更新 SchemaVersion +``` + +### 7.4 不兼容警告 + +- 迁移**不可逆**。回退后旧版本无法识别这些 block。 +- 迁移前建议备份 DB。 + +## 8. 
WSH Commands + +``` +wsh session create --name NAME --connection CONNECTION # 创建 daemon +wsh session delete NAME # 删除 daemon +wsh session list # 列出所有 daemon +wsh session attach NAME --block BLOCKID # block 加入 daemon +wsh session detach --block BLOCKID # block 离开 daemon +wsh session info NAME # daemon 详情 +``` + +## 9. Frontend + +### 9.1 Block 状态显示 + +| 状态 | Header 显示 | 内容区 | +|------|------------|--------| +| No Session | 无 daemon 标识 | 本地 shell | +| Attached (running) | `dev ●` (绿) | session 输出 | +| Attached (disconnected) | `dev ◌` (黄) | "Reconnecting..." | +| Session Ended | `dev ✗` (灰) | "Session Ended" | + +### 9.2 Attach/Detach 入口 + +- Block header 下拉菜单 +- 右键菜单 +- 命令面板 + +### 9.3 TermWrap 切换 zoneId + +当前 TermWrap 构造时绑定 `blockId` 作为 zoneId,从 `block:{blockId}/term` 读取。attach/detach 时动态切换数据源: + +``` +TermWrap.attachToDaemon(jobId): + 1. unsubscribe WPS blockfile scope=block:{blockId} + 2. subscribe WPS blockfile scope=job:{jobId} + 3. loadInitialTerminalData(jobId) // raw data,全量历史 + +TermWrap.detachFromDaemon(): + 1. unsubscribe WPS blockfile scope=job:{jobId} + 2. subscribe WPS blockfile scope=block:{blockId} + 3. loadInitialTerminalData(blockId) // 本地 shell +``` diff --git a/docs/design/session-daemon-design-v2.md b/docs/design/session-daemon-design-v2.md new file mode 100644 index 0000000000..678c67084c --- /dev/null +++ b/docs/design/session-daemon-design-v2.md @@ -0,0 +1,495 @@ +# Session Daemon — Design Document V2 + +## 1. Overview + +Session Daemon 是一个持久的远程终端 session 模型。与当前 "一个 block 对应一个远程 job" 的 1:1 架构不同,Session Daemon 将**远端连接**与**block 视图**解耦。Session Daemon 独立于任何 block 存在,多个 block 可以 attach/detach 到同一个 daemon,所有 block 共享同一份 raw 输出数据,各自独立渲染。 + +**核心目标**:持久化(跨重启保持)、多视图镜像、所有 block 均可输入。 + +### 与 V1 的关键差异 + +V2 在 V1 的基础上做了简化:**runOutputLoop 不动,留在 JobController 内部**。SessionDaemon 只做命名、多 block 追踪、空闲超时,不管理 PTY、不管理输出流。详见 4.4 节。 + +## 2. 
Architecture + +``` +┌── Local WaveTerm ──────────────────────────────────────────────────┐ +│ │ +│ ┌── SessionDaemon ────────────────────────────────────────────┐ │ +│ │ id: "sd-abc" name: "dev" │ │ +│ │ jobId: "job-xyz" │ │ +│ │ connName: "ssh:user@host" │ │ +│ │ status: "running" │ │ +│ │ │ │ +│ │ AttachedBlocks: [block-A, block-B] │ │ +│ │ │ │ +│ │ ─── 不管理 PTY、不管理输出流 ─── │ │ +│ │ runOutputLoop → JobController 内部管理 │ │ +│ │ InputSessionId → SessionDaemon 持有,用于输入排序 │ │ +│ └──────────────────────────────┬───────────────────────────────┘ │ +│ │ │ +│ ┌────────────┴────────────┐ │ +│ │ │ │ +│ Block-A Block-B │ +│ ┌──────────────────┐ ┌──────────────────┐ │ +│ │ view: "term" │ │ view: "term" │ │ +│ │ meta: │ │ meta: │ │ +│ │ daemonid:sd-abc │ │ daemonid:sd-abc │ │ +│ │ │ │ │ │ +│ │ read job file │ │ read job file │ │ +│ │ sendInput ▶ D │ │ sendInput ▶ D │ │ +│ └──────────────────┘ └──────────────────┘ │ +│ │ │ │ +│ └────────┬────────────────┘ │ +│ ▼ │ +│ SessionDaemon.SendInput() │ +│ │ │ +│ ▼ │ +│ jobcontroller.SendInput() │ +│ │ │ +│ ▼ │ +│ Remote JobManager(job-xyz) │ +│ (single attached client, 不改动) │ +│ │ +│ ┌────────── JobController ───────────────────────────────┐ │ +│ │ runOutputLoop (goroutine, 内部管理) │ │ +│ │ StartJob() → 启动 runOutputLoop │ │ +│ │ ReconnectJob() → 新流取代旧流 (自毁机制) │ │ +│ │ SendInput() / TerminateJob() / connReconcileWorker │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +## 3. 
Data Model + +### 3.1 SessionDaemon(DB 持久化) + +``` +SessionDaemon { + OID: string // "sd-abc",内部标识 + Name: string // "dev",用户别名,可选 + Connection: string // "ssh:user@host" + JobId: string // "job-xyz" + Status: string // "init" | "running" | "disconnected" | "done" + Cwd: string // 创建时的 CWD + CreatedAt: int64 + IdleTimeout: int64 // 超时回收(秒),默认 86400(24h) + Meta: MetaMapType +} +``` + +- **Name 唯一性**:全局唯一。创建时若冲突,自动追加时间后缀(`dev` → `dev-150623`),并提示用户实际名称。 +- **空闲回收**:无 block attach 超过 `IdleTimeout`(默认 24h)后自动回收(`TerminateAndDetachJob` + status=done)。 + +### 3.2 Status 状态机 + +``` + ┌─────────────────────────────────────────────────┐ + │ │ + ▼ │ + ┌──────┐ StartJob成功 ┌─────────┐ SSH断开 ┌───────────────┐ + │ init │ ───────────────▶ │ running │ ──────────▶ │ disconnected │ + └──┬───┘ └────┬─────┘ └───────┬───────┘ + │ │ │ + │ StartJob失败 │ 用户删除 (ssh在线) │ 用户删除、idle timeout + │ │ 或 shell退出 │ 或 shell退出 + ▼ ▼ │ + ┌──────┐ ┌──────┐ │ + │ done │ │ done │ │ + └──────┘ └──────┘ │ + ▲ │ + └────────────────────────────────────────────────────┘ +``` + +| 状态 | 含义 | 前端 block 显示 | +|------|------|----------------| +| `init` | 正在创建,job 尚未启动 | "Starting..." | +| `running` | SSH 在线,远端 shell 运行中 | 正常终端 | +| `disconnected` | SSH 断开但远端 shell 仍存活 | "Reconnecting..." | +| `done` | 已终止 | "Session Ended" | + +### 3.3 Block(扩展) + +``` +Block { + ... // 现有字段不变 + Meta: { + ... + "session:daemonid": string // 新增。为空 = 未 attach + } +} +``` + +### 3.4 Job(不变) + +现有结构完全保留。`Job.AttachedBlockId` 仍为单值,指向 daemon(不直接指向 block)。 + +### 3.5 DurableShellController 移除 + +SessionDaemon 完全取代旧的 `DurableShellController`: + +- 移除 `pkg/blockcontroller/durableshellcontroller.go` +- 移除 `ResyncController` 中的 `DurableShellController` 分支 +- `IsBlockIdTermDurable` 不再需要 +- `handleAppendJobFile` 不再同时写 `block:blockId/term`,只写 `job:jobId/term` + +## 4. 
Backend Design + +### 4.1 Controller 调度(ResyncController) + +dispatch 只取决于 block 是否 attach 到 daemon,与 connection 无关: + +``` +if block.Meta["session:daemonid"] != "" { + → SessionDaemonController // 桥接到 daemon,无进程 +} else if controllerName == "shell" || controllerName == "cmd" { + → ShellController // 本地 shell +} else if controllerName == "tsunami" { + → TsunamiController +} +``` + +block 的 `connection` meta 在未 attach 时仅作为创建/attach daemon 时的提示信息,不影响 controller 类型。远端会话的概念完全由 SessionDaemon 承载。 + +block 三态: + +``` + create / detach + ShellController ◄─────────────────► SessionDaemonController + (本地 shell) (桥接到 daemon) + │ │ + │ block 创建时默认 │ attach + │ (或 detach 后恢复) │ + │ ▼ + └── 没有 attach 时跑本地 shell session 输出实时显示 + 行为与现有非 durable block 一致 所有 block 可输入 +``` + +### 4.2 SessionDaemonController + +``` +SessionDaemonController { + BlockId: string + ConnName: string + DaemonId: string +} + +Start(): + → SessionDaemonManager.AttachBlock(daemonId, blockId) + → 返回 daemon.JobId(前端据此读文件) + → 发 controllerstatus 事件 + +SendInput(input): + → SessionDaemonManager.SendInput(daemonId, input.InputData) + → 若 input.TermSize 非空,更新 daemon 的 PTY 尺寸 + (多个 block resize 时最后一个生效) + +Stop(): + → SessionDaemonManager.DetachBlock(daemonId, blockId) + +GetRuntimeStatus(): + → 返回 daemon 的连接状态 (running/disconnected/done) +``` + +### 4.3 职责分层 + +``` +SessionDaemon jobcontroller Remote +────────────── ───────────── ────── + session 管理 原子操作 远端执行 + Name → jobId 映射 StartJob() RemoteStartJob + 多 block attach/detach ReconnectJob() RemoteReconnect + 空闲超时回收 SendInput() RemoteTerminate + wsh CLI 入口 TerminateJob() + runOutputLoop (goroutine) + connReconcileWorker + jobPruningWorker + handleAppendJobFile() +``` + +### 4.4 runOutputLoop 保持原位(与 V1 的关键差异) + +V1 提议将 runOutputLoop 从 `StartJob()` 内部迁入 SessionDaemon,使 daemon 获得输出流的生命周期控制权。V2 决定不迁移,理由如下: + +**1. 
现有自毁机制足够** + +JobController 的 `restartStreaming()` 在重连时创建新 StreamReader 并启动新的 `runOutputLoop`;旧 loop 在 `currentStreamId != streamId` 自毁检查命中后自动退出,由新 loop 取代。SessionDaemon 无需感知或干预这个过程。 + +重连流程完全在 JobController 内部闭环: + +``` +ReconnectJob(jobId) + → PrepareConnect() → 新 StreamReader + 新 streamId + → go runOutputLoop(ctx, jobId, newStreamId, newReader) + → 每次循环检查 currentStreamId == streamId? + → 旧 runOutputLoop 检测到 streamId 不匹配 → break +``` + +**2. 避免破坏已有契约** + +`StartJob()` 当前返回 `(string, error)`,内部启动 goroutine。将 runOutputLoop 迁出需要改为返回 `(string, *streamclient.Reader, *StreamMeta, error)`,影响所有现有调用方。V2 认为不值得为这个改动破坏已有 API。 + +**3. 职责分离** + +SessionDaemon 关注"有哪些 block 在看我",不关注"字节流怎么读怎么写"。输出流的生命周期是 JobController 的内部实现细节,SessionDaemon 不需要知道 StreamReader 的存在。 + +### 4.5 SessionDaemonManager(全局 in-memory) + +``` +SessionDaemonManager { + daemons: map[daemonId] *SessionDaemon + + // daemon 操作 + GetOrCreate(params) → (*SessionDaemon, error) + Get(daemonId) → (*SessionDaemon, error) + Remove(daemonId) + InitFromDB() // 启动时恢复所有 running / disconnected daemon(见 6.5) + + // block 操作 + AttachBlock(daemonId, blockId) + DetachBlock(daemonId, blockId) + GetBlocksForDaemon(daemonId) → []blockId + + // 输入 + SendInput(daemonId, data []byte) → error +} + +SessionDaemon (每个 daemon 一个实例) { + daemonId string + name string + jobId string + InputSessionId string // 输入排序用,所有 attached block 共用 + seqNum int // 单调递增 + blocks map[blockId] bool + + // 不管理 PTY / reader / runOutputLoop + // 这些全部由 JobController 内部管理 + + Start() // → jobcontroller.StartJob(),委托 + Reconnect() // → jobcontroller.ReconnectJob() + Stop() // → jobcontroller.TerminateJob() + SendInput() // → jobcontroller.SendInput() + Status() // → jobcontroller.GetJobManagerStatus() +} +``` + +## 5. 
Data Flow + +### 5.1 Output(只写 job 文件) + +当前 `handleAppendJobFile` 同时写 `job:jobId/term` 和 `block:blockId/term`。 +改为只写 `job:jobId/term`,所有 block 读同一份 raw 数据: + +``` +runOutputLoop(job-xyz) ← JobController 内部管理 + │ + ▼ +handleAppendJobFile(jobId, "term", data) + │ + ├── doWFSAppend(job:jobId, "term", data) + └── WPS Publish "blockfile" scope=job:{jobId} +``` + +前端 TermWrap 根据 block 的 daemonId 找到 JobId,以 `jobId` 作为 zoneId 读取。 + +### 5.2 Input(单路复用) + +所有 attached block 的输入汇聚到同一个 SessionDaemon,使用同一 `InputSessionId`: + +``` +Block-A.sendInput("ls\n") Block-B.sendInput("grep\n") + │ │ + └────────────┬────────────────┘ + ▼ + SessionDaemon.SendInput() + │ + InputSessionId: uuid-X, seqNum: ++ + │ + ▼ + jobcontroller.SendInput() + │ + ▼ + Remote JobManager.InputQueue + (QuickReorderQueue, 按 sessionId 排序) +``` + +远程 JobManager 不改动——它仍只看到一个 attachedClient,一条输入流。 + +## 6. Lifecycle + +### 6.1 创建 + +``` +wsh session create --name "dev" --connection ssh:user@host + │ + ├── DB: Insert SessionDaemon{status:"init"} + ├── StartRemoteShellJob() → job-xyz + │ (JobController 内部启动 runOutputLoop) + ├── DB: Update SessionDaemon{status:"running", jobId:"job-xyz"} + ├── 注册到 SessionDaemonManager + └── ✅ Daemon 存活,AttachedBlocks:[](无 block 连接) +``` + +### 6.2 Attach + +``` +wsh session attach dev --block block-A + │ + ├── Block.Meta["session:daemonid"] = "sd-abc" + ├── SessionDaemonManager.AttachBlock("sd-abc", "block-A") + ├── 前端 TermWrap.attachToDaemon(jobId) + │ ├── unsubscribe WPS blockfile scope=block:{blockId} + │ ├── subscribe WPS blockfile scope=job:{jobId} + │ └── loadInitialTerminalData(jobId) // raw data,全量历史 + └── ✅ Block 显示 session 输出,可以输入 +``` + +### 6.3 Detach + +``` +wsh session detach --block block-A + │ + ├── 清除 Block.Meta["session:daemonid"] + ├── SessionDaemonManager.DetachBlock("sd-abc", "block-A") + ├── ControllerResync → 重建 ShellController + │ └── ShellController.Start() → 启动本地 shell + ├── 前端 TermWrap.detachFromDaemon() + │ ├── unsubscribe WPS blockfile scope=job:{jobId} 
+ │ ├── subscribe WPS blockfile scope=block:{blockId} + │ └── loadInitialTerminalData(blockId) + └── ✅ Block 恢复为本地终端,daemon 继续运行 +``` + +### 6.4 删除 + +``` +wsh session delete dev + │ + ├── TerminateAndDetachJob(job-xyz) + │ (JobController 终止进程 + 停止 runOutputLoop) + ├── 遍历 AttachedBlocks: + │ Block.Meta["session:daemonid"] = "" + │ 通知前端 → 显示 "Session Ended" + ├── DB: SessionDaemon{status:"done"} + └── ✅ 从 SessionDaemonManager 移除 +``` + +### 6.5 WaveTerm 重启恢复 + +``` +WaveTerm 重启 + │ + ├── SessionDaemonManager.InitFromDB() + │ + ├── for each daemon (status = running | disconnected): + │ 1. 创建内存 daemon 对象 + │ 2. JobController.ReconnectJob(jobId) → 内部管理新 runOutputLoop + │ 3. 新 InputSessionId + │ + └── 有 daemonid 的 block 在渲染时自动读取 job 文件 + 显示 "reconnecting..." → 重连完成后正常显示 +``` + +### 6.6 远端意外终止 + +``` +远端 shell 退出 / 机器重启 + │ + ├── 本地 StreamReader 读到 EOF/error (JobController 内部) + ├── runOutputLoop 退出 (JobController 内部) + ├── DB: SessionDaemon{status:"done"} + └── 通知所有 attached block → 显示 "Session Ended" +``` + +## 7. 
Migration(一次性,启动时执行) + +### 7.1 旧模型 + +``` +Block { JobId: "job-xyz", Meta: { "term:durable": true } } +Job { OID: "job-xyz", AttachedBlockId: "block-A" } +``` + +输出同时写 `job:job-xyz/term` 和 `block:block-A/term`。 + +### 7.2 迁移目标 + +``` +Block { Meta: { "session:daemonid": "sd-abc" }, JobId: "" } +SessionDaemon { OID: "sd-abc", JobId: "job-xyz" } +Job { OID: "job-xyz", AttachedBlockId: "" } +``` + +### 7.3 流程 + +``` +WaveTerm 启动,SchemaVersion 检测到需要迁移 + │ + └── 扫描 DB 中所有 Block.JobId != "" 的记录 + │ + for each block: + ├── 创建 SessionDaemon 记录 + │ OID: uuid.new("sd-*") + │ Name: 自动生成("ssh:user@host:timestamp") + │ JobId: block.JobId(复用) + │ Status: 根据 Job.JobManagerStatus 映射 + │ Connection: block.Meta["connection"] + │ + ├── Block: Meta["session:daemonid"] = daemonId, JobId = "" + ├── Job: AttachedBlockId = "" + │ + └── 输出连续性:将 block:blockId/term 内容追加到 job:jobId/term + 完成后删除 block:blockId/term + │ + └── 迁移完成,更新 SchemaVersion +``` + +### 7.4 不兼容警告 + +- 迁移**不可逆**。回退后旧版本无法识别这些 block。 +- 迁移前建议备份 DB。 + +## 8. WSH Commands + +``` +wsh session create --name NAME --connection CONNECTION # 创建 daemon +wsh session delete NAME # 删除 daemon +wsh session list # 列出所有 daemon +wsh session attach NAME --block BLOCKID # block 加入 daemon +wsh session detach --block BLOCKID # block 离开 daemon +wsh session info NAME # daemon 详情 +``` + +## 9. Frontend + +### 9.1 Block 状态显示 + +| 状态 | Header 显示 | 内容区 | +|------|------------|--------| +| No Session | 无 daemon 标识 | 本地 shell | +| Attached (running) | `dev ●` (绿) | session 输出 | +| Attached (disconnected) | `dev ◌` (黄) | "Reconnecting..." | +| Session Ended | `dev ✗` (灰) | "Session Ended" | + +### 9.2 Attach/Detach 入口 + +- Block header 下拉菜单 +- 右键菜单 +- 命令面板 + +### 9.3 TermWrap 切换 zoneId + +当前 TermWrap 构造时绑定 `blockId` 作为 zoneId,从 `block:{blockId}/term` 读取。attach/detach 时动态切换数据源: + +``` +TermWrap.attachToDaemon(jobId): + 1. unsubscribe WPS blockfile scope=block:{blockId} + 2. subscribe WPS blockfile scope=job:{jobId} + 3. 
loadInitialTerminalData(jobId) // raw data,全量历史 + +TermWrap.detachFromDaemon(): + 1. unsubscribe WPS blockfile scope=job:{jobId} + 2. subscribe WPS blockfile scope=block:{blockId} + 3. loadInitialTerminalData(blockId) // 本地 shell +``` From b32ae21a0507a4006935f1834c51bb39db39c25b Mon Sep 17 00:00:00 2001 From: lyx-tec Date: Mon, 8 Jun 2026 07:52:24 +0800 Subject: [PATCH 02/36] docs: update v2 - anonymous daemon for SSH, 1h idle timeout --- docs/design/session-daemon-design-v2.md | 35 +++++++++++++++---------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/docs/design/session-daemon-design-v2.md b/docs/design/session-daemon-design-v2.md index 678c67084c..4beaa5cfe6 100644 --- a/docs/design/session-daemon-design-v2.md +++ b/docs/design/session-daemon-design-v2.md @@ -68,19 +68,23 @@ V2 在 V1 的基础上做了简化:**runOutputLoop 不动,留在 JobControll ``` SessionDaemon { OID: string // "sd-abc",内部标识 - Name: string // "dev",用户别名,可选 + Name: string // "dev",用户别名。空 = 匿名 daemon Connection: string // "ssh:user@host" JobId: string // "job-xyz" + IsAnonymous: bool // true = 自动创建,无 name Status: string // "init" | "running" | "disconnected" | "done" Cwd: string // 创建时的 CWD CreatedAt: int64 - IdleTimeout: int64 // 超时回收(秒),默认 86400(24h) + IdleTimeout: int64 // 超时回收(秒) Meta: MetaMapType } ``` -- **Name 唯一性**:全局唯一。创建时若冲突,自动追加时间后缀(`dev` → `dev-150623`),并提示用户实际名称。 -- **空闲回收**:无 block attach 超过 `IdleTimeout`(默认 24h)后自动回收(`TerminateAndDetachJob` + status=done)。 +- **命名 daemon**:通过 `wsh session create --name dev` 创建,`Name` 全局唯一。冲突时自动追加时间后缀(`dev` → `dev-150623`)。 +- **匿名 daemon**:SSH block 启动时自动创建,`Name=""`,`IsAnonymous=true`。 +- **空闲回收**:无 block attach 超过 `IdleTimeout` 后自动回收。默认值按类型区分: + - 匿名 daemon:**1h**(`3600` 秒) + - 命名 daemon:**24h**(`86400` 秒) ### 3.2 Status 状态机 @@ -125,32 +129,35 @@ Block { 现有结构完全保留。`Job.AttachedBlockId` 仍为单值,指向 daemon(不直接指向 block)。 -### 3.5 DurableShellController 移除 +### 3.5 DurableShellController 被 SessionDaemon 取代 -SessionDaemon 完全取代旧的 `DurableShellController`: 
+SessionDaemon 覆盖了 DurableShellController 的全部职责,且支持多 block attach: - 移除 `pkg/blockcontroller/durableshellcontroller.go` - 移除 `ResyncController` 中的 `DurableShellController` 分支 - `IsBlockIdTermDurable` 不再需要 +- SSH block 启动时自动创建匿名 daemon,行为与之前一致(持久化、自动重连),同时获得多 block 共享能力 - `handleAppendJobFile` 不再同时写 `block:blockId/term`,只写 `job:jobId/term` ## 4. Backend Design ### 4.1 Controller 调度(ResyncController) -dispatch 只取决于 block 是否 attach 到 daemon,与 connection 无关: - ``` if block.Meta["session:daemonid"] != "" { - → SessionDaemonController // 桥接到 daemon,无进程 -} else if controllerName == "shell" || controllerName == "cmd" { - → ShellController // 本地 shell -} else if controllerName == "tsunami" { - → TsunamiController + → SessionDaemonController // 桥接到 daemon +} else if connType == SSH { + → 创建匿名 SessionDaemon + Block.Meta["session:daemonid"] = newDaemonId + ControllerResync(下一轮进入 SessionDaemonController) +} else { + → ShellController // 本地 / WSL } ``` -block 的 `connection` meta 在未 attach 时仅作为创建/attach daemon 时的提示信息,不影响 controller 类型。远端会话的概念完全由 SessionDaemon 承载。 +SSH block 启动时自动创建匿名 daemon(`IsAnonymous=true`,`IdleTimeout=1h`),后续交互全通过 `SessionDaemonController`。daemon 的创建对用户透明——用户打开 SSH block 的体验与之前一致。 + +只有当用户主动 `wsh session create --name` 时,才会产生命名 daemon。命名 daemon 可被多个 block attach,空闲超时 24h。 block 三态: From f9b7f3f3ec3467f23e8a9a2d619fbd445e3c6f98 Mon Sep 17 00:00:00 2001 From: lyx-tec Date: Mon, 8 Jun 2026 23:03:33 +0800 Subject: [PATCH 03/36] feat: session daemon full implementation Add SessionDaemon data model, DB migration, meta key. Implement sessiondaemon package (Manager, idle reaper). Implement SessionDaemonController (Start/Stop/SendInput). Replace DurableShellController with SessionDaemonController. ResyncController dispatch: SSH blocks auto-create anonymous daemon. Simplify handleAppendJobFile (job-only write), remove IsBlockTermDurable. Frontend: TermWrap attachToDaemon/detachFromDaemon, zoneId switching. 
Add wsh session CLI (create/delete/list/attach/detach/info/tag). Add RPC types, handlers, generated bindings for session commands. Frontend: SessionDaemonIndicator in header + context menu items. Add startup migration for existing Block.JobId to SessionDaemon. Fix critical: GetRuntimeStatus returns init when no JobId. Fix TOCTOU race in Manager AttachBlock/DetachBlock. Add BlockId to SessionDetachData for per-block detach. Add resyncBlockController after detach. Add Version field to controller runtime status. --- cmd/server/main-server.go | 12 + cmd/wsh/cmd/wshcmd-session.go | 277 +++++++++++++++++ .../000012_sessiondaemon.down.sql | 1 + .../000012_sessiondaemon.up.sql | 5 + frontend/app/block/blockframe-header.tsx | 2 + .../app/block/session-daemon-indicator.tsx | 28 ++ frontend/app/store/wshclientapi.ts | 42 +++ frontend/app/view/term/term-model.ts | 31 ++ frontend/app/view/term/termwrap.ts | 23 +- frontend/types/gotypes.d.ts | 68 ++++ .../changes/session-daemon/.openspec.yaml | 2 + openspec/changes/session-daemon/design.md | 83 +++++ openspec/changes/session-daemon/proposal.md | 37 +++ .../specs/session-attach-detach/spec.md | 42 +++ .../specs/session-auto-create/spec.md | 31 ++ .../specs/session-create-delete/spec.md | 57 ++++ .../specs/session-idle-timeout/spec.md | 42 +++ .../specs/session-reconnect/spec.md | 51 +++ .../specs/session-wsh-cli/spec.md | 88 ++++++ openspec/changes/session-daemon/tasks.md | 65 ++++ pkg/blockcontroller/blockcontroller.go | 41 ++- pkg/blockcontroller/durableshellcontroller.go | 277 ----------------- .../sessiondaemoncontroller.go | 272 ++++++++++++++++ pkg/jobcontroller/jobcontroller.go | 72 +---- pkg/sessiondaemon/sessiondaemon.go | 290 ++++++++++++++++++ pkg/waveobj/metaconsts.go | 2 + pkg/waveobj/wtype.go | 25 +- pkg/waveobj/wtypemeta.go | 4 +- pkg/wshrpc/wshclient/wshclient.go | 42 +++ pkg/wshrpc/wshrpctypes.go | 56 ++++ pkg/wshrpc/wshserver/wshserver.go | 181 +++++++++++ pkg/wstore/wstore_dbsessionmigration.go | 95 ++++++ 32 
files changed, 1976 insertions(+), 368 deletions(-) create mode 100644 cmd/wsh/cmd/wshcmd-session.go create mode 100644 db/migrations-wstore/000012_sessiondaemon.down.sql create mode 100644 db/migrations-wstore/000012_sessiondaemon.up.sql create mode 100644 frontend/app/block/session-daemon-indicator.tsx create mode 100644 openspec/changes/session-daemon/.openspec.yaml create mode 100644 openspec/changes/session-daemon/design.md create mode 100644 openspec/changes/session-daemon/proposal.md create mode 100644 openspec/changes/session-daemon/specs/session-attach-detach/spec.md create mode 100644 openspec/changes/session-daemon/specs/session-auto-create/spec.md create mode 100644 openspec/changes/session-daemon/specs/session-create-delete/spec.md create mode 100644 openspec/changes/session-daemon/specs/session-idle-timeout/spec.md create mode 100644 openspec/changes/session-daemon/specs/session-reconnect/spec.md create mode 100644 openspec/changes/session-daemon/specs/session-wsh-cli/spec.md create mode 100644 openspec/changes/session-daemon/tasks.md delete mode 100644 pkg/blockcontroller/durableshellcontroller.go create mode 100644 pkg/blockcontroller/sessiondaemoncontroller.go create mode 100644 pkg/sessiondaemon/sessiondaemon.go create mode 100644 pkg/wstore/wstore_dbsessionmigration.go diff --git a/cmd/server/main-server.go b/cmd/server/main-server.go index b204643ee8..cd403a7620 100644 --- a/cmd/server/main-server.go +++ b/cmd/server/main-server.go @@ -25,6 +25,7 @@ import ( "github.com/wavetermdev/waveterm/pkg/remote/conncontroller" "github.com/wavetermdev/waveterm/pkg/remote/fileshare/wshfs" "github.com/wavetermdev/waveterm/pkg/secretstore" + "github.com/wavetermdev/waveterm/pkg/sessiondaemon" "github.com/wavetermdev/waveterm/pkg/service" "github.com/wavetermdev/waveterm/pkg/telemetry" "github.com/wavetermdev/waveterm/pkg/telemetry/telemetrydata" @@ -525,6 +526,10 @@ func main() { log.Printf("error initializing wstore: %v\n", err) return } + err = 
wstore.RunSessionDaemonMigration(context.Background()) + if err != nil { + log.Printf("error running session daemon migration: %v\n", err) + } panichandler.PanicTelemetryHandler = panicTelemetryHandler go func() { defer func() { @@ -554,6 +559,13 @@ func main() { return } + ctx := context.Background() + err = sessiondaemon.Manager.InitFromDB(ctx) + if err != nil { + log.Printf("error initializing session daemon manager: %v\n", err) + } + sessiondaemon.Manager.StartIdleReaper(ctx) + err = shellutil.FixupWaveZshHistory() if err != nil { log.Printf("error fixing up wave zsh history: %v\n", err) diff --git a/cmd/wsh/cmd/wshcmd-session.go b/cmd/wsh/cmd/wshcmd-session.go new file mode 100644 index 0000000000..9b4d8cab47 --- /dev/null +++ b/cmd/wsh/cmd/wshcmd-session.go @@ -0,0 +1,277 @@ +// Copyright 2025, Command Line Inc. +// SPDX-License-Identifier: Apache-2.0 + +package cmd + +import ( + "fmt" + "time" + + "github.com/spf13/cobra" + "github.com/wavetermdev/waveterm/pkg/waveobj" + "github.com/wavetermdev/waveterm/pkg/wshrpc" + "github.com/wavetermdev/waveterm/pkg/wshrpc/wshclient" +) + +var sessionCmd = &cobra.Command{ + Use: "session", + Short: "manage session daemons", + Long: "Commands to create, list, attach to, and manage session daemons for persistent remote shells.", +} + +var sessionCreateCmd = &cobra.Command{ + Use: "create", + Short: "create a new session daemon", + Long: `Create a named session daemon. Anonymous daemons are created automatically for SSH blocks.`, + Args: cobra.NoArgs, + RunE: sessionCreateRun, + PreRunE: preRunSetupRpcClient, +} + +var sessionDeleteCmd = &cobra.Command{ + Use: "delete DAEMONID", + Short: "delete a session daemon", + Long: `Delete a session daemon, stopping any attached job and detaching all blocks.`, + Args: cobra.ExactArgs(1), + RunE: sessionDeleteRun, + PreRunE: preRunSetupRpcClient, +} + +var sessionListCmd = &cobra.Command{ + Use: "list", + Short: "list session daemons", + Long: `List all named session daemons. 
Use --all to include anonymous daemons.`, + Args: cobra.NoArgs, + RunE: sessionListRun, + PreRunE: preRunSetupRpcClient, +} + +var sessionAttachCmd = &cobra.Command{ + Use: "attach DAEMONID", + Short: "attach current block to a session daemon", + Long: `Attach the current block to the specified session daemon.`, + Args: cobra.ExactArgs(1), + RunE: sessionAttachRun, + PreRunE: preRunSetupRpcClient, +} + +var sessionDetachCmd = &cobra.Command{ + Use: "detach", + Short: "detach current block from its session daemon", + Long: `Detach the current block from its attached session daemon.`, + Args: cobra.NoArgs, + RunE: sessionDetachRun, + PreRunE: preRunSetupRpcClient, +} + +var sessionInfoCmd = &cobra.Command{ + Use: "info DAEMONID", + Short: "show session daemon info", + Long: `Show detailed information about a session daemon.`, + Args: cobra.ExactArgs(1), + RunE: sessionInfoRun, + PreRunE: preRunSetupRpcClient, +} + +var sessionTagCmd = &cobra.Command{ + Use: "tag DAEMONID", + Short: "tag an anonymous session daemon with a name", + Long: `Convert an anonymous session daemon to a named one, preventing auto-cleanup.`, + Args: cobra.ExactArgs(1), + RunE: sessionTagRun, + PreRunE: preRunSetupRpcClient, +} + +var sessionCreateFlagName string +var sessionCreateFlagConnection string +var sessionCreateFlagIdleTimeout int64 +var sessionListFlagAll bool +var sessionTagFlagName string + +func init() { + rootCmd.AddCommand(sessionCmd) + sessionCmd.AddCommand(sessionCreateCmd) + sessionCmd.AddCommand(sessionDeleteCmd) + sessionCmd.AddCommand(sessionListCmd) + sessionCmd.AddCommand(sessionAttachCmd) + sessionCmd.AddCommand(sessionDetachCmd) + sessionCmd.AddCommand(sessionInfoCmd) + sessionCmd.AddCommand(sessionTagCmd) + + sessionCreateCmd.Flags().StringVarP(&sessionCreateFlagName, "name", "n", "", "session name (creates a named daemon)") + sessionCreateCmd.Flags().StringVarP(&sessionCreateFlagConnection, "connection", "c", "", "connection name (e.g. 
ssh://host)") + sessionCreateCmd.Flags().Int64Var(&sessionCreateFlagIdleTimeout, "idle-timeout", 0, "idle timeout in seconds (default: 86400 for named, 3600 for anonymous)") + + sessionListCmd.Flags().BoolVarP(&sessionListFlagAll, "all", "a", false, "include anonymous session daemons") + + sessionTagCmd.Flags().StringVarP(&sessionTagFlagName, "name", "n", "", "new name for the session daemon") + sessionTagCmd.MarkFlagRequired("name") +} + +func sessionCreateRun(cmd *cobra.Command, args []string) (rtnErr error) { + defer func() { + sendActivity("session:create", rtnErr == nil) + }() + + data := wshrpc.CommandSessionCreateData{ + Name: sessionCreateFlagName, + Connection: sessionCreateFlagConnection, + IdleTimeout: sessionCreateFlagIdleTimeout, + } + + info, err := wshclient.SessionCreateCommand(RpcClient, data, &wshrpc.RpcOpts{Timeout: 10000}) + if err != nil { + return fmt.Errorf("creating session daemon: %w", err) + } + + WriteStdout("session daemon %s created\n", info.DaemonId) + WriteStdout(" name: %s\n", info.Name) + WriteStdout(" connection: %s\n", info.Connection) + return nil +} + +func sessionDeleteRun(cmd *cobra.Command, args []string) (rtnErr error) { + defer func() { + sendActivity("session:delete", rtnErr == nil) + }() + + daemonId := args[0] + err := wshclient.SessionDeleteCommand(RpcClient, wshrpc.CommandSessionDeleteData{DaemonId: daemonId}, &wshrpc.RpcOpts{Timeout: 10000}) + if err != nil { + return fmt.Errorf("deleting session daemon: %w", err) + } + WriteStdout("session daemon %s deleted\n", daemonId) + return nil +} + +func sessionListRun(cmd *cobra.Command, args []string) (rtnErr error) { + defer func() { + sendActivity("session:list", rtnErr == nil) + }() + + data := wshrpc.CommandSessionListData{ShowAll: sessionListFlagAll} + sessions, err := wshclient.SessionListCommand(RpcClient, data, &wshrpc.RpcOpts{Timeout: 10000}) + if err != nil { + return fmt.Errorf("listing session daemons: %w", err) + } + + if len(sessions) == 0 { + WriteStdout("no 
session daemons\n") + return nil + } + + WriteStdout("%-36s %-20s %-30s %-12s %s\n", "daemonid", "name", "connection", "status", "blocks") + WriteStdout("----------------------------------------------------------------------\n") + for _, s := range sessions { + blocks := fmt.Sprintf("%d", len(s.Blocks)) + if s.IsAnonymous { + blocks += " (anon)" + } + WriteStdout("%-36s %-20s %-30s %-12s %s\n", s.DaemonId, s.Name, s.Connection, s.Status, blocks) + } + return nil +} + +func sessionAttachRun(cmd *cobra.Command, args []string) (rtnErr error) { + defer func() { + sendActivity("session:attach", rtnErr == nil) + }() + + daemonId := args[0] + fullORef, err := resolveBlockArg() + if err != nil { + return err + } + + data := wshrpc.CommandSessionAttachData{ + DaemonId: daemonId, + BlockId: fullORef.OID, + } + err = wshclient.SessionAttachCommand(RpcClient, data, &wshrpc.RpcOpts{Timeout: 10000}) + if err != nil { + return fmt.Errorf("attaching block: %w", err) + } + WriteStdout("block %s attached to session daemon %s\n", fullORef.OID, daemonId) + return nil +} + +func sessionDetachRun(cmd *cobra.Command, args []string) (rtnErr error) { + defer func() { + sendActivity("session:detach", rtnErr == nil) + }() + + fullORef, err := resolveBlockArg() + if err != nil { + return err + } + blockId := fullORef.OID + + info, err := wshclient.BlockInfoCommand(RpcClient, blockId, &wshrpc.RpcOpts{Timeout: 5000}) + if err != nil { + return fmt.Errorf("getting block info: %w", err) + } + if info.Block == nil { + return fmt.Errorf("block %s not found", blockId) + } + + daemonId := info.Block.Meta.GetString(waveobj.MetaKey_SessionDaemonId, "") + if daemonId == "" { + return fmt.Errorf("block %s is not attached to any session daemon", blockId) + } + + err = wshclient.SessionDetachCommand(RpcClient, wshrpc.CommandSessionDetachData{DaemonId: daemonId, BlockId: blockId}, &wshrpc.RpcOpts{Timeout: 10000}) + if err != nil { + return fmt.Errorf("detaching block: %w", err) + } + WriteStdout("block %s 
detached from session daemon %s\n", blockId, daemonId) + return nil +} + +func sessionInfoRun(cmd *cobra.Command, args []string) (rtnErr error) { + defer func() { + sendActivity("session:info", rtnErr == nil) + }() + + daemonId := args[0] + info, err := wshclient.SessionInfoCommand(RpcClient, wshrpc.CommandSessionInfoData{DaemonId: daemonId}, &wshrpc.RpcOpts{Timeout: 10000}) + if err != nil { + return fmt.Errorf("getting session info: %w", err) + } + + createdAt := time.UnixMilli(info.CreatedAt).Format("2006-01-02 15:04:05") + WriteStdout("daemonid: %s\n", info.DaemonId) + WriteStdout("name: %s\n", info.Name) + WriteStdout("connection: %s\n", info.Connection) + WriteStdout("jobid: %s\n", info.JobId) + WriteStdout("status: %s\n", info.Status) + WriteStdout("anonymous: %v\n", info.IsAnonymous) + WriteStdout("created: %s\n", createdAt) + WriteStdout("timeout: %ds\n", info.IdleTimeout) + if info.IdleSince > 0 { + WriteStdout("idle since: %s\n", time.UnixMilli(info.IdleSince).Format("2006-01-02 15:04:05")) + } + WriteStdout("blocks: %d\n", len(info.Blocks)) + for _, b := range info.Blocks { + WriteStdout(" - %s\n", b) + } + return nil +} + +func sessionTagRun(cmd *cobra.Command, args []string) (rtnErr error) { + defer func() { + sendActivity("session:tag", rtnErr == nil) + }() + + daemonId := args[0] + data := wshrpc.CommandSessionTagData{ + DaemonId: daemonId, + Name: sessionTagFlagName, + } + + err := wshclient.SessionTagCommand(RpcClient, data, &wshrpc.RpcOpts{Timeout: 10000}) + if err != nil { + return fmt.Errorf("tagging session daemon: %w", err) + } + WriteStdout("session daemon %s tagged as %q\n", daemonId, sessionTagFlagName) + return nil +} diff --git a/db/migrations-wstore/000012_sessiondaemon.down.sql b/db/migrations-wstore/000012_sessiondaemon.down.sql new file mode 100644 index 0000000000..83eff15470 --- /dev/null +++ b/db/migrations-wstore/000012_sessiondaemon.down.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS db_sessiondaemon; diff --git 
a/db/migrations-wstore/000012_sessiondaemon.up.sql b/db/migrations-wstore/000012_sessiondaemon.up.sql new file mode 100644 index 0000000000..6912568c6d --- /dev/null +++ b/db/migrations-wstore/000012_sessiondaemon.up.sql @@ -0,0 +1,5 @@ +CREATE TABLE IF NOT EXISTS db_sessiondaemon ( + oid varchar(36) PRIMARY KEY, + version int NOT NULL, + data json NOT NULL +); diff --git a/frontend/app/block/blockframe-header.tsx b/frontend/app/block/blockframe-header.tsx index a70f323e71..4f29e7a2af 100644 --- a/frontend/app/block/blockframe-header.tsx +++ b/frontend/app/block/blockframe-header.tsx @@ -10,6 +10,7 @@ import { } from "@/app/block/blockutil"; import { ConnectionButton } from "@/app/block/connectionbutton"; import { DurableSessionFlyover } from "@/app/block/durable-session-flyover"; +import { SessionDaemonIndicator } from "@/app/block/session-daemon-indicator"; import { getBlockBadgeAtom } from "@/app/store/badge"; import { createBlockSplitHorizontally, @@ -280,6 +281,7 @@ const BlockFrame_Header = ({ divClassName="iconbutton disabled text-[13px] ml-[-4px]" /> )} + {useTermHeader && badge && (
diff --git a/frontend/app/block/session-daemon-indicator.tsx b/frontend/app/block/session-daemon-indicator.tsx new file mode 100644 index 0000000000..637d909571 --- /dev/null +++ b/frontend/app/block/session-daemon-indicator.tsx @@ -0,0 +1,28 @@ +// Copyright 2026, Command Line Inc. +// SPDX-License-Identifier: Apache-2.0 + +import { useWaveEnv } from "@/app/waveenv/waveenv"; +import * as jotai from "jotai"; +import { BlockEnv } from "./blockenv"; + +interface SessionDaemonIndicatorProps { + blockId: string; + useTermHeader: boolean; +} + +export function SessionDaemonIndicator({ blockId, useTermHeader }: SessionDaemonIndicatorProps) { + const waveEnv = useWaveEnv(); + const daemonId = jotai.useAtomValue( + waveEnv.getBlockMetaKeyAtom(blockId, "session:daemonid") + ); + + if (!useTermHeader || !daemonId) { + return null; + } + + return ( +
+ +
+ ); +} diff --git a/frontend/app/store/wshclientapi.ts b/frontend/app/store/wshclientapi.ts index 8482be260d..d5cad46c67 100644 --- a/frontend/app/store/wshclientapi.ts +++ b/frontend/app/store/wshclientapi.ts @@ -834,6 +834,48 @@ export class RpcApiType { return client.wshRpcCall("sendtelemetry", null, opts); } + // command "sessionattach" [call] + SessionAttachCommand(client: WshClient, data: CommandSessionAttachData, opts?: RpcOpts): Promise { + if (this.mockClient) return this.mockClient.mockWshRpcCall(client, "sessionattach", data, opts); + return client.wshRpcCall("sessionattach", data, opts); + } + + // command "sessioncreate" [call] + SessionCreateCommand(client: WshClient, data: CommandSessionCreateData, opts?: RpcOpts): Promise { + if (this.mockClient) return this.mockClient.mockWshRpcCall(client, "sessioncreate", data, opts); + return client.wshRpcCall("sessioncreate", data, opts); + } + + // command "sessiondelete" [call] + SessionDeleteCommand(client: WshClient, data: CommandSessionDeleteData, opts?: RpcOpts): Promise { + if (this.mockClient) return this.mockClient.mockWshRpcCall(client, "sessiondelete", data, opts); + return client.wshRpcCall("sessiondelete", data, opts); + } + + // command "sessiondetach" [call] + SessionDetachCommand(client: WshClient, data: CommandSessionDetachData, opts?: RpcOpts): Promise { + if (this.mockClient) return this.mockClient.mockWshRpcCall(client, "sessiondetach", data, opts); + return client.wshRpcCall("sessiondetach", data, opts); + } + + // command "sessioninfo" [call] + SessionInfoCommand(client: WshClient, data: CommandSessionInfoData, opts?: RpcOpts): Promise { + if (this.mockClient) return this.mockClient.mockWshRpcCall(client, "sessioninfo", data, opts); + return client.wshRpcCall("sessioninfo", data, opts); + } + + // command "sessionlist" [call] + SessionListCommand(client: WshClient, data: CommandSessionListData, opts?: RpcOpts): Promise { + if (this.mockClient) return this.mockClient.mockWshRpcCall(client, 
"sessionlist", data, opts); + return client.wshRpcCall("sessionlist", data, opts); + } + + // command "sessiontag" [call] + SessionTagCommand(client: WshClient, data: CommandSessionTagData, opts?: RpcOpts): Promise { + if (this.mockClient) return this.mockClient.mockWshRpcCall(client, "sessiontag", data, opts); + return client.wshRpcCall("sessiontag", data, opts); + } + // command "setblockfocus" [call] SetBlockFocusCommand(client: WshClient, data: string, opts?: RpcOpts): Promise { if (this.mockClient) return this.mockClient.mockWshRpcCall(client, "setblockfocus", data, opts); diff --git a/frontend/app/view/term/term-model.ts b/frontend/app/view/term/term-model.ts index a256929e7d..c9094849b2 100644 --- a/frontend/app/view/term/term-model.ts +++ b/frontend/app/view/term/term-model.ts @@ -1353,6 +1353,37 @@ export class TermViewModel implements ViewModel { }); } + const sessionDaemonId = blockData?.meta?.["session:daemonid"]; + if (sessionDaemonId) { + advancedSubmenu.push({ type: "separator" }); + advancedSubmenu.push({ + label: "Session Info", + click: () => { + fireAndForget(async () => { + try { + const info = await RpcApi.SessionInfoCommand(TabRpcClient, { daemonid: sessionDaemonId }); + const msg = `Session Daemon: ${info.name || "(unnamed)"}\nID: ${info.daemonid}\nStatus: ${info.status}\nConnection: ${info.connection || "N/A"}\nBlocks: ${(info.blocks || []).length}`; + modalsModel.pushModal("MessageModal", { children: msg }); + } catch (e) { + modalsModel.pushModal("MessageModal", { children: `Error: ${e?.message || e}` }); + } + }); + }, + }); + advancedSubmenu.push({ + label: "Detach from Session", + click: () => { + fireAndForget(async () => { + try { + await RpcApi.SessionDetachCommand(TabRpcClient, { daemonid: sessionDaemonId, blockid: this.blockId }); + } catch (e) { + modalsModel.pushModal("MessageModal", { children: `Error: ${e?.message || e}` }); + } + }); + }, + }); + } + fullMenu.push({ label: "Advanced", submenu: advancedSubmenu, diff --git 
a/frontend/app/view/term/termwrap.ts b/frontend/app/view/term/termwrap.ts index 4840b5d914..ea51e0fa9d 100644 --- a/frontend/app/view/term/termwrap.ts +++ b/frontend/app/view/term/termwrap.ts @@ -77,6 +77,7 @@ type TermWrapOptions = { export class TermWrap { tabId: string; blockId: string; + zoneId: string; ptyOffset: number; dataBytesProcessed: number; terminal: Terminal; @@ -132,6 +133,7 @@ export class TermWrap { this.loaded = false; this.tabId = tabId; this.blockId = blockId; + this.zoneId = blockId; this.sendDataHandler = waveOptions.sendDataHandler; this.nodeModel = waveOptions.nodeModel; this.ptyOffset = 0; @@ -325,7 +327,26 @@ export class TermWrap { } getZoneId(): string { - return this.blockId; + return this.zoneId; + } + + async attachToDaemon(jobId: string): Promise { + if (this.mainFileSubject) { + this.mainFileSubject.release(); + } + this.zoneId = jobId; + this.mainFileSubject = getFileSubject(this.getZoneId(), TermFileName); + this.mainFileSubject.subscribe(this.handleNewFileSubjectData.bind(this)); + await this.loadInitialTerminalData(); + } + + detachFromDaemon(): void { + if (this.mainFileSubject) { + this.mainFileSubject.release(); + } + this.zoneId = this.blockId; + this.mainFileSubject = getFileSubject(this.getZoneId(), TermFileName); + this.mainFileSubject.subscribe(this.handleNewFileSubjectData.bind(this)); } setCursorStyle(cursorStyle: string) { diff --git a/frontend/types/gotypes.d.ts b/frontend/types/gotypes.d.ts index bb07f466b0..da0bb147b5 100644 --- a/frontend/types/gotypes.d.ts +++ b/frontend/types/gotypes.d.ts @@ -635,6 +635,46 @@ declare global { builderid: string; }; + // wshrpc.CommandSessionAttachData + type CommandSessionAttachData = { + daemonid: string; + blockid: string; + }; + + // wshrpc.CommandSessionCreateData + type CommandSessionCreateData = { + name?: string; + connection?: string; + idletimeout?: number; + }; + + // wshrpc.CommandSessionDeleteData + type CommandSessionDeleteData = { + daemonid: string; + }; + + // 
wshrpc.CommandSessionDetachData + type CommandSessionDetachData = { + daemonid: string; + blockid?: string; + }; + + // wshrpc.CommandSessionInfoData + type CommandSessionInfoData = { + daemonid: string; + }; + + // wshrpc.CommandSessionListData + type CommandSessionListData = { + showall?: boolean; + }; + + // wshrpc.CommandSessionTagData + type CommandSessionTagData = { + daemonid: string; + name: string; + }; + // wshrpc.CommandSetMetaData type CommandSetMetaData = { oref: ORef; @@ -1133,6 +1173,7 @@ declare global { "cmd:initscript.zsh"?: string; "cmd:initscript.pwsh"?: string; "cmd:initscript.fish"?: string; + "session:daemonid"?: string; "ai:*"?: boolean; "ai:preset"?: string; "ai:apitype"?: string; @@ -1374,6 +1415,33 @@ declare global { optional: boolean; }; + // waveobj.SessionDaemon + type SessionDaemon = WaveObj & { + name?: string; + connection?: string; + jobid?: string; + isanonymous?: boolean; + status?: string; + cwd?: string; + createdat?: number; + idletimeout?: number; + idlesince?: number; + }; + + // wshrpc.SessionInfoRtnData + type SessionInfoRtnData = { + daemonid: string; + name: string; + connection: string; + jobid?: string; + isanonymous: boolean; + status: string; + createdat: number; + idletimeout: number; + idlesince?: number; + blocks?: string[]; + }; + // wconfig.SettingsType type SettingsType = { "app:*"?: boolean; diff --git a/openspec/changes/session-daemon/.openspec.yaml b/openspec/changes/session-daemon/.openspec.yaml new file mode 100644 index 0000000000..e8d4ccfe90 --- /dev/null +++ b/openspec/changes/session-daemon/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-06-08 diff --git a/openspec/changes/session-daemon/design.md b/openspec/changes/session-daemon/design.md new file mode 100644 index 0000000000..c8fa40df41 --- /dev/null +++ b/openspec/changes/session-daemon/design.md @@ -0,0 +1,83 @@ +## Context + +当前 Wave 使用 `DurableShellController` + `JobController` 管理远端 SSH session,采用 1 block ↔ 1 job 的 1:1 
模型。不支持多 block 共享会话、会话命名、空闲超时回收等能力。 + +已有设计文档 `docs/design/session-daemon-design-v2.md` 包含完整架构图和数据流。 + +### 现有架构 + +``` +Block → DurableShellController → JobController → Remote JobManager + 1:1 函数库 每个 job 一个 +``` + +### 目标架构 + +``` +Block → SessionDaemonController → SessionDaemon → JobController → Remote + N:1 桥接 命名/超时/attach 函数库 (不动) +``` + +## Goals / Non-Goals + +**Goals:** +- SSH block 启动时自动创建匿名 daemon(`IdleTimeout=1h`),行为与当前一致 +- 用户可通过 `wsh session create` 创建命名 daemon(`IdleTimeout=24h`) +- 多个 block 可 attach 到同一 daemon,共享输出、各自可输入 +- daemon 空闲超时自动回收 +- 网络重连后的 TerminateOnReconnect 机制保持不变 +- 前端显示 daemon 名称和状态 + +**Non-Goals:** +- 本地/WSL block 不受影响(继续用 ShellController) +- runOutputLoop 不搬(留在 JobController 内部) +- SessionDaemon 不做进程管理,只做 session 管理 + +## Decisions + +### 1. runOutputLoop 保持原位(vs 迁入 SessionDaemon) + +详见 `docs/design/session-daemon-design-v2.md#44-runoutputloop-保持原位与-v1-的关键差异`。 + +理由:JobController 的 `currentStreamId != streamId` 自毁机制已能处理重连流切换,无需 SessionDaemon 介入。不破坏 `StartJob()` 的现有返回值契约。 + +### 2. 匿名 daemon vs 命名 daemon 区分 + +匿名 daemon: +- SSH block 启动时自动创建,`Name=""`, `IsAnonymous=true` +- `IdleTimeout=1h` +- 用户无感知,不能 attach 其他 block(除非先命名) +- 可通过 `wsh session tag sd-xxx --name dev` 转为命名 + +命名 daemon: +- 通过 `wsh session create --name dev` 创建 +- `IsAnonymous=false`, `IdleTimeout=24h` +- 可被多个 block attach + +### 3. DurableShellController 完全移除 + +SessionDaemon + 匿名 daemon 覆盖了 DurableShellController 的全部能力(持久化、自动重连),同时新增多 block attach 和空闲超时。 + +### 4. ControllerResync 调度 + +``` +if block.Meta["session:daemonid"] != "": + → SessionDaemonController +else if connType == SSH: + → 创建匿名 daemon → Block.Meta 写入 daemonId → 下一轮进入 SessionDaemonController +else: + → ShellController +``` + +### 5. 输出流共享 + +所有 attached block 读同一份 `job:jobId/term`。现有 WPS `scope=job:{jobId}` 发布机制已支持多订阅者。前端 TermWrap 在 attach/detach 时切换 zoneId。 + +### 6. 
输入汇聚 + +所有 attached block 的输入使用同一个 `InputSessionId`,远端 QuickReorderQueue 按 sessionId 排序去重。 + +## Risks / Trade-offs + +- **远端 jobmanager 无心跳超时**:如果网络永远不恢复且 shell 进程不退出,jobmanager 会一直存在。可接受——1h 内无 block attach 则本地 daemon 标记为 done,但远端进程不受影响。远端侧可后续加 `wsh session prune` 命令手动清理。 +- **迁移不可逆**:从 DurableShellController 迁移后,回退到旧版本无法识别 `session:daemonid`。建议迁移前备份 DB。 diff --git a/openspec/changes/session-daemon/proposal.md b/openspec/changes/session-daemon/proposal.md new file mode 100644 index 0000000000..7ef3c66c56 --- /dev/null +++ b/openspec/changes/session-daemon/proposal.md @@ -0,0 +1,37 @@ +## Why + +当前 Wave 的远端 SSH session 模型是 "一个 block 对应一个远程 job" 的 1:1 架构。无法实现多个 block 共享同一个远端会话、会话跨重启持久保持、以及会话有名称可管理。Session Daemon 将**远端连接**与**block 视图**解耦,允许用户创建命名的持久 session,多个 block 可以 attach/detach 到同一个 session。 + +## What Changes + +- **SessionDaemon** — 新增持久化实体(DB 记录),每个 SSH block 启动时自动创建匿名 daemon,用户也可通过 `wsh session create` 创建命名 daemon +- **SessionDaemonController** — 新增 Controller 类型,桥接到 daemon,不管理进程。取代现有 `DurableShellController` +- **DurableShellController** — 移除,功能由 SessionDaemon 覆盖 +- **空闲超时** — 匿名 daemon 默认 1h 超时回收,命名 daemon 默认 24h +- **wsh 命令** — 新增 `wsh session create/delete/list/attach/detach/info/tag` 一组 CLI 命令 +- **前端** — block header 显示 daemon 名称和状态(`dev ●`),支持 attach/detach 操作 +- TermWrap 支持动态切换数据源 zoneId(block ↔ job) + +## Capabilities + +### New Capabilities +- `session-create-delete`: 创建和删除 SessionDaemon(命名 daemon 和匿名 daemon) +- `session-attach-detach`: Block attach/detach 到 daemon,前端切换 zoneId +- `session-idle-timeout`: 空闲超时回收,区分匿名 daemon(1h)和命名 daemon(24h) +- `session-auto-create`: SSH block 启动时自动创建匿名 daemon,IdleTimeout=1h +- `session-reconnect`: 网络重连后恢复,TerminateOnReconnect 机制确保关闭的 block 的远端 job 被清理 +- `session-wsh-cli`: `wsh session` 命令组(create/delete/list/attach/detach/info/tag) + +### Modified Capabilities +- (无现有 spec 变更) + +## Impact + +- **新增** `pkg/sessiondaemon/` 包(SessionDaemon + SessionDaemonManager) +- **新增** 
`pkg/blockcontroller/sessiondaemoncontroller.go`(Controller 实现) +- **移除** `pkg/blockcontroller/durableshellcontroller.go` +- **修改** `pkg/blockcontroller/blockcontroller.go`(ResyncController 调度分支) +- **修改** `pkg/jobcontroller/jobcontroller.go`(runOutputLoop 不变,IsBlockTermDurable 不再需要) +- **新增** `cmd/wsh/cmd/wshcmd-session.go`(wsh CLI 命令) +- **新增** DB migration(创建 `db_sessiondaemon` 表,迁移旧 Job 记录) +- 前端新增 attach/detach 逻辑 diff --git a/openspec/changes/session-daemon/specs/session-attach-detach/spec.md b/openspec/changes/session-daemon/specs/session-attach-detach/spec.md new file mode 100644 index 0000000000..8208386587 --- /dev/null +++ b/openspec/changes/session-daemon/specs/session-attach-detach/spec.md @@ -0,0 +1,42 @@ +## ADDED Requirements + +### Requirement: Attach block to daemon + +The system SHALL allow attaching a term block to an existing SessionDaemon via `wsh session attach DAEMONID --block BLOCKID`. + +On attach: +- `Block.Meta["session:daemonid"]` is set to the daemon OID +- `SessionDaemonManager.AttachBlock()` is called +- The block's ControllerResync creates a `SessionDaemonController` +- Frontend `TermWrap.attachToDaemon(jobId)` switches zoneId from `block:{blockId}` to `job:{jobId}` +- The block displays the daemon's terminal output in real time +- The block can send input, which goes through the daemon's InputSessionId + +#### Scenario: Attach block to named daemon +- **WHEN** user runs `wsh session attach dev --block block-A` +- **THEN** block-A's `session:daemonid` is set to the daemon's OID +- **AND** the block's controller becomes `SessionDaemonController` +- **AND** the frontend shows the daemon's terminal output + +#### Scenario: Attach same block to multiple daemons +- **WHEN** user runs `wsh session attach dev --block block-A` +- **AND** block-A is already attached to a different daemon (e.g. `staging`) +- **THEN** the system returns an error (block can only attach to one daemon at a time) + +### Requirement: Detach block from daemon + +The system SHALL allow detaching a block from 
its SessionDaemon via `wsh session detach --block BLOCKID`. + +On detach: +- `Block.Meta["session:daemonid"]` is cleared +- `SessionDaemonManager.DetachBlock()` is called +- ControllerResync creates a `ShellController` for local/WSL or a new anonymous daemon for SSH +- Frontend `TermWrap.detachFromDaemon()` switches zoneId back to `block:{blockId}` +- The daemon continues running (unless idle timeout triggers) + +#### Scenario: Detach block from daemon +- **WHEN** user runs `wsh session detach --block block-A` +- **AND** block-A is attached to daemon `dev` +- **THEN** block-A's `session:daemonid` is cleared +- **AND** the block reverts to its default controller +- **AND** daemon `dev` continues running even with no attached blocks (until its idle timeout expires) diff --git a/openspec/changes/session-daemon/specs/session-auto-create/spec.md b/openspec/changes/session-daemon/specs/session-auto-create/spec.md new file mode 100644 index 0000000000..6da6a51ce6 --- /dev/null +++ b/openspec/changes/session-daemon/specs/session-auto-create/spec.md @@ -0,0 +1,31 @@ +## ADDED Requirements + +### Requirement: Auto-create daemon on SSH block start + +When a term block is created with an SSH connection and has no `session:daemonid` meta, the system SHALL automatically create an anonymous SessionDaemon. + +The flow: +1. `ControllerResync` detects SSH connection, no `session:daemonid` +2. Creates anonymous `SessionDaemon` (IsAnonymous=true, IdleTimeout=1h) +3. Writes `session:daemonid` to block meta +4. Triggers `ControllerResync` again +5. Second round detects `session:daemonid` → creates `SessionDaemonController` +6. 
`SessionDaemonController.Start()` → `daemon.EnsureStarted()` → `jobcontroller.StartJob()` + +#### Scenario: New SSH block creates anonymous daemon +- **WHEN** a user opens a new term block with an SSH connection +- **AND** the block has no `session:daemonid` +- **THEN** an anonymous SessionDaemon is created +- **AND** the block's controller becomes SessionDaemonController +- **AND** a remote job is started +- **AND** the user sees the terminal normally + +#### Scenario: Existing daemonid skips creation +- **WHEN** a user opens a term block +- **AND** `block.Meta["session:daemonid"]` is already set +- **THEN** the system uses the existing daemon directly (no auto-creation) + +#### Scenario: Local/WSL block does not create daemon +- **WHEN** a user opens a term block with a local or WSL connection +- **THEN** the system uses ShellController directly +- **AND** no SessionDaemon is created diff --git a/openspec/changes/session-daemon/specs/session-create-delete/spec.md b/openspec/changes/session-daemon/specs/session-create-delete/spec.md new file mode 100644 index 0000000000..fdb2ba29e4 --- /dev/null +++ b/openspec/changes/session-daemon/specs/session-create-delete/spec.md @@ -0,0 +1,57 @@ +## ADDED Requirements + +### Requirement: Create named SessionDaemon + +The system SHALL allow a user to create a named SessionDaemon via `wsh session create --name NAME --connection CONNECTION`. 
+ +A named SessionDaemon SHALL: +- Have a globally unique `Name` (conflict appends timestamp suffix like `dev-150623`) +- Have `IsAnonymous=false` +- Have `IdleTimeout=86400` (24h) by default +- Be persisted to DB with status `init` +- Start a remote job immediately via `jobcontroller.StartJob()` +- Transition to `running` when the remote JobManager confirms startup + +#### Scenario: Create named daemon successfully +- **WHEN** user runs `wsh session create --name dev --connection ssh:user@host` +- **THEN** a SessionDaemon record is created in DB with status `init` +- **AND** a remote job is started +- **AND** status transitions to `running` +- **AND** the daemon is registered in SessionDaemonManager + +#### Scenario: Create daemon with duplicate name +- **WHEN** user runs `wsh session create --name dev --connection ssh:host1` +- **AND** a daemon named `dev` already exists +- **THEN** the system creates with name `dev-TIMESTAMP` and notifies the user + +### Requirement: Create anonymous SessionDaemon + +The system SHALL automatically create an anonymous SessionDaemon when a new SSH block is started without a `session:daemonid`. + +An anonymous SessionDaemon SHALL: +- Have `Name=""` and `IsAnonymous=true` +- Have `IdleTimeout=3600` (1h) by default +- Be invisible to `wsh session list` by default (unless `--all` flag) +- Be upgradable to named via `wsh session tag DAEMONID --name NAME` + +#### Scenario: Auto-create anonymous daemon +- **WHEN** a user opens a new SSH term block +- **AND** the block has no `session:daemonid` meta +- **THEN** an anonymous SessionDaemon is created and attached to the block +- **AND** the process is transparent to the user (no UI indication) + +### Requirement: Delete SessionDaemon + +The system SHALL allow deleting a SessionDaemon via `wsh session delete DAEMONID`. 
+ +Deletion SHALL: +- Call `TerminateAndDetachJob` on the associated job +- Detach all currently attached blocks (clear their `session:daemonid`) +- Set daemon status to `done` +- Remove daemon from SessionDaemonManager + +#### Scenario: Delete daemon +- **WHEN** user runs `wsh session delete dev` +- **THEN** the remote job is terminated +- **AND** all attached blocks have their `session:daemonid` cleared +- **AND** daemon status is set to `done` diff --git a/openspec/changes/session-daemon/specs/session-idle-timeout/spec.md b/openspec/changes/session-daemon/specs/session-idle-timeout/spec.md new file mode 100644 index 0000000000..5661701e0d --- /dev/null +++ b/openspec/changes/session-daemon/specs/session-idle-timeout/spec.md @@ -0,0 +1,42 @@ +## ADDED Requirements + +### Requirement: Idle timeout for anonymous daemons + +An anonymous SessionDaemon (`IsAnonymous=true`) SHALL have a default `IdleTimeout` of 3600 seconds (1 hour). + +When the last block is detached: +- A countdown timer starts for `IdleTimeout` +- If a block re-attaches before timeout, the timer is cancelled +- If the timer expires, `TerminateAndDetachJob` is called on the associated job +- Daemon status is set to `done` +- The daemon is removed from SessionDaemonManager + +#### Scenario: Anonymous daemon auto-reclaim +- **WHEN** the last block is detached from an anonymous daemon +- **AND** no block re-attaches within 1 hour +- **THEN** the daemon is terminated and status set to `done` + +#### Scenario: Re-attach cancels timer +- **WHEN** the last block is detached from an anonymous daemon +- **AND** a block re-attaches within 1 hour +- **THEN** the idle timer is cancelled +- **AND** the daemon continues running + +### Requirement: Idle timeout for named daemons + +A named SessionDaemon (`IsAnonymous=false`) SHALL have a default `IdleTimeout` of 86400 seconds (24 hours). + +The same timer mechanism applies. Named daemons have a longer timeout because they are intentionally created by the user. 
+ +#### Scenario: Named daemon idle timeout +- **WHEN** all blocks are detached from a named daemon +- **AND** no block re-attaches within 24 hours +- **THEN** the daemon is terminated and status set to `done` + +### Requirement: Configurable idle timeout + +The system SHALL allow overriding `IdleTimeout` on daemon creation via `--idle-timeout <seconds>` flag. + +#### Scenario: Custom idle timeout +- **WHEN** user runs `wsh session create --name dev --connection ssh:host --idle-timeout 7200` +- **THEN** the daemon's IdleTimeout is set to 7200 seconds (2 hours) diff --git a/openspec/changes/session-daemon/specs/session-reconnect/spec.md b/openspec/changes/session-daemon/specs/session-reconnect/spec.md new file mode 100644 index 0000000000..9042b65b99 --- /dev/null +++ b/openspec/changes/session-daemon/specs/session-reconnect/spec.md @@ -0,0 +1,51 @@ +## ADDED Requirements + +### Requirement: Reconnect daemon after network recovery + +When the network reconnects after a disconnection, the system SHALL automatically attempt to reconnect all SessionDaemons whose status is `running` or `disconnected`. + +On reconnection: +- `onConnectionUp` finds all daemons with this connection name +- Calls `daemon.Reconnect()` → `jobcontroller.ReconnectJob()` +- If the remote jobmanager process is still alive, streaming resumes +- If the remote jobmanager is gone (`JobManagerGone: true`), daemon status set to `done` +- All attached blocks resume displaying output from the stream + +#### Scenario: Reconnect daemon after transient SSH drop +- **WHEN** the SSH connection drops and re-establishes +- **AND** the daemon status is `disconnected` +- **AND** the remote jobmanager is still alive +- **THEN** the daemon reconnects and streaming resumes +- **AND** attached blocks display the continued output + +### Requirement: TerminateOnReconnect for closed blocks + +When a block is closed while the network is down, the system SHALL set `TerminateOnReconnect=true` on the associated Job.
On reconnection, the remote jobmanager SHALL be terminated instead of reconnected. + +This ensures that a user closing a block while offline does not leave a stale remote process. + +#### Scenario: Block closed offline, remote cleaned on reconnect +- **WHEN** a user closes a block while the SSH connection is down +- **THEN** `TerminateOnReconnect=true` is persisted in the DB +- **WHEN** the network reconnects +- **THEN** the jobmanager is terminated via SIGTERM +- **AND** no orphaned processes remain on the remote side + +### Requirement: Restart recovery + +When WaveTerm restarts, `SessionDaemonManager.InitFromDB()` SHALL: +1. Load all daemons with status `running` or `disconnected` from DB +2. For each, call `jobcontroller.ReconnectJob()` to reconnect +3. Blocks with `session:daemonid` pointing to a daemon that no longer exists SHALL have their `session:daemonid` cleared and trigger a new ControllerResync + +#### Scenario: Restart with active daemon +- **WHEN** WaveTerm restarts +- **AND** a daemon has status `running` in DB +- **THEN** InitFromDB() loads the daemon and reconnects +- **AND** attached blocks display the resumed output + +#### Scenario: Restart with stale daemonid +- **WHEN** WaveTerm restarts +- **AND** a block has `session:daemonid` pointing to a non-existent daemon +- **THEN** the daemonid is cleared +- **AND** the block falls back to its default controller diff --git a/openspec/changes/session-daemon/specs/session-wsh-cli/spec.md b/openspec/changes/session-daemon/specs/session-wsh-cli/spec.md new file mode 100644 index 0000000000..47c5159244 --- /dev/null +++ b/openspec/changes/session-daemon/specs/session-wsh-cli/spec.md @@ -0,0 +1,88 @@ +## ADDED Requirements + +### Requirement: wsh session create + +The system SHALL provide a `wsh session create` command. 
+ +``` +wsh session create --name <name> --connection <connection> [--idle-timeout <seconds>] +``` + +This command SHALL: +- Create a new named SessionDaemon and persist to DB +- Start a remote job immediately +- Return the daemon OID + +### Requirement: wsh session delete + +The system SHALL provide a `wsh session delete` command. + +``` +wsh session delete <name|id> +``` + +This command SHALL: +- Terminate the associated remote job +- Detach all attached blocks +- Set daemon status to `done` + +### Requirement: wsh session list + +The system SHALL provide a `wsh session list` command. + +``` +wsh session list [--all] +``` + +Without `--all`, only named daemons (IsAnonymous=false) are shown. +With `--all`, anonymous daemons are also shown. + +### Requirement: wsh session attach + +The system SHALL provide a `wsh session attach` command. + +``` +wsh session attach <name|id> --block <block-id> +``` + +### Requirement: wsh session detach + +The system SHALL provide a `wsh session detach` command. + +``` +wsh session detach <name|id> --block <block-id> +``` + +### Requirement: wsh session info + +The system SHALL provide a `wsh session info` command. + +``` +wsh session info <name|id> +``` + +This command SHALL display: +- Name, Status, Connection, CreatedAt +- JobId +- List of currently attached block IDs +- Time remaining before idle timeout (if no blocks attached) + +### Requirement: wsh session tag + +The system SHALL provide a `wsh session tag` command to convert an anonymous daemon to a named one.
+ +``` +wsh session tag --name +``` + +After tagging, the daemon SHALL: +- Have `Name` set to the provided name +- Have `IsAnonymous=false` +- Have `IdleTimeout` updated to 24h +- Appear in `wsh session list` output + +#### Scenario: Tag anonymous daemon +- **WHEN** user runs `wsh session tag sd-abc --name dev` +- **THEN** the daemon's Name is set to `dev` +- **AND** IsAnonymous is set to `false` +- **AND** IdleTimeout is updated to 24h diff --git a/openspec/changes/session-daemon/tasks.md b/openspec/changes/session-daemon/tasks.md new file mode 100644 index 0000000000..0876e78579 --- /dev/null +++ b/openspec/changes/session-daemon/tasks.md @@ -0,0 +1,65 @@ +## 1. Data Model — SessionDaemon DB 记录 + +- [x] 1.1 在 `pkg/waveobj/` 新增 `SessionDaemon` struct(OID, Name, Connection, JobId, IsAnonymous, Status, Cwd, CreatedAt, IdleTimeout, Meta) +- [x] 1.2 在 DB 创建 `sessiondaemon` 表(或扩展现有 schema) +- [x] 1.3 新增 `MetaKey_SessionDaemonId = "session:daemonid"` 常量 +- [x] 1.4 在 `MetaTSType` 新增 `SessionDaemonId string` 字段 + +## 2. SessionDaemon + SessionDaemonManager + +- [x] 2.1 新建 `pkg/sessiondaemon/` 包 +- [x] 2.2 实现 `SessionDaemon` struct(daemonId, name, jobId, InputSessionId, seqNum, blocks, status) +- [x] 2.3 实现 `SessionDaemon.Start()` → `jobcontroller.StartJob()` +- [x] 2.4 实现 `SessionDaemon.Reconnect()` → `jobcontroller.ReconnectJob()` +- [x] 2.5 实现 `SessionDaemon.Stop()` → `jobcontroller.TerminateJob()` +- [x] 2.6 实现 `SessionDaemon.SendInput()` → `jobcontroller.SendInput()` +- [x] 2.7 实现 `SessionDaemonManager`(map, GetOrCreate, Get, Remove, InitFromDB) +- [x] 2.8 实现 AttachBlock / DetachBlock / GetBlocksForDaemon +- [x] 2.9 实现空闲超时回收 goroutine(检查 IdleTimeout,定时扫描) + +## 3. 
SessionDaemonController + +- [x] 3.1 新建 `pkg/blockcontroller/sessiondaemoncontroller.go` +- [x] 3.2 实现 `SessionDaemonController` struct + Controller 接口方法(Start, SendInput, Stop, GetRuntimeStatus, Resync) +- [x] 3.3 修改 `ResyncController` 调度:检测 `session:daemonid` 走 SessionDaemonController +- [x] 3.4 修改 ResyncController:SSH block + 无 daemonid 时自动创建匿名 daemon + +## 4. DurableShellController 移除 + +- [x] 4.1 删除 `pkg/blockcontroller/durableshellcontroller.go` +- [x] 4.2 移除 `ResyncController` 中的 DurableShellController 分支 +- [x] 4.3 移除 `IsBlockIdTermDurable` 调用(不再需要) + +## 5. 输出流修改 + +- [x] 5.1 `runOutputLoop` 中的 `handleAppendJobFile` 不再写 `block:blockId/term`(只写 `job:jobId/term`) +- [x] 5.2 前端 TermWrap 支持动态切换 zoneId(block ↔ job) + +## 6. wsh CLI 命令 + +- [x] 6.1 新建 `cmd/wsh/cmd/wshcmd-session.go` +- [x] 6.2 实现 `wsh session create`(支持 --name, --connection, --idle-timeout) +- [x] 6.3 实现 `wsh session delete` +- [x] 6.4 实现 `wsh session list`(支持 --all 显示匿名 daemon) +- [x] 6.5 实现 `wsh session attach` +- [x] 6.6 实现 `wsh session detach` +- [x] 6.7 实现 `wsh session info` +- [x] 6.8 实现 `wsh session tag`(匿名转命名) + +## 7. 前端 + +- [ ] 7.1 Block header 显示 daemon 名称和状态(dev ● / dev ◌ / dev ✗) +- [ ] 7.2 右键菜单 / header 下拉添加 attach/detach 入口 +- [ ] 7.3 TermWrap 实现 `attachToDaemon(jobId)` 和 `detachFromDaemon()` + +## 8. Migration + +- [x] 8.1 编写 DB migration:创建 sessiondaemon 表 +- [x] 8.2 扫描所有 Block.JobId != "" 的记录,迁移到 SessionDaemon +- [x] 8.3 迁移完成后清理旧 block:blockId/term 文件(数据已合并到 job:jobId/term) + +## 9. 
Build & Verify + +- [x] 9.1 编译通过(`go build ./...`) +- [x] 9.2 前端 build 通过(`npm run build:prod`) +- [x] 9.3 `task package` 构建成功 diff --git a/pkg/blockcontroller/blockcontroller.go b/pkg/blockcontroller/blockcontroller.go index 75f1938e12..531244f771 100644 --- a/pkg/blockcontroller/blockcontroller.go +++ b/pkg/blockcontroller/blockcontroller.go @@ -16,7 +16,6 @@ import ( "github.com/google/uuid" "github.com/wavetermdev/waveterm/pkg/blocklogger" "github.com/wavetermdev/waveterm/pkg/filestore" - "github.com/wavetermdev/waveterm/pkg/jobcontroller" "github.com/wavetermdev/waveterm/pkg/remote" "github.com/wavetermdev/waveterm/pkg/remote/conncontroller" "github.com/wavetermdev/waveterm/pkg/util/ds" @@ -187,8 +186,17 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts return nil } - // Determine if we should use DurableShellController vs ShellController - shouldUseDurableShellController := controllerName == BlockController_Shell && jobcontroller.IsBlockIdTermDurable(blockId) + // Check for SessionDaemon controller + daemonId := blockData.Meta.GetString(waveobj.MetaKey_SessionDaemonId, "") + + // Auto-create anonymous daemon for SSH blocks without daemonid + if daemonId == "" && controllerName == BlockController_Shell && !conncontroller.IsLocalConnName(connName) && !conncontroller.IsWslConnName(connName) { + newDaemonId, err := autoCreateSessionDaemon(ctx, blockId, blockData.Meta, connName, rtOpts) + if err != nil { + return fmt.Errorf("auto-create session daemon: %w", err) + } + daemonId = newDaemonId + } // Check if we need to morph controller type if existing != nil { @@ -196,13 +204,11 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts switch existing.(type) { case *ShellController: - if controllerName != BlockController_Shell && controllerName != BlockController_Cmd { - needsReplace = true - } else if shouldUseDurableShellController { + if daemonId != "" || (controllerName != BlockController_Shell && 
controllerName != BlockController_Cmd) { needsReplace = true } - case *DurableShellController: - if !shouldUseDurableShellController { + case *SessionDaemonController: + if daemonId == "" { needsReplace = true } case *TsunamiController: @@ -242,17 +248,18 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts if existing != nil { controller = existing } else { - // Create new controller based on type - switch controllerName { - case BlockController_Shell, BlockController_Cmd: - if shouldUseDurableShellController { - controller = MakeDurableShellController(tabId, blockId, controllerName, connName) - } else { - controller = MakeShellController(tabId, blockId, controllerName, connName) - } + switch { + case daemonId != "": + sdc := MakeSessionDaemonController(tabId, blockId, connName) + sdc.DaemonId = daemonId + controller = sdc + registerController(blockId, controller) + + case controllerName == BlockController_Shell || controllerName == BlockController_Cmd: + controller = MakeShellController(tabId, blockId, controllerName, connName) registerController(blockId, controller) - case BlockController_Tsunami: + case controllerName == BlockController_Tsunami: controller = MakeTsunamiController(tabId, blockId, connName) registerController(blockId, controller) diff --git a/pkg/blockcontroller/durableshellcontroller.go b/pkg/blockcontroller/durableshellcontroller.go deleted file mode 100644 index a21dac153b..0000000000 --- a/pkg/blockcontroller/durableshellcontroller.go +++ /dev/null @@ -1,277 +0,0 @@ -// Copyright 2025, Command Line Inc. 
-// SPDX-License-Identifier: Apache-2.0 - -package blockcontroller - -import ( - "context" - "encoding/base64" - "fmt" - "io/fs" - "log" - "sync" - "time" - - "github.com/google/uuid" - "github.com/wavetermdev/waveterm/pkg/filestore" - "github.com/wavetermdev/waveterm/pkg/jobcontroller" - "github.com/wavetermdev/waveterm/pkg/remote" - "github.com/wavetermdev/waveterm/pkg/remote/conncontroller" - "github.com/wavetermdev/waveterm/pkg/shellexec" - "github.com/wavetermdev/waveterm/pkg/util/shellutil" - "github.com/wavetermdev/waveterm/pkg/utilds" - "github.com/wavetermdev/waveterm/pkg/wavebase" - "github.com/wavetermdev/waveterm/pkg/waveobj" - "github.com/wavetermdev/waveterm/pkg/wps" - "github.com/wavetermdev/waveterm/pkg/wshrpc" - "github.com/wavetermdev/waveterm/pkg/wshrpc/wshclient" - "github.com/wavetermdev/waveterm/pkg/wshutil" - "github.com/wavetermdev/waveterm/pkg/wstore" -) - -type DurableShellController struct { - Lock *sync.Mutex - - ControllerType string - TabId string - BlockId string - ConnName string - BlockDef *waveobj.BlockDef - VersionTs utilds.VersionTs - - InputSessionId string // random uuid - inputSeqNum int // monotonic sequence number for inputs, starts at 1 - - JobId string - LastKnownStatus string -} - -func MakeDurableShellController(tabId string, blockId string, controllerType string, connName string) Controller { - return &DurableShellController{ - Lock: &sync.Mutex{}, - ControllerType: controllerType, - TabId: tabId, - BlockId: blockId, - ConnName: connName, - LastKnownStatus: Status_Init, - InputSessionId: uuid.New().String(), - } -} - -func (dsc *DurableShellController) WithLock(f func()) { - dsc.Lock.Lock() - defer dsc.Lock.Unlock() - f() -} - -func (dsc *DurableShellController) getJobId() string { - dsc.Lock.Lock() - defer dsc.Lock.Unlock() - return dsc.JobId -} - -func (dsc *DurableShellController) getNextInputSeq() (string, int) { - dsc.Lock.Lock() - defer dsc.Lock.Unlock() - dsc.inputSeqNum++ - return dsc.InputSessionId, 
dsc.inputSeqNum -} - -func (dsc *DurableShellController) getJobStatus_withlock() string { - if dsc.JobId == "" { - dsc.LastKnownStatus = Status_Init - return Status_Init - } - status, err := jobcontroller.GetJobManagerStatus(context.Background(), dsc.JobId) - if err != nil { - log.Printf("error getting job status for %s: %v, using last known status: %s", dsc.JobId, err, dsc.LastKnownStatus) - return dsc.LastKnownStatus - } - dsc.LastKnownStatus = status - return status -} - -func (dsc *DurableShellController) getRuntimeStatus_withlock() BlockControllerRuntimeStatus { - var rtn BlockControllerRuntimeStatus - rtn.Version = dsc.VersionTs.GetVersionTs() - rtn.BlockId = dsc.BlockId - rtn.ShellProcStatus = dsc.getJobStatus_withlock() - rtn.ShellProcConnName = dsc.ConnName - return rtn -} - -func (dsc *DurableShellController) GetRuntimeStatus() *BlockControllerRuntimeStatus { - var rtn BlockControllerRuntimeStatus - dsc.WithLock(func() { - rtn = dsc.getRuntimeStatus_withlock() - }) - return &rtn -} - -func (dsc *DurableShellController) GetConnName() string { - dsc.Lock.Lock() - defer dsc.Lock.Unlock() - return dsc.ConnName -} - -func (dsc *DurableShellController) sendUpdate_withlock() { - rtStatus := dsc.getRuntimeStatus_withlock() - log.Printf("sending blockcontroller update %#v\n", rtStatus) - wps.Broker.Publish(wps.WaveEvent{ - Event: wps.Event_ControllerStatus, - Scopes: []string{ - waveobj.MakeORef(waveobj.OType_Tab, dsc.TabId).String(), - waveobj.MakeORef(waveobj.OType_Block, dsc.BlockId).String(), - }, - Data: rtStatus, - }) -} - -// Start initializes or reconnects to a durable shell for the block. 
-// Logic: -// - If block has no existing jobId: starts a new job and attaches it -// - If block has existing jobId with running job manager: reconnects to existing job -// - If block has existing jobId with non-running job manager: -// - force=true: detaches old job and starts new one -// - force=false: returns without starting (leaves block unstarted) -// -// After establishing jobId, ensures job connection is active (reconnects if needed) -func (dsc *DurableShellController) Start(ctx context.Context, blockMeta waveobj.MetaMapType, rtOpts *waveobj.RuntimeOpts, force bool) error { - blockData, err := wstore.DBMustGet[*waveobj.Block](ctx, dsc.BlockId) - if err != nil { - return fmt.Errorf("error getting block: %w", err) - } - - if conncontroller.IsLocalConnName(dsc.ConnName) { - return fmt.Errorf("durable shell controller requires a remote connection") - } - - var jobId string - if blockData.JobId != "" { - status, err := jobcontroller.GetJobManagerStatus(ctx, blockData.JobId) - if err != nil { - return fmt.Errorf("error getting job manager status: %w", err) - } - if status == jobcontroller.JobManagerStatus_Running { - jobId = blockData.JobId - } else if !force { - log.Printf("block %q has jobId %s but manager is not running (status: %s), not starting (force=false)\n", dsc.BlockId, blockData.JobId, status) - return nil - } else { - log.Printf("block %q has jobId %s but manager is not running (status: %s), starting new job (force=true)\n", dsc.BlockId, blockData.JobId, status) - // intentionally leave jobId empty to trigger starting a new job below - } - } - - if jobId == "" { - log.Printf("block %q starting new durable shell\n", dsc.BlockId) - fsErr := filestore.WFS.MakeFile(ctx, dsc.BlockId, wavebase.BlockFile_Term, nil, wshrpc.FileOpts{MaxSize: DefaultTermMaxFileSize, Circular: true}) - if fsErr != nil && fsErr != fs.ErrExist { - return fmt.Errorf("error creating block term file: %w", fsErr) - } - newJobId, err := dsc.startNewJob(ctx, blockMeta, dsc.ConnName, 
rtOpts) - if err != nil { - return fmt.Errorf("failed to start new job: %w", err) - } - jobId = newJobId - } - - dsc.WithLock(func() { - dsc.JobId = jobId - dsc.sendUpdate_withlock() - }) - - err = jobcontroller.ReconnectJob(ctx, jobId, rtOpts) - if err != nil { - return fmt.Errorf("failed to reconnect to job: %w", err) - } - - return nil -} - -func (dsc *DurableShellController) Stop(graceful bool, newStatus string, destroy bool) { - if !destroy { - return - } - jobId := dsc.getJobId() - if jobId == "" { - return - } - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - jobcontroller.TerminateAndDetachJob(ctx, jobId) -} - -func (dsc *DurableShellController) SendInput(inputUnion *BlockInputUnion) error { - if inputUnion == nil { - return nil - } - jobId := dsc.getJobId() - if jobId == "" { - return fmt.Errorf("no job attached to controller") - } - inputSessionId, seqNum := dsc.getNextInputSeq() - data := wshrpc.CommandJobInputData{ - JobId: jobId, - InputSessionId: inputSessionId, - SeqNum: seqNum, - TermSize: inputUnion.TermSize, - SigName: inputUnion.SigName, - } - if len(inputUnion.InputData) > 0 { - data.InputData64 = base64.StdEncoding.EncodeToString(inputUnion.InputData) - } - return jobcontroller.SendInput(context.Background(), data) -} - -func (dsc *DurableShellController) startNewJob(ctx context.Context, blockMeta waveobj.MetaMapType, connName string, rtOpts *waveobj.RuntimeOpts) (string, error) { - termSize := waveobj.TermSize{ - Rows: shellutil.DefaultTermRows, - Cols: shellutil.DefaultTermCols, - } - if rtOpts != nil && rtOpts.TermSize.Rows > 0 && rtOpts.TermSize.Cols > 0 { - termSize = rtOpts.TermSize - } - cmdStr := blockMeta.GetString(waveobj.MetaKey_Cmd, "") - cwd := blockMeta.GetString(waveobj.MetaKey_CmdCwd, "") - opts, err := remote.ParseOpts(connName) - if err != nil { - return "", fmt.Errorf("invalid ssh remote name (%s): %w", connName, err) - } - conn := conncontroller.MaybeGetConn(opts) - if conn == nil { 
- return "", fmt.Errorf("connection %q not found", connName) - } - connRoute := wshutil.MakeConnectionRouteId(connName) - remoteInfo, err := wshclient.RemoteGetInfoCommand(wshclient.GetBareRpcClient(), &wshrpc.RpcOpts{Route: connRoute, Timeout: 2000}) - if err != nil { - return "", fmt.Errorf("unable to obtain remote info from connserver: %w", err) - } - shellType := shellutil.GetShellTypeFromShellPath(remoteInfo.Shell) - swapToken := makeSwapToken(ctx, ctx, dsc.BlockId, blockMeta, connName, shellType) - sockName := wavebase.GetPersistentRemoteSockName(wstore.GetClientId()) - rpcContext := wshrpc.RpcContext{ - ProcRoute: true, - SockName: sockName, - BlockId: dsc.BlockId, - Conn: connName, - } - jwtStr, err := wshutil.MakeClientJWTToken(rpcContext) - if err != nil { - return "", fmt.Errorf("error making jwt token: %w", err) - } - swapToken.RpcContext = &rpcContext - swapToken.Env[wshutil.WaveJwtTokenVarName] = jwtStr - cmdOpts := shellexec.CommandOptsType{ - Interactive: true, - Login: true, - Cwd: cwd, - SwapToken: swapToken, - ForceJwt: blockMeta.GetBool(waveobj.MetaKey_CmdJwt, false), - } - jobId, err := shellexec.StartRemoteShellJob(ctx, ctx, termSize, cmdStr, cmdOpts, conn, dsc.BlockId) - if err != nil { - return "", fmt.Errorf("failed to start durable shell: %w", err) - } - return jobId, nil -} diff --git a/pkg/blockcontroller/sessiondaemoncontroller.go b/pkg/blockcontroller/sessiondaemoncontroller.go new file mode 100644 index 0000000000..8babe6f90d --- /dev/null +++ b/pkg/blockcontroller/sessiondaemoncontroller.go @@ -0,0 +1,272 @@ +package blockcontroller + +import ( + "context" + "fmt" + "io/fs" + "log" + "sync" + "time" + + "github.com/google/uuid" + "github.com/wavetermdev/waveterm/pkg/filestore" + "github.com/wavetermdev/waveterm/pkg/jobcontroller" + "github.com/wavetermdev/waveterm/pkg/remote" + "github.com/wavetermdev/waveterm/pkg/remote/conncontroller" + "github.com/wavetermdev/waveterm/pkg/sessiondaemon" + 
"github.com/wavetermdev/waveterm/pkg/shellexec" + "github.com/wavetermdev/waveterm/pkg/util/shellutil" + "github.com/wavetermdev/waveterm/pkg/wavebase" + "github.com/wavetermdev/waveterm/pkg/waveobj" + "github.com/wavetermdev/waveterm/pkg/wps" + "github.com/wavetermdev/waveterm/pkg/wshrpc" + "github.com/wavetermdev/waveterm/pkg/wshrpc/wshclient" + "github.com/wavetermdev/waveterm/pkg/wshutil" + "github.com/wavetermdev/waveterm/pkg/wstore" +) + +type SessionDaemonController struct { + Lock *sync.Mutex + + BlockId string + ConnName string + DaemonId string + TabId string + InputSessionId string + inputSeqNum int + versionTs int64 +} + +func MakeSessionDaemonController(tabId string, blockId string, connName string) *SessionDaemonController { + return &SessionDaemonController{ + Lock: &sync.Mutex{}, + BlockId: blockId, + ConnName: connName, + TabId: tabId, + InputSessionId: uuid.New().String(), + versionTs: 1, + } +} + +func (sdc *SessionDaemonController) WithLock(f func()) { + sdc.Lock.Lock() + defer sdc.Lock.Unlock() + f() +} + +func (sdc *SessionDaemonController) getNextInputSeq() (string, int) { + sdc.Lock.Lock() + defer sdc.Lock.Unlock() + sdc.inputSeqNum++ + return sdc.InputSessionId, sdc.inputSeqNum +} + +func (sdc *SessionDaemonController) Start(ctx context.Context, blockMeta waveobj.MetaMapType, rtOpts *waveobj.RuntimeOpts, force bool) error { + daemon := sessiondaemon.Manager.Get(sdc.DaemonId) + if daemon == nil { + return fmt.Errorf("session daemon %s not found in manager", sdc.DaemonId) + } + + sessiondaemon.Manager.AttachBlock(ctx, sdc.DaemonId, sdc.BlockId) + + dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, sdc.DaemonId) + if err != nil { + return fmt.Errorf("error getting session daemon: %w", err) + } + + if dbDaemon.JobId != "" { + status, err := jobcontroller.GetJobManagerStatus(ctx, dbDaemon.JobId) + if err == nil && status == jobcontroller.JobManagerStatus_Running { + sdc.WithLock(func() { + sdc.incrementVersion() + 
sdc.sendControllerStatus() + }) + return nil + } + } + + // Terminate old job if it exists (crashed or network issue) + if dbDaemon.JobId != "" { + jobcontroller.TerminateAndDetachJob(ctx, dbDaemon.JobId) + } + + fsErr := filestore.WFS.MakeFile(ctx, sdc.BlockId, wavebase.BlockFile_Term, nil, wshrpc.FileOpts{MaxSize: DefaultTermMaxFileSize, Circular: true}) + if fsErr != nil && fsErr != fs.ErrExist { + return fmt.Errorf("error creating block term file: %w", fsErr) + } + + jobId, err := sdc.startNewJob(ctx, blockMeta, rtOpts) + if err != nil { + return fmt.Errorf("failed to start job: %w", err) + } + + err = daemon.SetJobId(ctx, dbDaemon, jobId) + if err != nil { + return fmt.Errorf("failed to set job id on daemon: %w", err) + } + + sdc.WithLock(func() { + sdc.incrementVersion() + sdc.sendControllerStatus() + }) + return nil +} + +func (sdc *SessionDaemonController) startNewJob(ctx context.Context, blockMeta waveobj.MetaMapType, rtOpts *waveobj.RuntimeOpts) (string, error) { + termSize := waveobj.TermSize{ + Rows: shellutil.DefaultTermRows, + Cols: shellutil.DefaultTermCols, + } + if rtOpts != nil && rtOpts.TermSize.Rows > 0 && rtOpts.TermSize.Cols > 0 { + termSize = rtOpts.TermSize + } + cmdStr := blockMeta.GetString(waveobj.MetaKey_Cmd, "") + cwd := blockMeta.GetString(waveobj.MetaKey_CmdCwd, "") + opts, err := remote.ParseOpts(sdc.ConnName) + if err != nil { + return "", fmt.Errorf("invalid ssh remote name (%s): %w", sdc.ConnName, err) + } + conn := conncontroller.MaybeGetConn(opts) + if conn == nil { + return "", fmt.Errorf("connection %q not found", sdc.ConnName) + } + connRoute := wshutil.MakeConnectionRouteId(sdc.ConnName) + remoteInfo, err := wshclient.RemoteGetInfoCommand(wshclient.GetBareRpcClient(), &wshrpc.RpcOpts{Route: connRoute, Timeout: 2000}) + if err != nil { + return "", fmt.Errorf("unable to obtain remote info from connserver: %w", err) + } + shellType := shellutil.GetShellTypeFromShellPath(remoteInfo.Shell) + swapToken := makeSwapToken(ctx, ctx, 
sdc.BlockId, blockMeta, sdc.ConnName, shellType) + sockName := wavebase.GetPersistentRemoteSockName(wstore.GetClientId()) + rpcContext := wshrpc.RpcContext{ + ProcRoute: true, + SockName: sockName, + BlockId: sdc.BlockId, + Conn: sdc.ConnName, + } + jwtStr, err := wshutil.MakeClientJWTToken(rpcContext) + if err != nil { + return "", fmt.Errorf("error making jwt token: %w", err) + } + swapToken.RpcContext = &rpcContext + swapToken.Env[wshutil.WaveJwtTokenVarName] = jwtStr + cmdOpts := shellexec.CommandOptsType{ + Interactive: true, + Login: true, + Cwd: cwd, + SwapToken: swapToken, + ForceJwt: blockMeta.GetBool(waveobj.MetaKey_CmdJwt, false), + } + jobId, err := shellexec.StartRemoteShellJob(ctx, ctx, termSize, cmdStr, cmdOpts, conn, sdc.BlockId) + if err != nil { + return "", fmt.Errorf("failed to start remote shell job: %w", err) + } + return jobId, nil +} + +func (sdc *SessionDaemonController) Stop(graceful bool, newStatus string, destroy bool) { + if !destroy { + return + } + ctx := context.Background() + sessiondaemon.Manager.DetachBlock(ctx, sdc.DaemonId, sdc.BlockId) + dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, sdc.DaemonId) + if err != nil { + return + } + if dbDaemon.IsAnonymous && len(sessiondaemon.Manager.GetBlocksForDaemon(sdc.DaemonId)) == 0 { + daemon := sessiondaemon.Manager.Get(sdc.DaemonId) + if daemon != nil { + daemon.Stop(ctx) + } + sessiondaemon.Manager.Remove(sdc.DaemonId) + wstore.DBUpdateFn(ctx, sdc.DaemonId, func(sd *waveobj.SessionDaemon) { + sd.Status = "done" + }) + } +} + +func (sdc *SessionDaemonController) SendInput(inputUnion *BlockInputUnion) error { + if inputUnion == nil { + return nil + } + daemon := sessiondaemon.Manager.Get(sdc.DaemonId) + if daemon == nil { + return fmt.Errorf("session daemon %s not found", sdc.DaemonId) + } + return daemon.SendInput(context.Background(), inputUnion.InputData, inputUnion.SigName, inputUnion.TermSize) +} + +func (sdc *SessionDaemonController) GetRuntimeStatus() 
*BlockControllerRuntimeStatus { + var rtn BlockControllerRuntimeStatus + sdc.WithLock(func() { + rtn.BlockId = sdc.BlockId + rtn.ShellProcConnName = sdc.ConnName + rtn.Version = sdc.versionTs + daemon := sessiondaemon.Manager.Get(sdc.DaemonId) + if daemon != nil { + if daemon.JobId == "" { + rtn.ShellProcStatus = "init" + } else { + rtn.ShellProcStatus = "running" + } + } else { + rtn.ShellProcStatus = "done" + } + }) + return &rtn +} + +func (sdc *SessionDaemonController) incrementVersion() { + sdc.Lock.Lock() + defer sdc.Lock.Unlock() + sdc.versionTs++ +} + +func (sdc *SessionDaemonController) GetConnName() string { + return sdc.ConnName +} + +func (sdc *SessionDaemonController) sendControllerStatus() { + rtStatus := sdc.GetRuntimeStatus() + log.Printf("sending blockcontroller update %#v\n", rtStatus) + wps.Broker.Publish(wps.WaveEvent{ + Event: wps.Event_ControllerStatus, + Scopes: []string{ + waveobj.MakeORef(waveobj.OType_Tab, sdc.TabId).String(), + waveobj.MakeORef(waveobj.OType_Block, sdc.BlockId).String(), + }, + Data: rtStatus, + }) +} + +func autoCreateSessionDaemon(ctx context.Context, blockId string, blockMeta waveobj.MetaMapType, connName string, rtOpts *waveobj.RuntimeOpts) (string, error) { + dbDaemon := &waveobj.SessionDaemon{ + OID: uuid.New().String(), + Name: "", + Connection: connName, + IsAnonymous: true, + Status: "init", + CreatedAt: time.Now().UnixMilli(), + IdleTimeout: sessiondaemon.DefaultAnonymousIdleTimeout, + } + + err := wstore.DBInsert(ctx, dbDaemon) + if err != nil { + return "", fmt.Errorf("insert session daemon: %w", err) + } + + err = wstore.DBUpdateFn(ctx, blockId, func(block *waveobj.Block) { + block.Meta[waveobj.MetaKey_SessionDaemonId] = dbDaemon.OID + }) + if err != nil { + return "", fmt.Errorf("update block meta: %w", err) + } + + _, err = sessiondaemon.Manager.GetOrCreate(ctx, dbDaemon) + if err != nil { + return "", fmt.Errorf("create session daemon in manager: %w", err) + } + + return dbDaemon.OID, nil +} diff --git 
a/pkg/jobcontroller/jobcontroller.go b/pkg/jobcontroller/jobcontroller.go index e41d77585c..73e3dc3587 100644 --- a/pkg/jobcontroller/jobcontroller.go +++ b/pkg/jobcontroller/jobcontroller.go @@ -30,7 +30,6 @@ import ( "github.com/wavetermdev/waveterm/pkg/wavebase" "github.com/wavetermdev/waveterm/pkg/wavejwt" "github.com/wavetermdev/waveterm/pkg/waveobj" - "github.com/wavetermdev/waveterm/pkg/wconfig" "github.com/wavetermdev/waveterm/pkg/wcore" "github.com/wavetermdev/waveterm/pkg/wps" "github.com/wavetermdev/waveterm/pkg/wshrpc" @@ -792,23 +791,7 @@ func doWFSAppend(ctx context.Context, oref waveobj.ORef, fileName string, data [ } func handleAppendJobFile(ctx context.Context, jobId string, fileName string, data []byte) error { - err := doWFSAppend(ctx, waveobj.MakeORef(waveobj.OType_Job, jobId), fileName, data) - if err != nil { - return fmt.Errorf("error appending to job file: %w", err) - } - - job, err := wstore.DBGet[*waveobj.Job](ctx, jobId) - if err != nil { - return fmt.Errorf("error getting job: %w", err) - } - if job != nil && job.AttachedBlockId != "" { - err = doWFSAppend(ctx, waveobj.MakeORef(waveobj.OType_Block, job.AttachedBlockId), fileName, data) - if err != nil { - return fmt.Errorf("error appending to block file: %w", err) - } - } - - return nil + return doWFSAppend(ctx, waveobj.MakeORef(waveobj.OType_Job, jobId), fileName, data) } func runOutputLoop(ctx context.Context, jobId string, streamId string, reader *streamclient.Reader) { @@ -1345,59 +1328,6 @@ func restartStreaming(ctx context.Context, jobId string, knownConnected bool, rt } // this function must be kept up to date with getBlockTermDurableAtom in frontend/app/store/global.ts -func IsBlockTermDurable(block *waveobj.Block) bool { - if block == nil { - return false - } - - // Check if view is "term", and controller is "shell" - if block.Meta.GetString(waveobj.MetaKey_View, "") != "term" || block.Meta.GetString(waveobj.MetaKey_Controller, "") != "shell" { - return false - } - - // 1. 
Check if block has a JobId - if block.JobId != "" { - return true - } - - // 2. Check if connection is local or WSL (not durable) - connName := block.Meta.GetString(waveobj.MetaKey_Connection, "") - if conncontroller.IsLocalConnName(connName) || conncontroller.IsWslConnName(connName) { - return false - } - - // 3. Check config hierarchy: blockmeta → connection → global (default true) - // Check block meta first - if val, exists := block.Meta[waveobj.MetaKey_TermDurable]; exists { - if boolVal, ok := val.(bool); ok { - return boolVal - } - } - // Check connection config - fullConfig := wconfig.GetWatcher().GetFullConfig() - if connName != "" { - if connConfig, exists := fullConfig.Connections[connName]; exists { - if connConfig.TermDurable != nil { - return *connConfig.TermDurable - } - } - } - // Check global settings - if fullConfig.Settings.TermDurable != nil { - return *fullConfig.Settings.TermDurable - } - // Default to true for non-local connections - return true -} - -func IsBlockIdTermDurable(blockId string) bool { - block, err := wstore.DBGet[*waveobj.Block](context.Background(), blockId) - if err != nil || block == nil { - return false - } - return IsBlockTermDurable(block) -} - func DeleteJob(ctx context.Context, jobId string) error { SetJobConnStatus(jobId, JobConnStatus_Disconnected) jobTerminationMessageWritten.Delete(jobId) diff --git a/pkg/sessiondaemon/sessiondaemon.go b/pkg/sessiondaemon/sessiondaemon.go new file mode 100644 index 0000000000..2b1c072d80 --- /dev/null +++ b/pkg/sessiondaemon/sessiondaemon.go @@ -0,0 +1,290 @@ +package sessiondaemon + +import ( + "context" + "encoding/base64" + "fmt" + "log" + "sync" + "time" + + "github.com/google/uuid" + "github.com/wavetermdev/waveterm/pkg/jobcontroller" + "github.com/wavetermdev/waveterm/pkg/waveobj" + "github.com/wavetermdev/waveterm/pkg/wshrpc" + "github.com/wavetermdev/waveterm/pkg/wstore" +) + +const ( + DefaultAnonymousIdleTimeout = 3600 // 1h + DefaultNamedIdleTimeout = 86400 // 24h + 
IdleCheckInterval = 60 // 检查间隔(秒) +) + +type SessionDaemon struct { + Lock sync.Mutex + + DaemonId string + Name string + JobId string + InputSessionId string + SeqNum int + Blocks map[string]bool +} + +type SessionDaemonManager struct { + Lock sync.Mutex + Daemons map[string]*SessionDaemon +} + +var Manager = &SessionDaemonManager{ + Daemons: make(map[string]*SessionDaemon), +} + +func (sd *SessionDaemon) GetNextInputSeq() (string, int) { + sd.Lock.Lock() + defer sd.Lock.Unlock() + sd.SeqNum++ + return sd.InputSessionId, sd.SeqNum +} + +func (sd *SessionDaemon) HasAttachedBlocks() bool { + sd.Lock.Lock() + defer sd.Lock.Unlock() + return len(sd.Blocks) > 0 +} + +func (sd *SessionDaemon) HasBlock(blockId string) bool { + sd.Lock.Lock() + defer sd.Lock.Unlock() + return sd.Blocks[blockId] +} + +func (sd *SessionDaemon) SetJobId(ctx context.Context, dbDaemon *waveobj.SessionDaemon, jobId string) error { + sd.Lock.Lock() + sd.JobId = jobId + sd.Lock.Unlock() + + err := wstore.DBUpdateFn(ctx, dbDaemon.OID, func(sdDb *waveobj.SessionDaemon) { + sdDb.JobId = jobId + sdDb.Status = "running" + }) + if err != nil { + log.Printf("[sessiondaemon:%s] warning: failed to update jobid in db: %v", sd.DaemonId, err) + } + return nil +} + +func (sd *SessionDaemon) Reconnect(ctx context.Context, dbDaemon *waveobj.SessionDaemon, rtOpts *waveobj.RuntimeOpts) error { + if dbDaemon.JobId == "" { + return fmt.Errorf("no jobid to reconnect") + } + sd.Lock.Lock() + sd.JobId = dbDaemon.JobId + sd.Lock.Unlock() + return jobcontroller.ReconnectJob(ctx, dbDaemon.JobId, rtOpts) +} + +func (sd *SessionDaemon) Stop(ctx context.Context) { + sd.Lock.Lock() + jobId := sd.JobId + sd.Lock.Unlock() + if jobId != "" { + jobcontroller.TerminateAndDetachJob(ctx, jobId) + } +} + +func (sd *SessionDaemon) SendInput(ctx context.Context, inputData []byte, sigName string, termSize *waveobj.TermSize) error { + sd.Lock.Lock() + jobId := sd.JobId + if jobId == "" { + sd.Lock.Unlock() + return fmt.Errorf("no job 
attached") + } + sd.SeqNum++ // bump before reading: first sequence number is 1, same numbering as GetNextInputSeq + inputSessionId, seqNum := sd.InputSessionId, sd.SeqNum + sd.Lock.Unlock() + + data := wshrpc.CommandJobInputData{ + JobId: jobId, + InputSessionId: inputSessionId, + SeqNum: seqNum, + TermSize: termSize, + SigName: sigName, + } + if len(inputData) > 0 { + data.InputData64 = base64.StdEncoding.EncodeToString(inputData) + } + return jobcontroller.SendInput(ctx, data) +} + +func (sd *SessionDaemonManager) GetOrCreate(ctx context.Context, dbDaemon *waveobj.SessionDaemon) (*SessionDaemon, error) { + sd.Lock.Lock() + defer sd.Lock.Unlock() + + if existing, ok := sd.Daemons[dbDaemon.OID]; ok { + existing.Lock.Lock() + if existing.JobId == "" { + existing.JobId = dbDaemon.JobId + } + existing.Lock.Unlock() + return existing, nil + } + + daemon := &SessionDaemon{ + DaemonId: dbDaemon.OID, + Name: dbDaemon.Name, + JobId: dbDaemon.JobId, + InputSessionId: uuid.New().String(), + Blocks: make(map[string]bool), + } + sd.Daemons[dbDaemon.OID] = daemon + return daemon, nil +} + +func (sd *SessionDaemonManager) Get(daemonId string) *SessionDaemon { + sd.Lock.Lock() + defer sd.Lock.Unlock() + return sd.Daemons[daemonId] +} + +func (sd *SessionDaemonManager) Remove(daemonId string) { + sd.Lock.Lock() + defer sd.Lock.Unlock() + delete(sd.Daemons, daemonId) +} + +func (sd *SessionDaemonManager) AttachBlock(ctx context.Context, daemonId string, blockId string) { + sd.Lock.Lock() + daemon, ok := sd.Daemons[daemonId] + if !ok { + sd.Lock.Unlock() + return + } + daemon.Lock.Lock() + sd.Lock.Unlock() + defer daemon.Lock.Unlock() + daemon.Blocks[blockId] = true + wstore.DBUpdateFn(ctx, daemonId, func(dbD *waveobj.SessionDaemon) { + dbD.IdleSince = 0 + }) +} + +func (sd *SessionDaemonManager) DetachBlock(ctx context.Context, daemonId string, blockId string) { + sd.Lock.Lock() + daemon, ok := sd.Daemons[daemonId] + if !ok { + sd.Lock.Unlock() + return + } + daemon.Lock.Lock() + sd.Lock.Unlock() + defer daemon.Lock.Unlock() + delete(daemon.Blocks, blockId) + if 
len(daemon.Blocks) == 0 { + wstore.DBUpdateFn(ctx, daemonId, func(dbD *waveobj.SessionDaemon) { + dbD.IdleSince = time.Now().UnixMilli() + }) + } +} + +func (sd *SessionDaemonManager) GetBlocksForDaemon(daemonId string) []string { + sd.Lock.Lock() + daemon, ok := sd.Daemons[daemonId] + if !ok { + sd.Lock.Unlock() + return nil + } + daemon.Lock.Lock() + sd.Lock.Unlock() + defer daemon.Lock.Unlock() + var rtn []string + for blockId := range daemon.Blocks { + rtn = append(rtn, blockId) + } + return rtn +} + +func (sd *SessionDaemonManager) SendInput(daemonId string, inputData []byte, sigName string, termSize *waveobj.TermSize) error { + ctx := context.Background() + sd.Lock.Lock() + daemon, ok := sd.Daemons[daemonId] + sd.Lock.Unlock() + if !ok { + return fmt.Errorf("daemon %s not found", daemonId) + } + return daemon.SendInput(ctx, inputData, sigName, termSize) +} + +func (sd *SessionDaemonManager) InitFromDB(ctx context.Context) error { + daemons, err := wstore.DBGetAllObjsByType[*waveobj.SessionDaemon](ctx, waveobj.OType_SessionDaemon) + if err != nil { + return fmt.Errorf("load session daemons: %w", err) + } + + for _, dbDaemon := range daemons { + if dbDaemon.Status == "running" || dbDaemon.Status == "disconnected" { + daemon, err := sd.GetOrCreate(ctx, dbDaemon) + if err != nil { + log.Printf("[sessiondaemon] warning: failed to load daemon %s: %v", dbDaemon.OID, err) + continue + } + err = daemon.Reconnect(ctx, dbDaemon, nil) + if err != nil { + log.Printf("[sessiondaemon:%s] reconnect failed: %v", dbDaemon.OID, err) + } + } + } + return nil +} + +func (sd *SessionDaemonManager) StartIdleReaper(ctx context.Context) { + go func() { + ticker := time.NewTicker(IdleCheckInterval * time.Second) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + sd.reapIdleDaemons(ctx) + } + } + }() +} + +func (sd *SessionDaemonManager) reapIdleDaemons(ctx context.Context) { + allDaemons, err := 
wstore.DBGetAllObjsByType[*waveobj.SessionDaemon](ctx, waveobj.OType_SessionDaemon) + if err != nil { + return + } + + for _, dbDaemon := range allDaemons { + if dbDaemon.Status != "running" { + continue + } + + sd.Lock.Lock() + memDaemon, hasMem := sd.Daemons[dbDaemon.OID] + sd.Lock.Unlock() + + if hasMem && memDaemon.HasAttachedBlocks() { + continue + } + + if dbDaemon.IdleTimeout <= 0 || dbDaemon.IdleSince == 0 { + continue + } + + if time.Since(time.UnixMilli(dbDaemon.IdleSince)) > time.Duration(dbDaemon.IdleTimeout)*time.Second { + log.Printf("[sessiondaemon:%s] idle timeout reached, terminating", dbDaemon.OID) + if hasMem { + memDaemon.Stop(ctx) + sd.Remove(dbDaemon.OID) + } + wstore.DBUpdateFn(ctx, dbDaemon.OID, func(sdDb *waveobj.SessionDaemon) { + sdDb.Status = "done" + }) + } + } +} diff --git a/pkg/waveobj/metaconsts.go b/pkg/waveobj/metaconsts.go index 0ce08099d8..be1bd4f077 100644 --- a/pkg/waveobj/metaconsts.go +++ b/pkg/waveobj/metaconsts.go @@ -62,6 +62,8 @@ const ( MetaKey_CmdInitScriptPwsh = "cmd:initscript.pwsh" MetaKey_CmdInitScriptFish = "cmd:initscript.fish" + MetaKey_SessionDaemonId = "session:daemonid" + MetaKey_AiClear = "ai:*" MetaKey_AiPresetKey = "ai:preset" MetaKey_AiApiType = "ai:apitype" diff --git a/pkg/waveobj/wtype.go b/pkg/waveobj/wtype.go index 0ac9e92eb1..4e50a20ae8 100644 --- a/pkg/waveobj/wtype.go +++ b/pkg/waveobj/wtype.go @@ -31,7 +31,8 @@ const ( OType_MainServer = "mainserver" OType_Job = "job" OType_Temp = "temp" - OType_Builder = "builder" // not persisted to DB + OType_Builder = "builder" // not persisted to DB + OType_SessionDaemon = "sessiondaemon" ) var ValidOTypes = map[string]bool{ @@ -45,6 +46,7 @@ var ValidOTypes = map[string]bool{ OType_Job: true, OType_Temp: true, OType_Builder: true, + OType_SessionDaemon: true, } type WaveObjUpdate struct { @@ -354,6 +356,26 @@ func (*Job) GetOType() string { return OType_Job } +type SessionDaemon struct { + OID string `json:"oid"` + Version int `json:"version"` + + Name 
string `json:"name,omitempty"` + Connection string `json:"connection,omitempty"` + JobId string `json:"jobid,omitempty"` + IsAnonymous bool `json:"isanonymous,omitempty"` + Status string `json:"status,omitempty"` + Cwd string `json:"cwd,omitempty"` + CreatedAt int64 `json:"createdat,omitempty"` + IdleTimeout int64 `json:"idletimeout,omitempty"` + IdleSince int64 `json:"idlesince,omitempty"` // ms timestamp when last block detached (0 = has attached blocks) + Meta MetaMapType `json:"meta"` +} + +func (*SessionDaemon) GetOType() string { + return OType_SessionDaemon +} + func AllWaveObjTypes() []reflect.Type { return []reflect.Type{ reflect.TypeOf(&Client{}), @@ -364,6 +386,7 @@ func AllWaveObjTypes() []reflect.Type { reflect.TypeOf(&LayoutState{}), reflect.TypeOf(&MainServer{}), reflect.TypeOf(&Job{}), + reflect.TypeOf(&SessionDaemon{}), } } diff --git a/pkg/waveobj/wtypemeta.go b/pkg/waveobj/wtypemeta.go index 2280b55d2d..be2283d082 100644 --- a/pkg/waveobj/wtypemeta.go +++ b/pkg/waveobj/wtypemeta.go @@ -61,7 +61,9 @@ type MetaTSType struct { CmdInitScriptBash string `json:"cmd:initscript.bash,omitempty"` CmdInitScriptZsh string `json:"cmd:initscript.zsh,omitempty"` CmdInitScriptPwsh string `json:"cmd:initscript.pwsh,omitempty"` - CmdInitScriptFish string `json:"cmd:initscript.fish,omitempty"` + CmdInitScriptFish string `json:"cmd:initscript.fish,omitempty"` + + SessionDaemonId string `json:"session:daemonid,omitempty"` // AI options match settings AiClear bool `json:"ai:*,omitempty"` diff --git a/pkg/wshrpc/wshclient/wshclient.go b/pkg/wshrpc/wshclient/wshclient.go index d5333aec2b..c8f11e6122 100644 --- a/pkg/wshrpc/wshclient/wshclient.go +++ b/pkg/wshrpc/wshclient/wshclient.go @@ -830,6 +830,48 @@ func SendTelemetryCommand(w *wshutil.WshRpc, opts *wshrpc.RpcOpts) error { return err } +// command "sessionattach", wshserver.SessionAttachCommand +func SessionAttachCommand(w *wshutil.WshRpc, data wshrpc.CommandSessionAttachData, opts *wshrpc.RpcOpts) error { + _, 
err := sendRpcRequestCallHelper[any](w, "sessionattach", data, opts) + return err +} + +// command "sessioncreate", wshserver.SessionCreateCommand +func SessionCreateCommand(w *wshutil.WshRpc, data wshrpc.CommandSessionCreateData, opts *wshrpc.RpcOpts) (*wshrpc.SessionInfoRtnData, error) { + resp, err := sendRpcRequestCallHelper[*wshrpc.SessionInfoRtnData](w, "sessioncreate", data, opts) + return resp, err +} + +// command "sessiondelete", wshserver.SessionDeleteCommand +func SessionDeleteCommand(w *wshutil.WshRpc, data wshrpc.CommandSessionDeleteData, opts *wshrpc.RpcOpts) error { + _, err := sendRpcRequestCallHelper[any](w, "sessiondelete", data, opts) + return err +} + +// command "sessiondetach", wshserver.SessionDetachCommand +func SessionDetachCommand(w *wshutil.WshRpc, data wshrpc.CommandSessionDetachData, opts *wshrpc.RpcOpts) error { + _, err := sendRpcRequestCallHelper[any](w, "sessiondetach", data, opts) + return err +} + +// command "sessioninfo", wshserver.SessionInfoCommand +func SessionInfoCommand(w *wshutil.WshRpc, data wshrpc.CommandSessionInfoData, opts *wshrpc.RpcOpts) (*wshrpc.SessionInfoRtnData, error) { + resp, err := sendRpcRequestCallHelper[*wshrpc.SessionInfoRtnData](w, "sessioninfo", data, opts) + return resp, err +} + +// command "sessionlist", wshserver.SessionListCommand +func SessionListCommand(w *wshutil.WshRpc, data wshrpc.CommandSessionListData, opts *wshrpc.RpcOpts) ([]wshrpc.SessionInfoRtnData, error) { + resp, err := sendRpcRequestCallHelper[[]wshrpc.SessionInfoRtnData](w, "sessionlist", data, opts) + return resp, err +} + +// command "sessiontag", wshserver.SessionTagCommand +func SessionTagCommand(w *wshutil.WshRpc, data wshrpc.CommandSessionTagData, opts *wshrpc.RpcOpts) error { + _, err := sendRpcRequestCallHelper[any](w, "sessiontag", data, opts) + return err +} + // command "setblockfocus", wshserver.SetBlockFocusCommand func SetBlockFocusCommand(w *wshutil.WshRpc, data string, opts *wshrpc.RpcOpts) error { _, err := 
sendRpcRequestCallHelper[any](w, "setblockfocus", data, opts) diff --git a/pkg/wshrpc/wshrpctypes.go b/pkg/wshrpc/wshrpctypes.go index 51e2338ba8..f8a94e928b 100644 --- a/pkg/wshrpc/wshrpctypes.go +++ b/pkg/wshrpc/wshrpctypes.go @@ -211,6 +211,15 @@ type WshRpcInterface interface { JobControllerDetachJobCommand(ctx context.Context, jobId string) error JobControllerGetAllJobManagerStatusCommand(ctx context.Context) ([]*JobManagerStatusUpdate, error) BlockJobStatusCommand(ctx context.Context, blockId string) (*BlockJobStatusData, error) + + // session daemon + SessionCreateCommand(ctx context.Context, data CommandSessionCreateData) (*SessionInfoRtnData, error) + SessionDeleteCommand(ctx context.Context, data CommandSessionDeleteData) error + SessionListCommand(ctx context.Context, data CommandSessionListData) ([]SessionInfoRtnData, error) + SessionAttachCommand(ctx context.Context, data CommandSessionAttachData) error + SessionDetachCommand(ctx context.Context, data CommandSessionDetachData) error + SessionInfoCommand(ctx context.Context, data CommandSessionInfoData) (*SessionInfoRtnData, error) + SessionTagCommand(ctx context.Context, data CommandSessionTagData) error } // for frontend @@ -925,3 +934,50 @@ type CommandRemoteProcessSignalData struct { Pid int32 `json:"pid"` Signal string `json:"signal"` } + +// session daemon +type CommandSessionCreateData struct { + Name string `json:"name,omitempty"` + Connection string `json:"connection,omitempty"` + IdleTimeout int64 `json:"idletimeout,omitempty"` +} + +type CommandSessionDeleteData struct { + DaemonId string `json:"daemonid"` +} + +type CommandSessionListData struct { + ShowAll bool `json:"showall,omitempty"` +} + +type CommandSessionAttachData struct { + DaemonId string `json:"daemonid"` + BlockId string `json:"blockid"` +} + +type CommandSessionDetachData struct { + DaemonId string `json:"daemonid"` + BlockId string `json:"blockid,omitempty"` +} + +type CommandSessionInfoData struct { + DaemonId string 
`json:"daemonid"` +} + +type CommandSessionTagData struct { + DaemonId string `json:"daemonid"` + Name string `json:"name"` +} + +type SessionInfoRtnData struct { + DaemonId string `json:"daemonid"` + Name string `json:"name"` + Connection string `json:"connection"` + JobId string `json:"jobid,omitempty"` + IsAnonymous bool `json:"isanonymous"` + Status string `json:"status"` + CreatedAt int64 `json:"createdat"` + IdleTimeout int64 `json:"idletimeout"` + IdleSince int64 `json:"idlesince,omitempty"` + Blocks []string `json:"blocks,omitempty"` +} diff --git a/pkg/wshrpc/wshserver/wshserver.go b/pkg/wshrpc/wshserver/wshserver.go index 38006fd9a8..a908585313 100644 --- a/pkg/wshrpc/wshserver/wshserver.go +++ b/pkg/wshrpc/wshserver/wshserver.go @@ -20,6 +20,7 @@ import ( "strings" "time" + "github.com/google/uuid" "github.com/skratchdot/open-golang/open" "github.com/wavetermdev/waveterm/pkg/aiusechat" "github.com/wavetermdev/waveterm/pkg/aiusechat/chatstore" @@ -37,6 +38,7 @@ import ( "github.com/wavetermdev/waveterm/pkg/remote/conncontroller" "github.com/wavetermdev/waveterm/pkg/remote/fileshare/wshfs" "github.com/wavetermdev/waveterm/pkg/secretstore" + "github.com/wavetermdev/waveterm/pkg/sessiondaemon" "github.com/wavetermdev/waveterm/pkg/suggestion" "github.com/wavetermdev/waveterm/pkg/telemetry" "github.com/wavetermdev/waveterm/pkg/telemetry/telemetrydata" @@ -1574,3 +1576,182 @@ func (ws *WshServer) JobControllerDetachJobCommand(ctx context.Context, jobId st func (ws *WshServer) BlockJobStatusCommand(ctx context.Context, blockId string) (*wshrpc.BlockJobStatusData, error) { return jobcontroller.GetBlockJobStatus(ctx, blockId) } + +func (ws *WshServer) SessionCreateCommand(ctx context.Context, data wshrpc.CommandSessionCreateData) (*wshrpc.SessionInfoRtnData, error) { + dbDaemon := &waveobj.SessionDaemon{ + OID: uuid.New().String(), + Name: data.Name, + Connection: data.Connection, + IsAnonymous: data.Name == "", + Status: "init", + CreatedAt: 
time.Now().UnixMilli(), + IdleTimeout: data.IdleTimeout, + } + if dbDaemon.IsAnonymous { + dbDaemon.IdleTimeout = sessiondaemon.DefaultAnonymousIdleTimeout + } else if dbDaemon.IdleTimeout <= 0 { + dbDaemon.IdleTimeout = sessiondaemon.DefaultNamedIdleTimeout + } + + err := wstore.DBInsert(ctx, dbDaemon) + if err != nil { + return nil, fmt.Errorf("insert session daemon: %w", err) + } + + _, err = sessiondaemon.Manager.GetOrCreate(ctx, dbDaemon) + if err != nil { + return nil, fmt.Errorf("create session daemon in manager: %w", err) + } + + return buildSessionInfoRtnData(ctx, dbDaemon) +} + +func (ws *WshServer) SessionDeleteCommand(ctx context.Context, data wshrpc.CommandSessionDeleteData) error { + _, err := wstore.DBGet[*waveobj.SessionDaemon](ctx, data.DaemonId) + if err != nil { + return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) + } + + memDaemon := sessiondaemon.Manager.Get(data.DaemonId) + if memDaemon != nil { + memDaemon.Stop(ctx) + sessiondaemon.Manager.Remove(data.DaemonId) + } + + err = wstore.DBUpdateFn(ctx, data.DaemonId, func(sd *waveobj.SessionDaemon) { + sd.Status = "done" + }) + if err != nil { + return fmt.Errorf("update session daemon status: %w", err) + } + return nil +} + +func (ws *WshServer) SessionListCommand(ctx context.Context, data wshrpc.CommandSessionListData) ([]wshrpc.SessionInfoRtnData, error) { + allDaemons, err := wstore.DBGetAllObjsByType[*waveobj.SessionDaemon](ctx, waveobj.OType_SessionDaemon) + if err != nil { + return nil, fmt.Errorf("list session daemons: %w", err) + } + + var rtn []wshrpc.SessionInfoRtnData + for _, dbDaemon := range allDaemons { + if dbDaemon.IsAnonymous && !data.ShowAll { + continue + } + info, err := buildSessionInfoRtnData(ctx, dbDaemon) + if err != nil { + return nil, err + } + rtn = append(rtn, *info) + } + sort.Slice(rtn, func(i, j int) bool { + return rtn[i].CreatedAt > rtn[j].CreatedAt + }) + return rtn, nil +} + +func (ws *WshServer) SessionAttachCommand(ctx context.Context, 
data wshrpc.CommandSessionAttachData) error { + _, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) // a missing daemon must surface as an error here + if err != nil { + return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) + } + + sessiondaemon.Manager.AttachBlock(ctx, data.DaemonId, data.BlockId) + + err = wstore.DBUpdateFn(ctx, data.BlockId, func(block *waveobj.Block) { + block.Meta[waveobj.MetaKey_SessionDaemonId] = data.DaemonId + }) + if err != nil { + return fmt.Errorf("update block meta: %w", err) + } + return nil +} + +func (ws *WshServer) SessionDetachCommand(ctx context.Context, data wshrpc.CommandSessionDetachData) error { + _, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) + if err != nil { + return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) + } + + blockIds := []string{} + if data.BlockId != "" { + blockIds = append(blockIds, data.BlockId) + } else { + blockIds = sessiondaemon.Manager.GetBlocksForDaemon(data.DaemonId) + } + + for _, blockId := range blockIds { + sessiondaemon.Manager.DetachBlock(ctx, data.DaemonId, blockId) + err = wstore.DBUpdateFn(ctx, blockId, func(block *waveobj.Block) { + delete(block.Meta, waveobj.MetaKey_SessionDaemonId) + }) + if err != nil { + return fmt.Errorf("update block meta: %w", err) + } + resyncBlockController(ctx, blockId) + } + return nil +} + +func (ws *WshServer) SessionInfoCommand(ctx context.Context, data wshrpc.CommandSessionInfoData) (*wshrpc.SessionInfoRtnData, error) { + dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) // a nil daemon would be dereferenced by buildSessionInfoRtnData + if err != nil { + return nil, fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) + } + return buildSessionInfoRtnData(ctx, dbDaemon) +} + +func (ws *WshServer) SessionTagCommand(ctx context.Context, data wshrpc.CommandSessionTagData) error { + _, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) + if err != nil { + return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) + } + + memDaemon := 
sessiondaemon.Manager.Get(data.DaemonId) + if memDaemon != nil { + memDaemon.Lock.Lock() + memDaemon.Name = data.Name + memDaemon.Lock.Unlock() + } + + err = wstore.DBUpdateFn(ctx, data.DaemonId, func(sd *waveobj.SessionDaemon) { + sd.Name = data.Name + sd.IsAnonymous = false + }) + if err != nil { + return fmt.Errorf("update session daemon: %w", err) + } + return nil +} + +func buildSessionInfoRtnData(ctx context.Context, dbDaemon *waveobj.SessionDaemon) (*wshrpc.SessionInfoRtnData, error) { + blocks := sessiondaemon.Manager.GetBlocksForDaemon(dbDaemon.OID) + return &wshrpc.SessionInfoRtnData{ + DaemonId: dbDaemon.OID, + Name: dbDaemon.Name, + Connection: dbDaemon.Connection, + JobId: dbDaemon.JobId, + IsAnonymous: dbDaemon.IsAnonymous, + Status: dbDaemon.Status, + CreatedAt: dbDaemon.CreatedAt, + IdleTimeout: dbDaemon.IdleTimeout, + IdleSince: dbDaemon.IdleSince, + Blocks: blocks, + }, nil +} + +func resyncBlockController(ctx context.Context, blockId string) { + tabs, err := wstore.DBGetAllObjsByType[*waveobj.Tab](ctx, waveobj.OType_Tab) + if err != nil { + log.Printf("[sessiondaemon] warning: error getting tabs for resync: %v", err) + return + } + for _, tab := range tabs { + for _, bid := range tab.BlockIds { + if bid == blockId { + blockcontroller.ResyncController(ctx, tab.OID, blockId, nil, true) + return + } + } + } +} diff --git a/pkg/wstore/wstore_dbsessionmigration.go b/pkg/wstore/wstore_dbsessionmigration.go new file mode 100644 index 0000000000..bf32abd88a --- /dev/null +++ b/pkg/wstore/wstore_dbsessionmigration.go @@ -0,0 +1,95 @@ +// Copyright 2026, Command Line Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +package wstore + +import ( + "context" + "fmt" + "log" + "time" + + "github.com/google/uuid" + "github.com/wavetermdev/waveterm/pkg/waveobj" +) + +const MigrateSessionDaemonKey = "migrate:sessiondaemon" + +func runSessionDaemonMigration(ctx context.Context) error { + client, err := DBGetSingleton[*waveobj.Client](ctx) + if err != nil { + return fmt.Errorf("get client: %w", err) + } + + if client.Meta != nil && client.Meta[MigrateSessionDaemonKey] == true { + return nil + } + + blocks, err := DBGetAllObjsByType[*waveobj.Block](ctx, waveobj.OType_Block) + if err != nil { + return fmt.Errorf("list blocks: %w", err) + } + + var migrated int + for _, block := range blocks { + if block.JobId == "" { + continue + } + connName := block.Meta.GetString("connection", "") + if connName == "" { + continue + } + + daemonId := uuid.New().String() + dbDaemon := &waveobj.SessionDaemon{ + OID: daemonId, + Name: "", + Connection: connName, + JobId: block.JobId, + IsAnonymous: true, + Status: "running", + CreatedAt: time.Now().UnixMilli(), + IdleTimeout: 3600, + } + + err = DBInsert(ctx, dbDaemon) + if err != nil { + log.Printf("[migration] warning: error inserting session daemon for block %s: %v", block.OID, err) + continue + } + + err = DBUpdateFn(ctx, block.OID, func(b *waveobj.Block) { + if b.Meta == nil { + b.Meta = make(waveobj.MetaMapType) + } + b.Meta[waveobj.MetaKey_SessionDaemonId] = daemonId + b.JobId = "" + }) + if err != nil { + log.Printf("[migration] warning: error updating block %s: %v", block.OID, err) + continue + } + + migrated++ + } + + if client.Meta == nil { + client.Meta = make(waveobj.MetaMapType) + } + client.Meta[MigrateSessionDaemonKey] = true + err = DBUpdate(ctx, client) + if err != nil { + return fmt.Errorf("update client meta: %w", err) + } + + if migrated > 0 { + log.Printf("[migration] migrated %d blocks to session daemon\n", migrated) + } + return nil +} + +func RunSessionDaemonMigration(ctx context.Context) 
error { + ctx, cancelFn := context.WithTimeout(ctx, 30*time.Second) + defer cancelFn() + return runSessionDaemonMigration(ctx) +} From de7cc8ae535f45148131cb6c4e2d1ddc9826861e Mon Sep 17 00:00:00 2001 From: lyx-tec Date: Mon, 8 Jun 2026 23:12:26 +0800 Subject: [PATCH 04/36] chore: stop tracking .kilocode/, CLAUDE.md (ai config); add to .gitignore --- .kilocode/rules/overview.md | 154 --- .kilocode/rules/rules.md | 204 ---- .kilocode/skills/add-config/SKILL.md | 471 --------- .kilocode/skills/add-rpc/SKILL.md | 453 --------- .kilocode/skills/add-wshcmd/SKILL.md | 921 ------------------ .kilocode/skills/context-menu/SKILL.md | 160 --- .kilocode/skills/create-view/SKILL.md | 520 ---------- .kilocode/skills/electron-api/SKILL.md | 182 ---- .../skills/openspec-apply-change/SKILL.md | 156 --- .../skills/openspec-archive-change/SKILL.md | 114 --- .kilocode/skills/openspec-explore/SKILL.md | 288 ------ .kilocode/skills/openspec-propose/SKILL.md | 110 --- .kilocode/skills/waveenv/SKILL.md | 133 --- .kilocode/skills/wps-events/SKILL.md | 339 ------- .kilocode/workflows/opsx-apply.md | 145 --- .kilocode/workflows/opsx-archive.md | 150 --- .kilocode/workflows/opsx-explore.md | 166 ---- .kilocode/workflows/opsx-propose.md | 99 -- CLAUDE.md | 18 - 19 files changed, 4783 deletions(-) delete mode 100644 .kilocode/rules/overview.md delete mode 100644 .kilocode/rules/rules.md delete mode 100644 .kilocode/skills/add-config/SKILL.md delete mode 100644 .kilocode/skills/add-rpc/SKILL.md delete mode 100644 .kilocode/skills/add-wshcmd/SKILL.md delete mode 100644 .kilocode/skills/context-menu/SKILL.md delete mode 100644 .kilocode/skills/create-view/SKILL.md delete mode 100644 .kilocode/skills/electron-api/SKILL.md delete mode 100644 .kilocode/skills/openspec-apply-change/SKILL.md delete mode 100644 .kilocode/skills/openspec-archive-change/SKILL.md delete mode 100644 .kilocode/skills/openspec-explore/SKILL.md delete mode 100644 .kilocode/skills/openspec-propose/SKILL.md delete mode 100644 
.kilocode/skills/waveenv/SKILL.md delete mode 100644 .kilocode/skills/wps-events/SKILL.md delete mode 100644 .kilocode/workflows/opsx-apply.md delete mode 100644 .kilocode/workflows/opsx-archive.md delete mode 100644 .kilocode/workflows/opsx-explore.md delete mode 100644 .kilocode/workflows/opsx-propose.md delete mode 100644 CLAUDE.md diff --git a/.kilocode/rules/overview.md b/.kilocode/rules/overview.md deleted file mode 100644 index 944a4021dd..0000000000 --- a/.kilocode/rules/overview.md +++ /dev/null @@ -1,154 +0,0 @@ -# Wave Terminal - High Level Architecture Overview - -## Project Description - -Wave Terminal is an open-source AI-native terminal built for seamless workflows. It's an Electron application that serves as a command line terminal host (it hosts CLI applications rather than running inside a CLI). The application combines a React frontend with a Go backend server to provide a modern terminal experience with advanced features. - -## Top-Level Directory Structure - -``` -waveterm/ -├── emain/ # Electron main process code -├── frontend/ # React application (renderer process) -├── cmd/ # Go command-line applications -├── pkg/ # Go packages/modules -├── db/ # Database migrations -├── docs/ # Documentation (Docusaurus) -├── build/ # Build configuration and assets -├── assets/ # Application assets (icons, images) -├── public/ # Static public assets -├── tests/ # Test files -├── .github/ # GitHub workflows and configuration -└── Configuration files (package.json, tsconfig.json, etc.) -``` - -## Architecture Components - -### 1. 
Electron Main Process (`emain/`) - -The Electron main process handles the native desktop application layer: - -**Key Files:** - -- [`emain.ts`](emain/emain.ts) - Main entry point, application lifecycle management -- [`emain-window.ts`](emain/emain-window.ts) - Window management (`WaveBrowserWindow` class) -- [`emain-tabview.ts`](emain/emain-tabview.ts) - Tab view management (`WaveTabView` class) -- [`emain-wavesrv.ts`](emain/emain-wavesrv.ts) - Go backend server integration -- [`emain-wsh.ts`](emain/emain-wsh.ts) - WSH (Wave Shell) client integration -- [`emain-ipc.ts`](emain/emain-ipc.ts) - IPC handlers for frontend ↔ main process communication -- [`emain-menu.ts`](emain/emain-menu.ts) - Application menu system -- [`updater.ts`](emain/updater.ts) - Auto-update functionality -- [`preload.ts`](emain/preload.ts) - Preload script for renderer security -- [`preload-webview.ts`](emain/preload-webview.ts) - Webview preload script - -### 2. Frontend React Application (`frontend/`) - -The React application runs in the Electron renderer process: - -**Structure:** - -``` -frontend/ -├── app/ # Main application code -│ ├── app.tsx # Root App component -│ ├── aipanel/ # AI panel UI -│ ├── block/ # Block-based UI components -│ ├── element/ # Reusable UI elements -│ ├── hook/ # Custom React hooks -│ ├── modals/ # Modal components -│ ├── store/ # State management (Jotai) -│ ├── tab/ # Tab components -│ ├── view/ # Different view types -│ │ ├── codeeditor/ # Code editor (Monaco) -│ │ ├── preview/ # File preview -│ │ ├── sysinfo/ # System info view -│ │ ├── term/ # Terminal view -│ │ ├── tsunami/ # Tsunami builder view -│ │ ├── vdom/ # Virtual DOM view -│ │ ├── waveai/ # AI chat integration -│ │ ├── waveconfig/ # Config editor view -│ │ └── webview/ # Web view -│ └── workspace/ # Workspace management -├── builder/ # Builder app entry -├── layout/ # Layout system -├── preview/ # Standalone preview renderer -├── types/ # TypeScript type definitions -└── util/ # Utility functions -``` 
- -**Key Technologies:** - -- Electron (desktop application shell) -- React 19 with TypeScript -- Jotai for state management -- Monaco Editor for code editing -- XTerm.js for terminal emulation -- Tailwind CSS v4 for styling -- SCSS for additional styling (deprecated, new components should use Tailwind) -- Vite / electron-vite for bundling -- Task (Taskfile.yml) for build and code generation commands - -### 3. Go Backend Server (`cmd/server/`) - -The Go backend server handles all heavy lifting operations: - -**Entry Point:** [`main-server.go`](cmd/server/main-server.go) - -### 4. Go Packages (`pkg/`) - -The Go codebase is organized into modular packages: - -**Key Packages:** - -- `wstore/` - Database and storage layer -- `wconfig/` - Configuration management -- `wcore/` - Core business logic -- `wshrpc/` - RPC communication system -- `wshutil/` - WSH (Wave Shell) utilities -- `blockcontroller/` - Block execution management -- `remote/` - Remote connection handling -- `filestore/` - File storage system -- `web/` - Web server and WebSocket handling -- `telemetry/` - Usage analytics and telemetry -- `waveobj/` - Core data objects -- `service/` - Service layer -- `wps/` - Wave PubSub event system -- `waveai/` - AI functionality -- `shellexec/` - Shell execution -- `util/` - Common utilities - -### 5. Command Line Tools (`cmd/`) - -Key Go command-line utilities: - -- `wsh/` - Wave Shell command-line tool -- `server/` - Main backend server -- `generatego/` - Code generation -- `generateschema/` - Schema generation -- `generatets/` - TypeScript generation - -## Communication Architecture - -The core communication system is built around the **WSH RPC (Wave Shell RPC)** system, which provides a unified interface for all inter-process communication: frontend ↔ Go backend, Electron main process ↔ backend, and backend ↔ remote systems (SSH, WSL). 
- -### WSH RPC System (`pkg/wshrpc/`) - -The WSH RPC system is the backbone of Wave Terminal's communication architecture: - -**Key Components:** - -- [`wshrpctypes.go`](pkg/wshrpc/wshrpctypes.go) - Core RPC interface and type definitions (source of truth for all RPC commands) -- [`wshserver/`](pkg/wshrpc/wshserver/) - Server-side RPC implementation -- [`wshremote/`](pkg/wshrpc/wshremote/) - Remote connection handling -- [`wshclient.go`](pkg/wshrpc/wshclient.go) - Go client for making RPC calls -- [`frontend/app/store/wshclientapi.ts`](frontend/app/store/wshclientapi.ts) - Generated TypeScript RPC client - -**Routing:** Callers address RPC calls using _routes_ (e.g. a block ID, connection name, or `"waveapp"`) rather than caring about the underlying transport. The RPC layer resolves the route to the correct transport (WebSocket, Unix socket, SSH tunnel, stdio) automatically. This means the same RPC interface works whether the target is local or a remote SSH connection. - -## Development Notes - -- **Build commands** - Use `task` (Taskfile.yml) for all build, generate, and packaging commands -- **Code generation** - Run `task generate` after modifying Go types in `pkg/wshrpc/wshrpctypes.go`, `pkg/wconfig/settingsconfig.go`, or `pkg/waveobj/wtypemeta.go` -- **Testing** - Vitest for frontend unit tests; standard `go test` for Go packages -- **Database migrations** - SQL migration files in `db/migrations-wstore/` and `db/migrations-filestore/` -- **Documentation** - Docusaurus site in `docs/` diff --git a/.kilocode/rules/rules.md b/.kilocode/rules/rules.md deleted file mode 100644 index 904292ea97..0000000000 --- a/.kilocode/rules/rules.md +++ /dev/null @@ -1,204 +0,0 @@ -Wave Terminal is a modern terminal which provides graphical blocks, dynamic layout, workspaces, and SSH connection management. It is cross platform and built on electron. - -### Project Structure - -It has a TypeScript/React frontend and a Go backend. 
They talk together over `wshrpc` a custom RPC protocol that is implemented over websocket (and domain sockets). - -### Coding Guidelines - -- **Go Conventions**: - - Don't use custom enum types in Go. Instead, use string constants (e.g., `const StatusRunning = "running"` rather than creating a custom type like `type Status string`). - - Use string constants for status values, packet types, and other string-based enumerations. - - in Go code, prefer using Printf() vs Println() - - use "Make" as opposed to "New" for struct initialization func names - - in general const decls go at the top of the file (before types and functions) - - NEVER run `go build` (especially in weird sub-package directories). we can tell if everything compiles by seeing there are no problems/errors. -- **Synchronization**: - - Always prefer to use the `lock.Lock(); defer lock.Unlock()` pattern for synchronization if possible - - Avoid inline lock/unlock pairs - instead create helper functions that use the defer pattern - - When accessing shared data structures (maps, slices, etc.), ensure proper locking - - Example: Instead of `gc.lock.Lock(); gc.map[key]++; gc.lock.Unlock()`, create a helper function like `getNextValue(key string) int { gc.lock.Lock(); defer gc.lock.Unlock(); gc.map[key]++; return gc.map[key] }` -- **TypeScript Imports**: - - Use `@/...` for imports from different parts of the project (configured in `tsconfig.json` as `"@/*": ["frontend/*"]`). - - Prefer relative imports (`"./name"`) only within the same directory. - - Use named exports exclusively; avoid default exports. It's acceptable to export functions directly (e.g., React Components). - - Our indent is 4 spaces -- **JSON Field Naming**: All fields must be lowercase, without underscores. -- **TypeScript Conventions** - - **Type Handling**: - - In TypeScript we have strict null checks off, so no need to add "| null" to all the types. 
- - In TypeScript for Jotai atoms, if we want to write, we need to type the atom as a PrimitiveAtom - - Jotai has a bug with strict null checks off where if you create a null atom, e.g. atom(null) it does not "type" correctly. That's no issue, just cast it to the proper PrimitiveAtom type (no "| null") and it will work fine. - - Generally never use "=== undefined" or "!== undefined". This is bad style. Just use a "== null" or "!= null" unless it is a very specific case where we need to distinguish undefined from null. - - **Coding Style**: - - Use all lowercase filenames (except where case is actually important like Taskfile.yml) - - Import the "cn" function from "@/util/util" to do classname / clsx class merge (it uses twMerge underneath) - - Do NOT create private fields in classes (they are impossible to inspect) - - Use PascalCase for global consts at the top of files - - **Component Practices**: - - Make sure to add cursor-pointer to buttons/links and clickable items - - NEVER use cursor-help (it looks terrible) - - useAtom() and useAtomValue() are react HOOKS, so they must be called at the component level not inline in JSX - - If you use React.memo(), make sure to add a displayName for the component - - Other - - never use atob() or btoa() (not UTF-8 safe). use functions in frontend/util/util.ts for base64 decoding and encoding -- In general, when writing functions, we prefer _early returns_ rather than putting the majority of a function inside of an if block. - -### Styling - -- We use **Tailwind v4** to style. Custom stuff is defined in frontend/tailwindsetup.css -- _never_ use cursor-help, or cursor-not-allowed (it looks terrible) -- We have custom CSS setup as well, so it is a hybrid system. For new code we prefer tailwind, and are working to migrate code to all use tailwind. 
-- For accent buttons, use "bg-accent/80 text-primary rounded hover:bg-accent transition-colors cursor-pointer" (if you do "bg-accent hover:bg-accent/80" it looks weird as on hover the button gets darker instead of lighter) - -### RPC System - -To define a new RPC call, add the new definition to `pkg/wshrpc/wshrpctypes.go` including any input/output data that is required. After modifying wshrpctypes.go run `task generate` to generate the client APIs. - -For normal "server" RPCs (where a frontend client is calling the main server) you should implement the RPC call in `pkg/wshrpc/wshserver.go`. - -### Electron API - -From within the FE to get the electron API (e.g. the preload functions): - -```ts -import { getApi } from "@/store/global"; - -getApi().getIsDev(); -``` - -The full API is defined in custom.d.ts as type ElectronApi. - -### Code Generation - -- **TypeScript Types**: TypeScript types are automatically generated from Go types. After modifying Go types in `pkg/wshrpc/wshrpctypes.go`, run `task generate` to update the TypeScript type definitions in `frontend/types/gotypes.d.ts`. -- **Manual Edits**: Do not manually edit generated files like `frontend/types/gotypes.d.ts` or `frontend/app/store/wshclientapi.ts`. Instead, modify the source Go types and run `task generate`. - -### Frontend Architecture - -- The application uses Jotai for state management. -- When working with Jotai atoms that need to be updated, define them as `PrimitiveAtom` rather than just `atom`. 
- -### Notes - -- **CRITICAL: Completion format MUST be: "Done: [one-line description]"** -- **Keep your Task Completed summaries VERY short** -- **No lengthy pre-completion summaries** - Do not provide detailed explanations of implementation before using attempt_completion -- **No recaps of changes** - Skip explaining what was done before completion -- **Go directly to completion** - After making changes, proceed directly to attempt_completion without summarizing -- The project is currently an un-released POC / MVP. Do not worry about backward compatibility when making changes -- With React hooks, always complete all hook calls at the top level before any conditional returns (including jotai hook calls useAtom and useAtomValue); when a user explicitly tells you a function handles null inputs, trust them and stop trying to "protect" it with unnecessary checks or workarounds. -- **Match response length to question complexity** - For simple, direct questions in Ask mode (especially those that can be answered in 1-2 sentences), provide equally brief answers. Save detailed explanations for complex topics or when explicitly requested. -- **CRITICAL** - useAtomValue and useAtom are React HOOKS. They cannot be used inline in JSX code, they must appear at the top of a component in the hooks area of the react code. -- for simple functions, we prefer `if (!cond) { return }; functionality;` pattern over `if (cond) { functionality }` because it produces less indentation and is easier to follow. 
-- It is now 2026, so if you write new files, or update files use 2026 for the copyright year -- React.MutableRefObject is deprecated, just use React.RefObject now (in React 19 RefObject is always mutable) - -### Strict Comment Rules - -- **NEVER add comments that merely describe what code is doing**: - - ❌ `mutex.Lock() // Lock the mutex` - - ❌ `counter++ // Increment the counter` - - ❌ `buffer.Write(data) // Write data to buffer` - - ❌ `// Header component for app run list` (above AppRunListHeader) - - ❌ `// Updated function to include onClick parameter` - - ❌ `// Changed padding calculation` - - ❌ `// Removed unnecessary div` - - ❌ `// Using the model's width value here` -- **Only use comments for**: - - Explaining WHY a particular approach was chosen - - Documenting non-obvious edge cases or side effects - - Warning about potential pitfalls in usage - - Explaining complex algorithms that can't be simplified -- **When in doubt, leave it out**. No comment is better than a redundant comment. -- **Never add comments explaining code changes** - The code should speak for itself, and version control tracks changes. The one exception to this rule is if it is a very unobvious implementation. Something that someone would typically implement in a different (wrong) way. Then the comment helps us remember WHY we changed it to a less obvious implementation. -- **Never remove existing comments** unless specifically directed by the user. Comments that are already defined in existing code have been vetted by the user. - -### Jotai Model Pattern (our rules) - -- **Atoms live on the model.** -- **Simple atoms:** define as **field initializers**. -- **Atoms that depend on values/other atoms:** create in the **constructor**. -- Models **never use React hooks**; they use `globalStore.get/set`. -- It's fine to call model methods from **event handlers** or **`useEffect`**. 
-- Models use the **singleton pattern** with a `private static instance` field, a `private constructor`, and a `static getInstance()` method. -- The constructor is `private`; callers always use `getInstance()`. - -```ts -// model/MyModel.ts -import * as jotai from "jotai"; -import { globalStore } from "@/app/store/jotaiStore"; - -export class MyModel { - private static instance: MyModel | null = null; - - // simple atoms (field init) - statusAtom = jotai.atom<"idle" | "running" | "error">("idle"); - outputAtom = jotai.atom(""); - - // ctor-built atoms (need types) - lengthAtom!: jotai.Atom; - thresholdedAtom!: jotai.Atom; - - private constructor(initialThreshold = 20) { - this.lengthAtom = jotai.atom((get) => get(this.outputAtom).length); - this.thresholdedAtom = jotai.atom((get) => get(this.lengthAtom) > initialThreshold); - } - - static getInstance(): MyModel { - if (!MyModel.instance) { - MyModel.instance = new MyModel(); - } - return MyModel.instance; - } - - static resetInstance(): void { - MyModel.instance = null; - } - - async doWork() { - globalStore.set(this.statusAtom, "running"); - // ... do work ... - globalStore.set(this.statusAtom, "idle"); - } -} -``` - -```tsx -// component usage (events & effects OK) -import { useAtomValue } from "jotai"; - -function Panel() { - const model = MyModel.getInstance(); - const status = useAtomValue(model.statusAtom); - const isBig = useAtomValue(model.thresholdedAtom); - - const onClick = () => model.doWork(); - - return ( -
- {status} • {String(isBig)} -
- ); -} -``` - -**Remember:** singleton pattern with `getInstance()`, `private constructor`, atoms on the model, simple-as-fields, ctor for dependent/derived, updates via `globalStore.set/get`. -**Note** Older models may not use the singleton pattern - -### Tool Use - -Do NOT use write_to_file unless it is a new file or very short. Always prefer to use replace_in_file. Often your diffs fail when a file may be out of date in your cache vs the actual on-disk format. You should RE-READ the file and try to create diffs again if your diffs fail rather than fall back to write_to_file. If you feel like your ONLY option is to use write_to_file please ask first. - -Also when adding content to the end of files prefer to use the new append_file tool rather than trying to create a diff (as your diffs are often not specific enough and end up inserting code in the middle of existing functions). - -### Directory Awareness - -- **ALWAYS verify the current working directory before executing commands** -- Either run "pwd" first to verify the directory, or do a "cd" to the correct absolute directory before running commands -- When running tests, do not "cd" to the pkg directory and then run the test. This screws up the cwd and you never recover. run the test from the project root instead. - -### Testing / Compiling Go Code - -No need to run a `go build` or a `go run` to just check if the Go code compiles. VSCode's errors/problems cover this well. -If there are no Go errors in VSCode you can assume the code compiles fine. diff --git a/.kilocode/skills/add-config/SKILL.md b/.kilocode/skills/add-config/SKILL.md deleted file mode 100644 index f961093bb8..0000000000 --- a/.kilocode/skills/add-config/SKILL.md +++ /dev/null @@ -1,471 +0,0 @@ ---- -name: add-config -description: Guide for adding new configuration settings to Wave Terminal. Use when adding a new setting to the configuration system, implementing a new config key, or adding user-customizable settings. 
---- - -# Adding a New Configuration Setting to Wave Terminal - -This guide explains how to add a new configuration setting to Wave Terminal's hierarchical configuration system. - -## Configuration System Overview - -Wave Terminal uses a hierarchical configuration system with: - -1. **Go Struct Definitions** - Type-safe configuration structure in `pkg/wconfig/settingsconfig.go` -2. **JSON Schema** - Auto-generated validation schema in `schema/settings.json` -3. **Default Values** - Built-in defaults in `pkg/wconfig/defaultconfig/settings.json` -4. **User Configuration** - User overrides in `~/.config/waveterm/settings.json` -5. **Block Metadata** - Block-level overrides in `pkg/waveobj/wtypemeta.go` -6. **Documentation** - User-facing docs in `docs/docs/config.mdx` - -Settings cascade from defaults → user settings → connection config → block overrides. - -## Step-by-Step Guide - -### Step 1: Add to Go Struct Definition - -Edit `pkg/wconfig/settingsconfig.go` and add your new field to the `SettingsType` struct: - -```go -type SettingsType struct { - // ... existing fields ... 
- - // Add your new field with appropriate JSON tag - MyNewSetting string `json:"mynew:setting,omitempty"` - - // For different types: - MyBoolSetting bool `json:"mynew:boolsetting,omitempty"` - MyNumberSetting float64 `json:"mynew:numbersetting,omitempty"` - MyIntSetting *int64 `json:"mynew:intsetting,omitempty"` // Use pointer for optional ints - MyArraySetting []string `json:"mynew:arraysetting,omitempty"` -} -``` - -**Naming Conventions:** - -- Use namespace prefixes (e.g., `term:`, `window:`, `ai:`, `web:`, `app:`) -- Use lowercase with colons as separators -- Field names should be descriptive and follow Go naming conventions -- Use `omitempty` tag to exclude empty values from JSON - -**Type Guidelines:** - -- Use `*int64` and `*float64` for optional numeric values -- Use `*bool` for optional boolean values (or `bool` if default is false) -- Use `string` for text values -- Use `[]string` for arrays -- Use `float64` for numbers that can be decimals - -**Namespace Organization:** - -- `app:*` - Application-level settings -- `term:*` - Terminal-specific settings -- `window:*` - Window and UI settings -- `ai:*` - AI-related settings -- `web:*` - Web browser settings -- `editor:*` - Code editor settings -- `conn:*` - Connection settings - -### Step 1.5: Add to Block Metadata (Optional) - -If your setting should support block-level overrides, also add it to `pkg/waveobj/wtypemeta.go`: - -```go -type MetaTSType struct { - // ... existing fields ... 
- - // Add your new field with matching JSON tag and type - MyNewSetting *string `json:"mynew:setting,omitempty"` // Use pointer for optional values - - // For different types: - MyBoolSetting *bool `json:"mynew:boolsetting,omitempty"` - MyNumberSetting *float64 `json:"mynew:numbersetting,omitempty"` - MyIntSetting *int `json:"mynew:intsetting,omitempty"` - MyArraySetting []string `json:"mynew:arraysetting,omitempty"` -} -``` - -**Block Metadata Guidelines:** - -- Use pointer types (`*string`, `*bool`, `*int`, `*float64`) for optional overrides -- JSON tags should exactly match the corresponding settings field -- This enables the hierarchical config system: block metadata → connection config → global settings -- Only add settings here that make sense to override per-block or per-connection - -### Step 2: Set Default Value (Optional) - -If your setting should have a default value, add it to `pkg/wconfig/defaultconfig/settings.json`: - -```json -{ - "ai:preset": "ai@global", - "ai:model": "gpt-5-mini", - // ... existing defaults ... 
- - "mynew:setting": "default value", - "mynew:boolsetting": true, - "mynew:numbersetting": 42.5, - "mynew:intsetting": 100 -} -``` - -**Default Value Guidelines:** - -- Only add defaults for settings that should have non-zero/non-empty initial values -- Ensure defaults make sense for typical user experience -- Keep defaults conservative and safe -- Boolean settings often don't need defaults if `false` is the correct default - -### Step 3: Update Documentation - -Add your new setting to the configuration table in `docs/docs/config.mdx`: - -```markdown -| Key Name | Type | Function | -| ------------------- | -------- | ----------------------------------------- | -| mynew:setting | string | Description of what this setting controls | -| mynew:boolsetting | bool | Enable/disable some feature | -| mynew:numbersetting | float | Numeric setting for some parameter | -| mynew:intsetting | int | Integer setting for some configuration | -| mynew:arraysetting | string[] | Array of strings for multiple values | -``` - -**Documentation Guidelines:** - -- Provide clear, concise descriptions -- For new settings in upcoming releases, add `` -- Update the default configuration example if you added defaults -- Explain what values are valid and what they do - -### Step 4: Regenerate Schema and TypeScript Types - -Run the generate task to automatically regenerate the JSON schema and TypeScript types: - -```bash -task generate -``` - -**What this does:** - -- Runs `task build:schema` (automatically generates JSON schema from Go structs) -- Generates TypeScript type definitions in `frontend/types/gotypes.d.ts` -- Generates RPC client APIs -- Generates metadata constants - -**Important:** The JSON schema in `schema/settings.json` is **automatically generated** from the Go struct definitions - you don't need to edit it manually. 
- -### Step 5: Use in Frontend Code - -Access your new setting in React components: - -```typescript -import { getOverrideConfigAtom, getSettingsKeyAtom, useAtomValue } from "@/store/global"; - -// In a React component -const MyComponent = ({ blockId }: { blockId: string }) => { - // Use override config atom for hierarchical resolution - // This automatically checks: block metadata → connection config → global settings → default - const mySettingAtom = getOverrideConfigAtom(blockId, "mynew:setting"); - const mySetting = useAtomValue(mySettingAtom) ?? "fallback value"; - - // For global-only settings (no block overrides) - const globalOnlySetting = useAtomValue(getSettingsKeyAtom("mynew:globalsetting")) ?? "fallback"; - - return
Setting value: {mySetting}
; -}; -``` - -**Frontend Configuration Patterns:** - -```typescript -// 1. Settings with block-level overrides (recommended for most view/display settings) -const termFontSize = useAtomValue(getOverrideConfigAtom(blockId, "term:fontsize")) ?? 12; - -// 2. Global-only settings (app-wide settings that don't vary by block) -const appGlobalHotkey = useAtomValue(getSettingsKeyAtom("app:globalhotkey")) ?? ""; - -// 3. Connection-specific settings -const connStatus = useAtomValue(getConnStatusAtom(connectionName)); -``` - -**When to use each pattern:** - -- Use `getOverrideConfigAtom()` for settings that can vary by block or connection (most UI/display settings) -- Use `getSettingsKeyAtom()` for app-level settings that are always global -- Always provide a fallback value with `??` operator - -### Step 6: Use in Backend Code - -Access settings in Go code: - -```go -// Get the full config -fullConfig := wconfig.GetWatcher().GetFullConfig() - -// Access your setting -myValue := fullConfig.Settings.MyNewSetting - -// For optional values (pointers) -if fullConfig.Settings.MyIntSetting != nil { - intValue := *fullConfig.Settings.MyIntSetting - // Use intValue -} -``` - -## Complete Examples - -### Example 1: Simple Boolean Setting (No Block Override) - -**Use case:** Add a setting to hide the AI button globally - -#### 1. Go Struct (`pkg/wconfig/settingsconfig.go`) - -```go -type SettingsType struct { - // ... existing fields ... - AppHideAiButton bool `json:"app:hideaibutton,omitempty"` -} -``` - -#### 2. Default Value (`pkg/wconfig/defaultconfig/settings.json`) - -```json -{ - "app:hideaibutton": false -} -``` - -#### 3. Documentation (`docs/docs/config.mdx`) - -```markdown -| app:hideaibutton | bool | Hide the AI button in the tab bar (defaults to false) | -``` - -#### 4. Generate Types - -```bash -task generate -``` - -#### 5. 
Frontend Usage - -```typescript -import { getSettingsKeyAtom } from "@/store/global"; - -const TabBar = () => { - const hideAiButton = useAtomValue(getSettingsKeyAtom("app:hideaibutton")); - - if (hideAiButton) { - return null; // Don't render AI button - } - - return ; -}; -``` - -#### 6. Usage Examples - -```bash -# Set in settings file -wsh setconfig app:hideaibutton=true - -# Or edit ~/.config/waveterm/settings.json -{ - "app:hideaibutton": true -} -``` - -### Example 2: Terminal Setting with Block Override - -**Use case:** Add a terminal bell sound setting that can be overridden per block - -#### 1. Go Struct (`pkg/wconfig/settingsconfig.go`) - -```go -type SettingsType struct { - // ... existing fields ... - TermBellSound string `json:"term:bellsound,omitempty"` -} -``` - -#### 2. Block Metadata (`pkg/waveobj/wtypemeta.go`) - -```go -type MetaTSType struct { - // ... existing fields ... - TermBellSound *string `json:"term:bellsound,omitempty"` // Pointer for optional override -} -``` - -#### 3. Default Value (`pkg/wconfig/defaultconfig/settings.json`) - -```json -{ - "term:bellsound": "default" -} -``` - -#### 4. Documentation (`docs/docs/config.mdx`) - -```markdown -| term:bellsound | string | Sound to play for terminal bell ("default", "none", or custom sound file path) | -``` - -#### 5. Generate Types - -```bash -task generate -``` - -#### 6. Frontend Usage - -```typescript -import { getOverrideConfigAtom } from "@/store/global"; - -const TerminalView = ({ blockId }: { blockId: string }) => { - // Use override config for hierarchical resolution - const bellSoundAtom = getOverrideConfigAtom(blockId, "term:bellsound"); - const bellSound = useAtomValue(bellSoundAtom) ?? "default"; - - const playBellSound = () => { - if (bellSound === "none") return; - // Play the bell sound - }; - - return
Terminal with bell: {bellSound}
; -}; -``` - -#### 7. Usage Examples - -```bash -# Set globally in settings file -wsh setconfig term:bellsound="custom.wav" - -# Set for current block only -wsh setmeta term:bellsound="none" - -# Set for specific block -wsh setmeta --block BLOCK_ID term:bellsound="beep" - -# Or edit ~/.config/waveterm/settings.json -{ - "term:bellsound": "custom.wav" -} -``` - -## Configuration Patterns - -### Clear/Reset Pattern - -Each namespace can have a "clear" field for resetting all settings in that namespace: - -```go -AppClear bool `json:"app:*,omitempty"` -TermClear bool `json:"term:*,omitempty"` -``` - -### Optional vs Required Settings - -- Use pointer types (`*bool`, `*int64`, `*float64`) for truly optional settings -- Use regular types for settings that should always have a value -- Provide sensible defaults for important settings - -### Block-Level Overrides via RPC - -Settings can be overridden at the block level using metadata: - -```typescript -import { RpcApi } from "@/app/store/wshclientapi"; -import { TabRpcClient } from "@/app/store/wshrpcutil"; -import { WOS } from "@/store/global"; - -// Set block-specific override -await RpcApi.SetMetaCommand(TabRpcClient, { - oref: WOS.makeORef("block", blockId), - meta: { "mynew:setting": "block-specific value" }, -}); -``` - -## Common Pitfalls - -### 1. Forgetting to Run `task generate` - -**Problem:** TypeScript types not updated, schema out of sync - -**Solution:** Always run `task generate` after modifying Go structs - -### 2. Type Mismatch Between Settings and Metadata - -**Problem:** Settings uses `string`, metadata uses `*int` - -**Solution:** Ensure types match (except metadata uses pointers for optionals) - -### 3. Not Providing Fallback Values - -**Problem:** Component breaks if setting is undefined - -**Solution:** Always use `??` operator with fallback: - -```typescript -const value = useAtomValue(getSettingsKeyAtom("key")) ?? "default"; -``` - -### 4. 
Using Wrong Config Atom - -**Problem:** Using `getSettingsKeyAtom()` for settings that need block overrides - -**Solution:** Use `getOverrideConfigAtom()` for any setting in `MetaTSType` - -## Best Practices - -### Naming - -- **Use descriptive names**: `term:fontsize` not `term:fs` -- **Follow namespace conventions**: Group related settings with common prefix -- **Use consistent casing**: Always lowercase with colons - -### Types - -- **Use `bool`** for simple on/off settings (no pointer if false is default) -- **Use `*bool`** only if you need to distinguish unset from false -- **Use `*int64`/`*float64`** for optional numeric values -- **Use `string`** for text, paths, or enum-like values -- **Use `[]string`** for lists - -### Defaults - -- **Provide sensible defaults** for settings users will commonly change -- **Omit defaults** for advanced/optional settings -- **Keep defaults safe** - don't enable experimental features by default -- **Document defaults** clearly in config.mdx - -### Block Overrides - -- **Enable for view/display settings**: Font sizes, colors, themes, etc. -- **Don't enable for app-wide settings**: Global hotkeys, window behavior, etc. -- **Consider the use case**: Would a user want different values per block or connection? 
- -### Documentation - -- **Be specific**: Explain what the setting does and what values are valid -- **Provide examples**: Show common use cases -- **Add version badges**: Mark new settings with `` -- **Keep it current**: Update docs when behavior changes - -## Quick Reference - -When adding a new configuration setting: - -- [ ] Add field to `SettingsType` in `pkg/wconfig/settingsconfig.go` -- [ ] Add field to `MetaTSType` in `pkg/waveobj/wtypemeta.go` (if block override needed) -- [ ] Add default to `pkg/wconfig/defaultconfig/settings.json` (if needed) -- [ ] Document in `docs/docs/config.mdx` -- [ ] Run `task generate` to update TypeScript types -- [ ] Use appropriate atom (`getOverrideConfigAtom` or `getSettingsKeyAtom`) in frontend - -## Related Documentation - -- **User Documentation**: `docs/docs/config.mdx` - User-facing configuration docs -- **Type Definitions**: `pkg/wconfig/settingsconfig.go` - Go struct definitions -- **Metadata Types**: `pkg/waveobj/wtypemeta.go` - Block metadata definitions diff --git a/.kilocode/skills/add-rpc/SKILL.md b/.kilocode/skills/add-rpc/SKILL.md deleted file mode 100644 index 0bf5117f9f..0000000000 --- a/.kilocode/skills/add-rpc/SKILL.md +++ /dev/null @@ -1,453 +0,0 @@ ---- -name: add-rpc -description: Guide for adding new RPC calls to Wave Terminal. Use when implementing new RPC commands, adding server-client communication methods, or extending the RPC interface with new functionality. ---- - -# Adding RPC Calls Guide - -## Overview - -Wave Terminal uses a WebSocket-based RPC (Remote Procedure Call) system for communication between different components. The RPC system allows the frontend, backend, electron main process, remote servers, and terminal blocks to communicate with each other through well-defined commands. - -This guide covers how to add a new RPC command to the system. 
- -## Key Files - -- `pkg/wshrpc/wshrpctypes.go` - RPC interface and type definitions -- `pkg/wshrpc/wshserver/wshserver.go` - Main server implementation (most common) -- `emain/emain-wsh.ts` - Electron main process implementation -- `frontend/app/store/tabrpcclient.ts` - Frontend tab implementation -- `pkg/wshrpc/wshremote/wshremote.go` - Remote server implementation -- `frontend/app/view/term/term-wsh.tsx` - Terminal block implementation - -## RPC Command Structure - -RPC commands in Wave Terminal follow these conventions: - -- **Method names** must end with `Command` -- **First parameter** must be `context.Context` -- **Remaining parameters** are a regular Go parameter list (zero or more typed args) -- **Return values** can be either just an error, or one return value plus an error -- **Streaming commands** return a channel instead of a direct value - -## Adding a New RPC Call - -### Step 1: Define the Command in the Interface - -Add your command to the `WshRpcInterface` in `pkg/wshrpc/wshrpctypes.go`: - -```go -type WshRpcInterface interface { - // ... existing commands ... 
- - // Add your new command - YourNewCommand(ctx context.Context, data CommandYourNewData) (*YourNewResponse, error) -} -``` - -**Method Signature Rules:** - -- Method name must end with `Command` -- First parameter must be `ctx context.Context` -- Remaining parameters are a regular Go parameter list (zero or more) -- Return either `error` or `(ReturnType, error)` -- For streaming, return `chan RespOrErrorUnion[T]` - -### Step 2: Define Request and Response Types - -If your command needs structured input or output, define types in the same file: - -```go -type CommandYourNewData struct { - FieldOne string `json:"fieldone"` - FieldTwo int `json:"fieldtwo"` - SomeId string `json:"someid"` -} - -type YourNewResponse struct { - ResultField string `json:"resultfield"` - Success bool `json:"success"` -} -``` - -**Type Naming Conventions:** - -- Request types: `Command[Name]Data` (e.g., `CommandGetMetaData`) -- Response types: `[Name]Response` or `Command[Name]RtnData` (e.g., `CommandResolveIdsRtnData`) -- Use `json` struct tags with lowercase field names -- Follow existing patterns in the file for consistency - -### Step 3: Generate Bindings - -After modifying `pkg/wshrpc/wshrpctypes.go`, run code generation to create TypeScript bindings and Go helper code: - -```bash -task generate -``` - -This command will: -- Generate TypeScript type definitions in `frontend/types/gotypes.d.ts` -- Create RPC client bindings -- Update routing code - -**Note:** If generation fails, check that your method signature follows all the rules above. - -### Step 4: Implement the Command - -Choose where to implement your command based on what it needs to do: - -#### A. 
Main Server Implementation (Most Common) - -Implement in `pkg/wshrpc/wshserver/wshserver.go`: - -```go -func (ws *WshServer) YourNewCommand(ctx context.Context, data wshrpc.CommandYourNewData) (*wshrpc.YourNewResponse, error) { - // Validate input - if data.SomeId == "" { - return nil, fmt.Errorf("someid is required") - } - - // Implement your logic - result := doSomething(data) - - // Return response - return &wshrpc.YourNewResponse{ - ResultField: result, - Success: true, - }, nil -} -``` - -**Use main server when:** -- Accessing the database -- Managing blocks, tabs, or workspaces -- Coordinating between components -- Handling file operations on the main filesystem - -#### B. Electron Implementation - -Implement in `emain/emain-wsh.ts`: - -```typescript -async handle_yournew(rh: RpcResponseHelper, data: CommandYourNewData): Promise { - // Electron-specific logic - const result = await electronAPI.doSomething(data); - - return { - resultfield: result, - success: true, - }; -} -``` - -**Use Electron when:** -- Accessing native OS features -- Managing application windows -- Using Electron APIs (notifications, system tray, etc.) -- Handling encryption/decryption with safeStorage - -#### C. Frontend Tab Implementation - -Implement in `frontend/app/store/tabrpcclient.ts`: - -```typescript -async handle_yournew(rh: RpcResponseHelper, data: CommandYourNewData): Promise { - // Access frontend state/models - const layoutModel = getLayoutModelForStaticTab(); - - // Implement tab-specific logic - const result = layoutModel.doSomething(data); - - return { - resultfield: result, - success: true, - }; -} -``` - -**Use tab client when:** -- Accessing React state or Jotai atoms -- Manipulating UI layout -- Capturing screenshots -- Reading frontend-only data - -#### D. 
Remote Server Implementation - -Implement in `pkg/wshrpc/wshremote/wshremote.go`: - -```go -func (impl *ServerImpl) RemoteYourNewCommand(ctx context.Context, data wshrpc.CommandRemoteYourNewData) (*wshrpc.YourNewResponse, error) { - // Remote filesystem or process operations - result, err := performRemoteOperation(data) - if err != nil { - return nil, fmt.Errorf("remote operation failed: %w", err) - } - - return &wshrpc.YourNewResponse{ - ResultField: result, - Success: true, - }, nil -} -``` - -**Use remote server when:** -- Operating on remote filesystems -- Executing commands on remote hosts -- Managing remote processes -- Convention: prefix command name with `Remote` (e.g., `RemoteGetInfoCommand`) - -#### E. Terminal Block Implementation - -Implement in `frontend/app/view/term/term-wsh.tsx`: - -```typescript -async handle_yournew(rh: RpcResponseHelper, data: CommandYourNewData): Promise { - // Access terminal-specific data - const termWrap = this.model.termRef.current; - - // Implement terminal logic - const result = termWrap.doSomething(data); - - return { - resultfield: result, - success: true, - }; -} -``` - -**Use terminal client when:** -- Accessing terminal buffer/scrollback -- Managing VDOM contexts -- Reading terminal-specific state -- Interacting with xterm.js - -## Complete Example: Adding GetWaveInfo Command - -### 1. Define Interface - -In `pkg/wshrpc/wshrpctypes.go`: - -```go -type WshRpcInterface interface { - // ... other commands ... - WaveInfoCommand(ctx context.Context) (*WaveInfoData, error) -} - -type WaveInfoData struct { - Version string `json:"version"` - BuildTime string `json:"buildtime"` - ConfigPath string `json:"configpath"` - DataPath string `json:"datapath"` -} -``` - -### 2. Generate Bindings - -```bash -task generate -``` - -### 3. 
Implement in Main Server - -In `pkg/wshrpc/wshserver/wshserver.go`: - -```go -func (ws *WshServer) WaveInfoCommand(ctx context.Context) (*wshrpc.WaveInfoData, error) { - return &wshrpc.WaveInfoData{ - Version: wavebase.WaveVersion, - BuildTime: wavebase.BuildTime, - ConfigPath: wavebase.GetConfigDir(), - DataPath: wavebase.GetWaveDataDir(), - }, nil -} -``` - -### 4. Call from Frontend - -```typescript -import { RpcApi } from "@/app/store/wshclientapi"; - -// Call the RPC -const info = await RpcApi.WaveInfoCommand(TabRpcClient); -console.log("Wave Version:", info.version); -``` - -## Streaming Commands - -For commands that return data progressively, use channels: - -### Define Streaming Interface - -```go -type WshRpcInterface interface { - StreamYourDataCommand(ctx context.Context, request YourDataRequest) chan RespOrErrorUnion[YourDataType] -} -``` - -### Implement Streaming Command - -```go -func (ws *WshServer) StreamYourDataCommand(ctx context.Context, request wshrpc.YourDataRequest) chan wshrpc.RespOrErrorUnion[wshrpc.YourDataType] { - rtn := make(chan wshrpc.RespOrErrorUnion[wshrpc.YourDataType]) - - go func() { - defer close(rtn) - defer func() { - panichandler.PanicHandler("StreamYourDataCommand", recover()) - }() - - // Stream data - for i := 0; i < 10; i++ { - select { - case <-ctx.Done(): - return - default: - rtn <- wshrpc.RespOrErrorUnion[wshrpc.YourDataType]{ - Response: wshrpc.YourDataType{ - Value: i, - }, - } - time.Sleep(100 * time.Millisecond) - } - } - }() - - return rtn -} -``` - -## Best Practices - -1. **Validation First**: Always validate input parameters at the start of your implementation - -2. **Descriptive Names**: Use clear, action-oriented command names (e.g., `GetFullConfigCommand`, not `ConfigCommand`) - -3. **Error Handling**: Return descriptive errors with context: - ```go - return nil, fmt.Errorf("error creating block: %w", err) - ``` - -4. 
**Context Awareness**: Respect context cancellation for long-running operations: - ```go - select { - case <-ctx.Done(): - return ctx.Err() - default: - // continue - } - ``` - -5. **Consistent Types**: Follow existing naming patterns for request/response types - -6. **JSON Tags**: Always use lowercase JSON tags matching frontend conventions - -7. **Documentation**: Add comments explaining complex commands or special behaviors - -8. **Type Safety**: Leverage TypeScript generation - your types will be checked on both ends - -9. **Panic Recovery**: Use `panichandler.PanicHandler` in goroutines to prevent crashes - -10. **Route Awareness**: For multi-route scenarios, use `wshutil.GetRpcSourceFromContext(ctx)` to identify callers - -## Common Command Patterns - -### Simple Query - -```go -func (ws *WshServer) GetSomethingCommand(ctx context.Context, id string) (*Something, error) { - obj, err := wstore.DBGet[*Something](ctx, id) - if err != nil { - return nil, fmt.Errorf("error getting something: %w", err) - } - return obj, nil -} -``` - -### Mutation with Updates - -```go -func (ws *WshServer) UpdateSomethingCommand(ctx context.Context, data wshrpc.CommandUpdateData) error { - ctx = waveobj.ContextWithUpdates(ctx) - - // Make changes - err := wstore.UpdateObject(ctx, data.ORef, data.Updates) - if err != nil { - return fmt.Errorf("error updating: %w", err) - } - - // Broadcast updates - updates := waveobj.ContextGetUpdatesRtn(ctx) - wps.Broker.SendUpdateEvents(updates) - - return nil -} -``` - -### Command with Side Effects - -```go -func (ws *WshServer) DoActionCommand(ctx context.Context, data wshrpc.CommandActionData) error { - // Perform action - result, err := performAction(data) - if err != nil { - return err - } - - // Publish event about the action - go func() { - wps.Broker.Publish(wps.WaveEvent{ - Event: wps.Event_ActionComplete, - Data: result, - }) - }() - - return nil -} -``` - -## Troubleshooting - -### Command Not Found - -- Ensure method name ends with 
`Command` -- Verify you ran `task generate` -- Check that the interface is in `WshRpcInterface` - -### Type Mismatch Errors - -- Run `task generate` after changing types -- Ensure JSON tags are lowercase -- Verify TypeScript code is using generated types - -### Command Times Out - -- Check for blocking operations -- Ensure context is passed through -- Consider using a streaming command for long operations - -### Routing Issues - -- For remote commands, ensure they're implemented in the correct location -- Check route configuration in RpcContext -- Verify authentication for secured routes - -## Quick Reference - -When adding a new RPC command: - -- [ ] Add method to `WshRpcInterface` in `pkg/wshrpc/wshrpctypes.go` (must end with `Command`) -- [ ] Define request/response types with JSON tags (if needed) -- [ ] Run `task generate` to create bindings -- [ ] Implement in the appropriate location: - - [ ] `wshserver.go` for main server (most common) - - [ ] `emain-wsh.ts` for Electron - - [ ] `tabrpcclient.ts` for frontend - - [ ] `wshremote.go` for remote (prefix with `Remote`) - - [ ] `term-wsh.tsx` for terminal -- [ ] Add input validation -- [ ] Handle errors with context -- [ ] Test the command end-to-end - -## Related Documentation - -- **WPS Events**: See the `wps-events` skill - Publishing events from RPC commands diff --git a/.kilocode/skills/add-wshcmd/SKILL.md b/.kilocode/skills/add-wshcmd/SKILL.md deleted file mode 100644 index 0cdae64702..0000000000 --- a/.kilocode/skills/add-wshcmd/SKILL.md +++ /dev/null @@ -1,921 +0,0 @@ ---- -name: add-wshcmd -description: Guide for adding new wsh commands to Wave Terminal. Use when implementing new CLI commands, adding command-line functionality, or extending the wsh command interface. ---- - -# Adding a New wsh Command to Wave Terminal - -This guide explains how to add a new command to the `wsh` CLI tool. - -## wsh Command System Overview - -Wave Terminal's `wsh` command provides CLI access to Wave Terminal features. 
The system uses: - -1. **Cobra Framework** - CLI command structure and parsing -2. **Command Files** - Individual command implementations in `cmd/wsh/cmd/wshcmd-*.go` -3. **RPC Client** - Communication with Wave Terminal backend via `RpcClient` -4. **Activity Tracking** - Telemetry for command usage analytics -5. **Documentation** - User-facing docs in `docs/docs/wsh-reference.mdx` - -Commands are registered in their `init()` functions and execute through the Cobra framework. - -## Step-by-Step Guide - -### Step 1: Create Command File - -Create a new file in `cmd/wsh/cmd/` named `wshcmd-[commandname].go`: - -```go -// Copyright 2025, Command Line Inc. -// SPDX-License-Identifier: Apache-2.0 - -package cmd - -import ( - "fmt" - - "github.com/spf13/cobra" - "github.com/wavetermdev/waveterm/pkg/wshrpc" - "github.com/wavetermdev/waveterm/pkg/wshrpc/wshclient" -) - -var myCommandCmd = &cobra.Command{ - Use: "mycommand [args]", - Short: "Brief description of what this command does", - Long: `Detailed description of the command. 
-Can include multiple lines and examples of usage.`, - RunE: myCommandRun, - PreRunE: preRunSetupRpcClient, // Include if command needs RPC - DisableFlagsInUseLine: true, -} - -// Flag variables -var ( - myCommandFlagExample string - myCommandFlagVerbose bool -) - -func init() { - // Add command to root - rootCmd.AddCommand(myCommandCmd) - - // Define flags - myCommandCmd.Flags().StringVarP(&myCommandFlagExample, "example", "e", "", "example flag description") - myCommandCmd.Flags().BoolVarP(&myCommandFlagVerbose, "verbose", "v", false, "enable verbose output") -} - -func myCommandRun(cmd *cobra.Command, args []string) (rtnErr error) { - // Always track activity for telemetry - defer func() { - sendActivity("mycommand", rtnErr == nil) - }() - - // Validate arguments - if len(args) == 0 { - OutputHelpMessage(cmd) - return fmt.Errorf("requires at least one argument") - } - - // Command implementation - fmt.Printf("Command executed successfully\n") - return nil -} -``` - -**File Naming Convention:** -- Use `wshcmd-[commandname].go` format -- Use lowercase, hyphenated names for multi-word commands -- Examples: `wshcmd-getvar.go`, `wshcmd-setmeta.go`, `wshcmd-ai.go` - -### Step 2: Command Structure - -#### Basic Command Structure - -```go -var myCommandCmd = &cobra.Command{ - Use: "mycommand [required] [optional...]", - Short: "One-line description (shown in help)", - Long: `Detailed multi-line description`, - - // Argument validation - Args: cobra.MinimumNArgs(1), // Or cobra.ExactArgs(1), cobra.NoArgs, etc. 
- - // Execution function - RunE: myCommandRun, - - // Pre-execution setup (if needed) - PreRunE: preRunSetupRpcClient, // Sets up RPC client for backend communication - - // Example usage (optional) - Example: " wsh mycommand foo\n wsh mycommand --flag bar", - - // Disable flag notation in usage line - DisableFlagsInUseLine: true, -} -``` - -**Key Fields:** -- `Use`: Command name and argument pattern -- `Short`: Brief description for command list -- `Long`: Detailed description shown in help -- `Args`: Argument validator (optional) -- `RunE`: Main execution function (returns error) -- `PreRunE`: Setup function that runs before `RunE` -- `Example`: Usage examples (optional) -- `DisableFlagsInUseLine`: Clean up help display - -#### When to Use PreRunE - -Include `PreRunE: preRunSetupRpcClient` if your command: -- Communicates with the Wave Terminal backend -- Needs access to `RpcClient` -- Requires JWT authentication (WAVETERM_JWT env var) -- Makes RPC calls via `wshclient.*Command()` functions - -**Don't include PreRunE** for commands that: -- Only manipulate local state -- Don't need backend communication -- Are purely informational/local operations - -### Step 3: Implement Command Logic - -#### Command Function Pattern - -```go -func myCommandRun(cmd *cobra.Command, args []string) (rtnErr error) { - // Step 1: Always track activity (for telemetry) - defer func() { - sendActivity("mycommand", rtnErr == nil) - }() - - // Step 2: Validate arguments and flags - if len(args) != 1 { - OutputHelpMessage(cmd) - return fmt.Errorf("requires exactly one argument") - } - - // Step 3: Parse/prepare data - targetArg := args[0] - - // Step 4: Make RPC call if needed - result, err := wshclient.SomeCommand(RpcClient, wshrpc.CommandSomeData{ - Field: targetArg, - }, &wshrpc.RpcOpts{Timeout: 2000}) - if err != nil { - return fmt.Errorf("executing command: %w", err) - } - - // Step 5: Output results - fmt.Printf("Result: %s\n", result) - return nil -} -``` - -**Important Patterns:** 
- -1. **Activity Tracking**: Always include deferred `sendActivity()` call - ```go - defer func() { - sendActivity("commandname", rtnErr == nil) - }() - ``` - -2. **Error Handling**: Return errors, don't call `os.Exit()` - ```go - if err != nil { - return fmt.Errorf("context: %w", err) - } - ``` - -3. **Output**: Use standard `fmt` package for output - ```go - fmt.Printf("Success message\n") - fmt.Fprintf(os.Stderr, "Error message\n") - ``` - -4. **Help Messages**: Show help when arguments are invalid - ```go - if len(args) == 0 { - OutputHelpMessage(cmd) - return fmt.Errorf("requires arguments") - } - ``` - -5. **Exit Codes**: Set custom exit code via `WshExitCode` - ```go - if notFound { - WshExitCode = 1 - return nil // Don't return error, just set exit code - } - ``` - -### Step 4: Define Flags - -Add flags in the `init()` function: - -```go -var ( - // Declare flag variables at package level - myCommandFlagString string - myCommandFlagBool bool - myCommandFlagInt int -) - -func init() { - rootCmd.AddCommand(myCommandCmd) - - // String flag with short version - myCommandCmd.Flags().StringVarP(&myCommandFlagString, "name", "n", "default", "description") - - // Boolean flag - myCommandCmd.Flags().BoolVarP(&myCommandFlagBool, "verbose", "v", false, "enable verbose") - - // Integer flag - myCommandCmd.Flags().IntVar(&myCommandFlagInt, "count", 10, "set count") - - // Flag without short version - myCommandCmd.Flags().StringVar(&myCommandFlagString, "longname", "", "description") -} -``` - -**Flag Types:** -- `StringVar/StringVarP` - String values -- `BoolVar/BoolVarP` - Boolean flags -- `IntVar/IntVarP` - Integer values -- The `P` suffix versions include a short flag name - -**Flag Naming:** -- Use camelCase for variable names: `myCommandFlagName` -- Use kebab-case for flag names: `--flag-name` -- Prefix variable names with command name for clarity - -### Step 5: Working with Block Arguments - -Many commands operate on blocks. 
Use the standard block resolution pattern: - -```go -func myCommandRun(cmd *cobra.Command, args []string) (rtnErr error) { - defer func() { - sendActivity("mycommand", rtnErr == nil) - }() - - // Resolve block using the -b/--block flag - fullORef, err := resolveBlockArg() - if err != nil { - return err - } - - // Use the blockid in RPC call - err = wshclient.SomeCommand(RpcClient, wshrpc.CommandSomeData{ - BlockId: fullORef.OID, - }, &wshrpc.RpcOpts{Timeout: 2000}) - if err != nil { - return fmt.Errorf("command failed: %w", err) - } - - return nil -} -``` - -**Block Resolution:** -- The `-b/--block` flag is defined globally in `wshcmd-root.go` -- `resolveBlockArg()` resolves the block argument to a full ORef -- Supports: `this`, `tab`, full UUIDs, 8-char prefixes, block numbers -- Default is `"this"` (current block) - -**Alternative: Manual Block Resolution** - -```go -// Get tab ID from environment -tabId := os.Getenv("WAVETERM_TABID") -if tabId == "" { - return fmt.Errorf("WAVETERM_TABID not set") -} - -// Create route for tab-level operations -route := wshutil.MakeTabRouteId(tabId) - -// Use route in RPC call -err := wshclient.SomeCommand(RpcClient, commandData, &wshrpc.RpcOpts{ - Route: route, - Timeout: 2000, -}) -``` - -### Step 6: Making RPC Calls - -Use the `wshclient` package to make RPC calls: - -```go -import ( - "github.com/wavetermdev/waveterm/pkg/wshrpc" - "github.com/wavetermdev/waveterm/pkg/wshrpc/wshclient" -) - -// Simple RPC call -result, err := wshclient.GetMetaCommand(RpcClient, wshrpc.CommandGetMetaData{ - ORef: *fullORef, -}, &wshrpc.RpcOpts{Timeout: 2000}) -if err != nil { - return fmt.Errorf("getting metadata: %w", err) -} - -// RPC call with routing -err := wshclient.SetMetaCommand(RpcClient, wshrpc.CommandSetMetaData{ - ORef: *fullORef, - Meta: metaMap, -}, &wshrpc.RpcOpts{ - Route: route, - Timeout: 5000, -}) -if err != nil { - return fmt.Errorf("setting metadata: %w", err) -} -``` - -**RPC Options:** -- `Timeout`: Request timeout in 
milliseconds (typically 2000-5000) -- `Route`: Route ID for targeting specific components -- Available routes: `wshutil.ControlRoute`, `wshutil.MakeTabRouteId(tabId)` - -### Step 7: Add Documentation - -Add your command to `docs/docs/wsh-reference.mdx`: - -````markdown -## mycommand - -Brief description of what the command does. - -```sh -wsh mycommand [args] [flags] -``` - -Detailed explanation of the command's purpose and behavior. - -Flags: -- `-n, --name ` - description of this flag -- `-v, --verbose` - enable verbose output -- `-b, --block ` - specify target block (default: current block) - -Examples: - -```sh -# Basic usage -wsh mycommand arg1 - -# With flags -wsh mycommand --name value arg1 - -# With block targeting -wsh mycommand -b 2 arg1 - -# Complex example -wsh mycommand -v --name "example" arg1 arg2 -``` - -Additional notes, tips, or warnings about the command. - ---- -```` - -**Documentation Guidelines:** -- Place in alphabetical order with other commands -- Include command signature with argument pattern -- List all flags with short and long versions -- Provide practical examples (at least 3-5) -- Explain common use cases and patterns -- Add tips or warnings if relevant -- Use `---` separator between commands - -### Step 8: Test Your Command - -Build and test the command: - -```bash -# Build wsh -task build:wsh - -# Or build everything -task build - -# Test the command -./bin/wsh/wsh mycommand --help -./bin/wsh/wsh mycommand arg1 arg2 -``` - -**Testing Checklist:** -- [ ] Help message displays correctly -- [ ] Required arguments validated -- [ ] Flags work as expected -- [ ] Error messages are clear -- [ ] Success cases work correctly -- [ ] RPC calls complete successfully -- [ ] Output is formatted correctly - -## Complete Examples - -### Example 1: Simple Command with No RPC - -**Use case:** A command that prints Wave Terminal version info - -#### Command File (`cmd/wsh/cmd/wshcmd-version.go`) - -```go -// Copyright 2025, Command Line Inc. 
-// SPDX-License-Identifier: Apache-2.0 - -package cmd - -import ( - "github.com/spf13/cobra" - "github.com/wavetermdev/waveterm/pkg/wavebase" -) - -var versionCmd = &cobra.Command{ - Use: "version", - Short: "Print Wave Terminal version", - RunE: versionRun, -} - -func init() { - rootCmd.AddCommand(versionCmd) -} - -func versionRun(cmd *cobra.Command, args []string) (rtnErr error) { - defer func() { - sendActivity("version", rtnErr == nil) - }() - - fmt.Printf("Wave Terminal %s\n", wavebase.WaveVersion) - return nil -} -``` - -#### Documentation - -````markdown -## version - -Print the current Wave Terminal version. - -```sh -wsh version -``` - -Examples: - -```sh -# Print version -wsh version -``` -```` - -### Example 2: Command with Flags and RPC - -**Use case:** A command to update block title - -#### Command File (`cmd/wsh/cmd/wshcmd-settitle.go`) - -```go -// Copyright 2025, Command Line Inc. -// SPDX-License-Identifier: Apache-2.0 - -package cmd - -import ( - "fmt" - - "github.com/spf13/cobra" - "github.com/wavetermdev/waveterm/pkg/wshrpc" - "github.com/wavetermdev/waveterm/pkg/wshrpc/wshclient" -) - -var setTitleCmd = &cobra.Command{ - Use: "settitle [title]", - Short: "Set block title", - Long: `Set the title for the current or specified block.`, - Args: cobra.ExactArgs(1), - RunE: setTitleRun, - PreRunE: preRunSetupRpcClient, - DisableFlagsInUseLine: true, -} - -var setTitleIcon string - -func init() { - rootCmd.AddCommand(setTitleCmd) - setTitleCmd.Flags().StringVarP(&setTitleIcon, "icon", "i", "", "set block icon") -} - -func setTitleRun(cmd *cobra.Command, args []string) (rtnErr error) { - defer func() { - sendActivity("settitle", rtnErr == nil) - }() - - title := args[0] - - // Resolve block - fullORef, err := resolveBlockArg() - if err != nil { - return err - } - - // Build metadata map - meta := make(map[string]interface{}) - meta["title"] = title - if setTitleIcon != "" { - meta["icon"] = setTitleIcon - } - - // Make RPC call - err = 
wshclient.SetMetaCommand(RpcClient, wshrpc.CommandSetMetaData{ - ORef: *fullORef, - Meta: meta, - }, &wshrpc.RpcOpts{Timeout: 2000}) - if err != nil { - return fmt.Errorf("setting title: %w", err) - } - - fmt.Printf("title updated\n") - return nil -} -``` - -#### Documentation - -````markdown -## settitle - -Set the title for a block. - -```sh -wsh settitle [title] -``` - -Update the display title for the current or specified block. Optionally set an icon as well. - -Flags: -- `-i, --icon ` - set block icon along with title -- `-b, --block ` - specify target block (default: current block) - -Examples: - -```sh -# Set title for current block -wsh settitle "My Terminal" - -# Set title and icon -wsh settitle --icon "terminal" "Development Shell" - -# Set title for specific block -wsh settitle -b 2 "Build Output" -``` -```` - -### Example 3: Subcommands - -**Use case:** Command with multiple subcommands (like `wsh conn`) - -#### Command File (`cmd/wsh/cmd/wshcmd-mygroup.go`) - -```go -// Copyright 2025, Command Line Inc. 
-// SPDX-License-Identifier: Apache-2.0 - -package cmd - -import ( - "fmt" - - "github.com/spf13/cobra" - "github.com/wavetermdev/waveterm/pkg/wshrpc" - "github.com/wavetermdev/waveterm/pkg/wshrpc/wshclient" -) - -var myGroupCmd = &cobra.Command{ - Use: "mygroup", - Short: "Manage something", -} - -var myGroupListCmd = &cobra.Command{ - Use: "list", - Short: "List items", - RunE: myGroupListRun, - PreRunE: preRunSetupRpcClient, -} - -var myGroupAddCmd = &cobra.Command{ - Use: "add [name]", - Short: "Add an item", - Args: cobra.ExactArgs(1), - RunE: myGroupAddRun, - PreRunE: preRunSetupRpcClient, -} - -func init() { - // Add parent command - rootCmd.AddCommand(myGroupCmd) - - // Add subcommands - myGroupCmd.AddCommand(myGroupListCmd) - myGroupCmd.AddCommand(myGroupAddCmd) -} - -func myGroupListRun(cmd *cobra.Command, args []string) (rtnErr error) { - defer func() { - sendActivity("mygroup:list", rtnErr == nil) - }() - - // Implementation - fmt.Printf("Listing items...\n") - return nil -} - -func myGroupAddRun(cmd *cobra.Command, args []string) (rtnErr error) { - defer func() { - sendActivity("mygroup:add", rtnErr == nil) - }() - - name := args[0] - fmt.Printf("Adding item: %s\n", name) - return nil -} -``` - -#### Documentation - -````markdown -## mygroup - -Manage something with subcommands. - -### list - -List all items. - -```sh -wsh mygroup list -``` - -### add - -Add a new item. 
- -```sh -wsh mygroup add [name] -``` - -Examples: - -```sh -# List items -wsh mygroup list - -# Add an item -wsh mygroup add "new-item" -``` -```` - -## Common Patterns - -### Reading from Stdin - -```go -import "io" - -func myCommandRun(cmd *cobra.Command, args []string) (rtnErr error) { - defer func() { - sendActivity("mycommand", rtnErr == nil) - }() - - // Check if reading from stdin (using "-" convention) - var data []byte - var err error - - if len(args) > 0 && args[0] == "-" { - data, err = io.ReadAll(os.Stdin) - if err != nil { - return fmt.Errorf("reading stdin: %w", err) - } - } else { - // Read from file or other source - data, err = os.ReadFile(args[0]) - if err != nil { - return fmt.Errorf("reading file: %w", err) - } - } - - // Process data - fmt.Printf("Read %d bytes\n", len(data)) - return nil -} -``` - -### JSON File Input - -```go -import ( - "encoding/json" - "io" -) - -func loadJSONFile(filepath string) (map[string]interface{}, error) { - var data []byte - var err error - - if filepath == "-" { - data, err = io.ReadAll(os.Stdin) - if err != nil { - return nil, fmt.Errorf("reading stdin: %w", err) - } - } else { - data, err = os.ReadFile(filepath) - if err != nil { - return nil, fmt.Errorf("reading file: %w", err) - } - } - - var result map[string]interface{} - if err := json.Unmarshal(data, &result); err != nil { - return nil, fmt.Errorf("parsing JSON: %w", err) - } - - return result, nil -} -``` - -### Conditional Output (TTY Detection) - -```go -func myCommandRun(cmd *cobra.Command, args []string) (rtnErr error) { - defer func() { - sendActivity("mycommand", rtnErr == nil) - }() - - isTty := getIsTty() - - // Output value - fmt.Printf("%s", value) - - // Add newline only if TTY (for better piping experience) - if isTty { - fmt.Printf("\n") - } - - return nil -} -``` - -### Environment Variable Access - -```go -func myCommandRun(cmd *cobra.Command, args []string) (rtnErr error) { - defer func() { - sendActivity("mycommand", rtnErr == nil) - 
}() - - // Get block ID from environment - blockId := os.Getenv("WAVETERM_BLOCKID") - if blockId == "" { - return fmt.Errorf("WAVETERM_BLOCKID not set") - } - - // Get tab ID from environment - tabId := os.Getenv("WAVETERM_TABID") - if tabId == "" { - return fmt.Errorf("WAVETERM_TABID not set") - } - - fmt.Printf("Block: %s, Tab: %s\n", blockId, tabId) - return nil -} -``` - -## Best Practices - -### Command Design - -1. **Single Responsibility**: Each command should do one thing well -2. **Composable**: Design commands to work with pipes and other commands -3. **Consistent**: Follow existing wsh command patterns and conventions -4. **Documented**: Provide clear help text and examples - -### Error Handling - -1. **Context**: Wrap errors with context using `fmt.Errorf("context: %w", err)` -2. **User-Friendly**: Make error messages clear and actionable -3. **No Panics**: Return errors instead of calling `os.Exit()` or `panic()` -4. **Exit Codes**: Use `WshExitCode` for custom exit codes - -### Output - -1. **Structured**: Use consistent formatting for output -2. **Quiet by Default**: Only output what's necessary -3. **Verbose Flag**: Optionally provide `-v` for detailed output -4. **Stderr for Errors**: Use `fmt.Fprintf(os.Stderr, ...)` for error messages - -### Flags - -1. **Short Versions**: Provide `-x` short versions for common flags -2. **Sensible Defaults**: Choose defaults that work for most users -3. **Boolean Flags**: Use for on/off options -4. **String Flags**: Use for values that need user input - -### RPC Calls - -1. **Timeouts**: Always specify reasonable timeouts -2. **Error Context**: Wrap RPC errors with operation context -3. **Retries**: Don't retry automatically; let user retry command -4. **Routes**: Use appropriate routes for different operations - -## Common Pitfalls - -### 1. 
Forgetting Activity Tracking - -**Problem**: Command usage not tracked in telemetry - -**Solution**: Always include deferred `sendActivity()` call: -```go -defer func() { - sendActivity("commandname", rtnErr == nil) -}() -``` - -### 2. Using os.Exit() Instead of Returning Error - -**Problem**: Breaks defer statements and cleanup - -**Solution**: Return errors from RunE function: -```go -// Bad -if err != nil { - fmt.Fprintf(os.Stderr, "error: %v\n", err) - os.Exit(1) -} - -// Good -if err != nil { - return fmt.Errorf("operation failed: %w", err) -} -``` - -### 3. Not Validating Arguments - -**Problem**: Command crashes with nil pointer or index out of range - -**Solution**: Validate arguments early and show help: -```go -if len(args) == 0 { - OutputHelpMessage(cmd) - return fmt.Errorf("requires at least one argument") -} -``` - -### 4. Forgetting to Add to init() - -**Problem**: Command not available when running wsh - -**Solution**: Always add command in `init()` function: -```go -func init() { - rootCmd.AddCommand(myCommandCmd) -} -``` - -### 5. 
Inconsistent Output - -**Problem**: Inconsistent use of output methods - -**Solution**: Use standard `fmt` package functions: -```go -// For stdout -fmt.Printf("output\n") - -// For stderr -fmt.Fprintf(os.Stderr, "error message\n") -``` - -## Quick Reference Checklist - -When adding a new wsh command: - -- [ ] Create `cmd/wsh/cmd/wshcmd-[commandname].go` -- [ ] Define command struct with Use, Short, Long descriptions -- [ ] Add `PreRunE: preRunSetupRpcClient` if using RPC -- [ ] Implement command function with activity tracking -- [ ] Add command to `rootCmd` in `init()` function -- [ ] Define flags in `init()` function if needed -- [ ] Add documentation to `docs/docs/wsh-reference.mdx` -- [ ] Build and test: `task build:wsh` -- [ ] Test help: `wsh [commandname] --help` -- [ ] Test all flag combinations -- [ ] Test error cases - -## Related Files - -- **Root Command**: `cmd/wsh/cmd/wshcmd-root.go` - Main command setup and utilities -- **RPC Client**: `pkg/wshrpc/wshclient/` - Client functions for RPC calls -- **RPC Types**: `pkg/wshrpc/wshrpctypes.go` - RPC request/response data structures -- **Documentation**: `docs/docs/wsh-reference.mdx` - User-facing command reference -- **Examples**: `cmd/wsh/cmd/wshcmd-*.go` - Existing command implementations diff --git a/.kilocode/skills/context-menu/SKILL.md b/.kilocode/skills/context-menu/SKILL.md deleted file mode 100644 index dda3b7b985..0000000000 --- a/.kilocode/skills/context-menu/SKILL.md +++ /dev/null @@ -1,160 +0,0 @@ ---- -name: context-menu -description: Guide for creating and displaying context menus in Wave Terminal. Use when implementing right-click menus, adding context menu items, creating submenus, or handling menu interactions with checkboxes and separators. ---- - -# Context Menu Quick Reference - -This guide provides a quick overview of how to create and display a context menu using our system. 
- ---- - -## ContextMenuItem Type - -Define each menu item using the `ContextMenuItem` type: - -```ts -type ContextMenuItem = { - label?: string; - type?: "separator" | "normal" | "submenu" | "checkbox" | "radio"; - role?: string; // Electron role (optional) - click?: () => void; // Callback for item selection (not needed if role is set) - submenu?: ContextMenuItem[]; // For nested menus - checked?: boolean; // For checkbox or radio items - visible?: boolean; - enabled?: boolean; - sublabel?: string; -}; -``` - ---- - -## Import and Show the Menu - -Import the context menu module: - -```ts -import { ContextMenuModel } from "@/app/store/contextmenu"; -``` - -To display the context menu, call: - -```ts -ContextMenuModel.getInstance().showContextMenu(menu, event); -``` - -- **menu**: An array of `ContextMenuItem`. -- **event**: The mouse event that triggered the context menu (typically from an onContextMenu handler). - ---- - -## Basic Example - -A simple context menu with a separator: - -```ts -const menu: ContextMenuItem[] = [ - { - label: "New File", - click: () => { - /* create a new file */ - }, - }, - { - label: "New Folder", - click: () => { - /* create a new folder */ - }, - }, - { type: "separator" }, - { - label: "Rename", - click: () => { - /* rename item */ - }, - }, -]; - -ContextMenuModel.getInstance().showContextMenu(menu, e); -``` - ---- - -## Example with Submenu and Checkboxes - -Toggle settings using a submenu with checkbox items: - -```ts -const isClearOnStart = true; // Example setting - -const menu: ContextMenuItem[] = [ - { - label: "Clear Output On Restart", - submenu: [ - { - label: "On", - type: "checkbox", - checked: isClearOnStart, - click: () => { - // Set the config to enable clear on restart - }, - }, - { - label: "Off", - type: "checkbox", - checked: !isClearOnStart, - click: () => { - // Set the config to disable clear on restart - }, - }, - ], - }, -]; - -ContextMenuModel.getInstance().showContextMenu(menu, e); -``` - ---- - -## 
Editing a Config File Example - -Open a configuration file (e.g., `widgets.json`) in preview mode: - -```ts -{ - label: "Edit widgets.json", - click: () => { - fireAndForget(async () => { - const path = `${getApi().getConfigDir()}/widgets.json`; - const blockDef: BlockDef = { - meta: { view: "preview", file: path }, - }; - await createBlock(blockDef, false, true); - }); - }, -} -``` - ---- - -## Summary - -- **Menu Definition**: Use the `ContextMenuItem` type. -- **Actions**: Use `click` for actions; use `submenu` for nested options. -- **Separators**: Use `type: "separator"` to group items. -- **Toggles**: Use `type: "checkbox"` or `"radio"` with the `checked` property. -- **Displaying**: Use `ContextMenuModel.getInstance().showContextMenu(menu, event)` to render the menu. - -## Common Use Cases - -### File/Folder Operations -Context menus are commonly used for file operations like creating, renaming, and deleting files or folders. - -### Settings Toggles -Use checkbox menu items to toggle settings on and off, with the `checked` property reflecting the current state. - -### Nested Options -Use `submenu` to organize related options hierarchically, keeping the top-level menu clean and organized. - -### Conditional Items -Use the `visible` and `enabled` properties to dynamically show or disable menu items based on the current state. diff --git a/.kilocode/skills/create-view/SKILL.md b/.kilocode/skills/create-view/SKILL.md deleted file mode 100644 index 49049ca9e5..0000000000 --- a/.kilocode/skills/create-view/SKILL.md +++ /dev/null @@ -1,520 +0,0 @@ ---- -name: create-view -description: Guide for implementing a new view type in Wave Terminal. Use when creating a new view component, implementing the ViewModel interface, registering a new view type in BlockRegistry, or adding a new content type to display within blocks. ---- - -# Creating a New View in Wave Terminal - -This guide explains how to implement a new view type in Wave Terminal. 
Views are the core content components displayed within blocks in the terminal interface. - -## Architecture Overview - -Wave Terminal uses a **Model-View architecture** where: - -- **ViewModel** - Contains all state, logic, and UI configuration as Jotai atoms -- **ViewComponent** - Pure React component that renders the UI using the model -- **BlockFrame** - Wraps views with a header, connection management, and standard controls - -The separation between model and component ensures: - -- Models can update state without React hooks -- Components remain pure and testable -- State is centralized in Jotai atoms for easy access - -## ViewModel Interface - -Every view must implement the `ViewModel` interface defined in `frontend/types/custom.d.ts`: - -```typescript -interface ViewModel { - // Required: The type identifier for this view (e.g., "term", "web", "preview") - viewType: string; - - // Required: The React component that renders this view - viewComponent: ViewComponent; - - // Optional: Icon shown in block header (FontAwesome icon name or IconButtonDecl) - viewIcon?: jotai.Atom; - - // Optional: Display name shown in block header (e.g., "Terminal", "Web", "Preview") - viewName?: jotai.Atom; - - // Optional: Additional header elements (text, buttons, inputs) shown after the name - viewText?: jotai.Atom; - - // Optional: Icon button shown before the view name in header - preIconButton?: jotai.Atom; - - // Optional: Icon buttons shown at the end of the header (before settings/close) - endIconButtons?: jotai.Atom; - - // Optional: Custom background styling for the block - blockBg?: jotai.Atom; - - // Optional: If true, completely hides the block header - noHeader?: jotai.Atom; - - // Optional: If true, shows connection picker in header for remote connections - manageConnection?: jotai.Atom; - - // Optional: If true, filters out 'nowsh' connections from connection picker - filterOutNowsh?: jotai.Atom; - - // Optional: If true, removes default padding from content area 
- noPadding?: jotai.Atom; - - // Optional: Atoms for managing in-block search functionality - searchAtoms?: SearchAtoms; - - // Optional: Returns whether this is a basic terminal (for multi-input feature) - isBasicTerm?: (getFn: jotai.Getter) => boolean; - - // Optional: Returns context menu items for the settings dropdown - getSettingsMenuItems?: () => ContextMenuItem[]; - - // Optional: Focuses the view when called, returns true if successful - giveFocus?: () => boolean; - - // Optional: Handles keyboard events, returns true if handled - keyDownHandler?: (e: WaveKeyboardEvent) => boolean; - - // Optional: Cleanup when block is closed - dispose?: () => void; -} -``` - -### Key Concepts - -**Atoms**: All UI-related properties must be Jotai atoms. This enables: - -- Reactive updates when state changes -- Access from anywhere via `globalStore.get()`/`globalStore.set()` -- Derived atoms that compute values from other atoms - -**ViewComponent**: The React component receives these props: - -```typescript -type ViewComponentProps = { - blockId: string; // Unique ID for this block - blockRef: React.RefObject; // Ref to block container - contentRef: React.RefObject; // Ref to content area - model: T; // Your ViewModel instance -}; -``` - -## Step-by-Step Guide - -### 1. 
Create the View Model Class - -Create a new file for your view model (e.g., `frontend/app/view/myview/myview-model.ts`): - -```typescript -import { BlockNodeModel } from "@/app/block/blocktypes"; -import { globalStore } from "@/app/store/jotaiStore"; -import { WOS, useBlockAtom } from "@/store/global"; -import * as jotai from "jotai"; -import { MyView } from "./myview"; - -export class MyViewModel implements ViewModel { - viewType: string; - blockId: string; - nodeModel: BlockNodeModel; - blockAtom: jotai.Atom; - - // Define your atoms (simple field initializers) - viewIcon = jotai.atom("circle"); - viewName = jotai.atom("My View"); - noPadding = jotai.atom(true); - - // Derived atom (created in constructor) - viewText!: jotai.Atom; - - constructor(blockId: string, nodeModel: BlockNodeModel) { - this.viewType = "myview"; - this.blockId = blockId; - this.nodeModel = nodeModel; - this.blockAtom = WOS.getWaveObjectAtom(`block:${blockId}`); - - // Create derived atoms that depend on block data or other atoms - this.viewText = jotai.atom((get) => { - const blockData = get(this.blockAtom); - const rtn: HeaderElem[] = []; - - // Add header buttons/text based on state - rtn.push({ - elemtype: "iconbutton", - icon: "refresh", - title: "Refresh", - click: () => this.refresh(), - }); - - return rtn; - }); - } - - get viewComponent(): ViewComponent { - return MyView; - } - - refresh() { - // Update state using globalStore - // Never use React hooks in model methods - console.log("refreshing..."); - } - - giveFocus(): boolean { - // Focus your view component - return true; - } - - dispose() { - // Cleanup resources (unsubscribe from events, etc.) - } -} -``` - -### 2. 
Create the View Component - -Create your React component (e.g., `frontend/app/view/myview/myview.tsx`): - -```typescript -import { ViewComponentProps } from "@/app/block/blocktypes"; -import { MyViewModel } from "./myview-model"; -import { useAtomValue } from "jotai"; -import "./myview.scss"; - -export const MyView: React.FC> = ({ - blockId, - model, - contentRef -}) => { - // Use atoms from the model (these are React hooks - call at top level!) - const blockData = useAtomValue(model.blockAtom); - - return ( -
-
Block ID: {blockId}
-
View: {model.viewType}
- {/* Your view content here */} -
- ); -}; -``` - -### 3. Register the View - -Add your view to the `BlockRegistry` in `frontend/app/block/blockregistry.ts`: - -```typescript -import { MyViewModel } from "@/app/view/myview/myview-model"; - -const BlockRegistry: Map = new Map(); -BlockRegistry.set("term", TermViewModel); -BlockRegistry.set("preview", PreviewModel); -BlockRegistry.set("web", WebViewModel); -// ... existing registrations ... -BlockRegistry.set("myview", MyViewModel); // Add your view here -``` - -The registry key (e.g., `"myview"`) becomes the view type used in block metadata. - -### 4. Create Blocks with Your View - -Users can create blocks with your view type: - -- Via CLI: `wsh view myview` -- Via RPC: Use the block's `meta.view` field set to `"myview"` - -## Real-World Examples - -### Example 1: Terminal View (`term-model.ts`) - -The terminal view demonstrates: - -- **Connection management** via `manageConnection` atom -- **Dynamic header buttons** showing shell status (play/restart) -- **Mode switching** between terminal and vdom views -- **Custom keyboard handling** for terminal-specific shortcuts -- **Focus management** to focus the xterm.js instance -- **Shell integration status** showing AI capability indicators - -Key features: - -```typescript -this.manageConnection = jotai.atom((get) => { - const termMode = get(this.termMode); - if (termMode == "vdom") return false; - return true; // Show connection picker for regular terminal mode -}); - -this.endIconButtons = jotai.atom((get) => { - const shellProcStatus = get(this.shellProcStatus); - const buttons: IconButtonDecl[] = []; - - if (shellProcStatus == "running") { - buttons.push({ - elemtype: "iconbutton", - icon: "refresh", - title: "Restart Shell", - click: this.forceRestartController.bind(this), - }); - } - return buttons; -}); -``` - -### Example 2: Web View (`webview.tsx`) - -The web view shows: - -- **Complex header controls** (back/forward/home/URL input) -- **State management** for loading, URL, and navigation -- 
**Event handling** for webview navigation events -- **Custom styling** with `noPadding` for full-bleed content -- **Media controls** showing play/pause/mute when media is active - -Key features: - -```typescript -this.viewText = jotai.atom((get) => { - const url = get(this.url); - const rtn: HeaderElem[] = []; - - // Navigation buttons - rtn.push({ - elemtype: "iconbutton", - icon: "chevron-left", - click: this.handleBack.bind(this), - disabled: this.shouldDisableBackButton(), - }); - - // URL input with nested controls - rtn.push({ - elemtype: "div", - className: "block-frame-div-url", - children: [ - { - elemtype: "input", - value: url, - onChange: this.handleUrlChange.bind(this), - onKeyDown: this.handleKeyDown.bind(this), - }, - { - elemtype: "iconbutton", - icon: "rotate-right", - click: this.handleRefresh.bind(this), - }, - ], - }); - - return rtn; -}); -``` - -## Header Elements (`HeaderElem`) - -The `viewText` atom can return an array of these element types: - -```typescript -// Icon button -{ - elemtype: "iconbutton", - icon: "refresh", - title: "Tooltip text", - click: () => { /* handler */ }, - disabled?: boolean, - iconColor?: string, - iconSpin?: boolean, - noAction?: boolean, // Shows icon but no click action -} - -// Text element -{ - elemtype: "text", - text: "Display text", - className?: string, - noGrow?: boolean, - ref?: React.RefObject, - onClick?: (e: React.MouseEvent) => void, -} - -// Text button -{ - elemtype: "textbutton", - text: "Button text", - className?: string, - title: "Tooltip", - onClick: (e: React.MouseEvent) => void, -} - -// Input field -{ - elemtype: "input", - value: string, - className?: string, - onChange: (e: React.ChangeEvent) => void, - onKeyDown?: (e: React.KeyboardEvent) => void, - onFocus?: (e: React.FocusEvent) => void, - onBlur?: (e: React.FocusEvent) => void, - ref?: React.RefObject, -} - -// Container with children -{ - elemtype: "div", - className?: string, - children: HeaderElem[], - onMouseOver?: (e: 
React.MouseEvent) => void, - onMouseOut?: (e: React.MouseEvent) => void, -} - -// Menu button (dropdown) -{ - elemtype: "menubutton", - // ... MenuButtonProps ... -} -``` - -## Best Practices - -### Jotai Model Pattern - -Follow these rules for Jotai atoms in models: - -1. **Simple atoms as field initializers**: - - ```typescript - viewIcon = jotai.atom("circle"); - noPadding = jotai.atom(true); - ``` - -2. **Derived atoms in constructor** (need dependency on other atoms): - - ```typescript - constructor(blockId: string, nodeModel: BlockNodeModel) { - this.viewText = jotai.atom((get) => { - const blockData = get(this.blockAtom); - return [/* computed based on blockData */]; - }); - } - ``` - -3. **Models never use React hooks** - Use `globalStore.get()`/`set()`: - - ```typescript - refresh() { - const currentData = globalStore.get(this.blockAtom); - globalStore.set(this.dataAtom, newData); - } - ``` - -4. **Components use hooks for atoms**: - ```typescript - const data = useAtomValue(model.dataAtom); - const [value, setValue] = useAtom(model.valueAtom); - ``` - -### State Management - -- All view state should live in atoms on the model -- Use `useBlockAtom()` helper for block-scoped atoms that persist -- Use `globalStore` for imperative access outside React components -- Subscribe to Wave events using `waveEventSubscribe()` - -### Styling - -- Create a `.scss` file for your view styles -- Use Tailwind utilities where possible (v4) -- Add `noPadding: atom(true)` for full-bleed content -- Use `blockBg` atom to customize block background - -### Focus Management - -Implement `giveFocus()` to focus your view when: - -- Block gains focus via keyboard navigation -- User clicks the block -- Return `true` if successfully focused, `false` otherwise - -### Keyboard Handling - -Implement `keyDownHandler(e: WaveKeyboardEvent)` for: - -- View-specific keyboard shortcuts -- Return `true` if event was handled (prevents propagation) -- Use `keyutil.checkKeyPressed(waveEvent, 
"Cmd:K")` for shortcut checks - -### Cleanup - -Implement `dispose()` to: - -- Unsubscribe from Wave events -- Unregister routes/handlers -- Clear timers/intervals -- Release resources - -### Connection Management - -For views that need remote connections: - -```typescript -this.manageConnection = jotai.atom(true); // Show connection picker -this.filterOutNowsh = jotai.atom(true); // Hide nowsh connections -``` - -Access connection status: - -```typescript -const connStatus = jotai.atom((get) => { - const blockData = get(this.blockAtom); - const connName = blockData?.meta?.connection; - return get(getConnStatusAtom(connName)); -}); -``` - -## Common Patterns - -### Reading Block Metadata - -```typescript -import { getBlockMetaKeyAtom } from "@/store/global"; - -// In constructor: -this.someFlag = getBlockMetaKeyAtom(blockId, "myview:flag"); - -// In component: -const flag = useAtomValue(model.someFlag); -``` - -### Configuration Overrides - -Wave has a hierarchical config system (global → connection → block): - -```typescript -import { getOverrideConfigAtom } from "@/store/global"; - -this.settingAtom = jotai.atom((get) => { - // Checks block meta, then connection config, then global settings - return get(getOverrideConfigAtom(this.blockId, "myview:setting")) ?? 
defaultValue; -}); -``` - -### Updating Block Metadata - -```typescript -import { RpcApi } from "@/app/store/wshclientapi"; -import { TabRpcClient } from "@/app/store/wshrpcutil"; -import { WOS } from "@/store/global"; - -await RpcApi.SetMetaCommand(TabRpcClient, { - oref: WOS.makeORef("block", this.blockId), - meta: { "myview:key": value }, -}); -``` - -## Additional Resources - -- `frontend/app/block/blockframe-header.tsx` - Block header rendering -- `frontend/app/view/term/term-model.ts` - Complex view example -- `frontend/app/view/webview/webview.tsx` - Navigation UI example -- `frontend/types/custom.d.ts` - Type definitions diff --git a/.kilocode/skills/electron-api/SKILL.md b/.kilocode/skills/electron-api/SKILL.md deleted file mode 100644 index 0014e82a50..0000000000 --- a/.kilocode/skills/electron-api/SKILL.md +++ /dev/null @@ -1,182 +0,0 @@ ---- -name: electron-api -description: Guide for adding new Electron APIs to Wave Terminal. Use when implementing new frontend-to-electron communications via preload/IPC. ---- - -# Adding Electron APIs - -Electron APIs allow the frontend to call Electron main process functionality directly via IPC. - -## Four Files to Edit - -1. [`frontend/types/custom.d.ts`](frontend/types/custom.d.ts) - TypeScript [`ElectronApi`](frontend/types/custom.d.ts:82) type -2. [`emain/preload.ts`](emain/preload.ts) - Expose method via `contextBridge` -3. [`emain/emain-ipc.ts`](emain/emain-ipc.ts) - Implement IPC handler -4. [`frontend/preview/preview-electron-api.ts`](frontend/preview/preview-electron-api.ts) - Add a no-op stub to keep the `previewElectronApi` object in sync with the `ElectronApi` type - -## Three Communication Patterns - -1. **Sync** - `ipcRenderer.sendSync()` + `ipcMain.on()` + `event.returnValue = ...` -2. **Async** - `ipcRenderer.invoke()` + `ipcMain.handle()` -3. **Fire-and-forget** - `ipcRenderer.send()` + `ipcMain.on()` - -## Example: Async Method - -### 1. 
Define TypeScript Interface - -In [`frontend/types/custom.d.ts`](frontend/types/custom.d.ts): - -```typescript -type ElectronApi = { - captureScreenshot: (rect: Electron.Rectangle) => Promise; // capture-screenshot -}; -``` - -### 2. Expose in Preload - -In [`emain/preload.ts`](emain/preload.ts): - -```typescript -contextBridge.exposeInMainWorld("api", { - captureScreenshot: (rect: Rectangle) => ipcRenderer.invoke("capture-screenshot", rect), -}); -``` - -### 3. Implement Handler - -In [`emain/emain-ipc.ts`](emain/emain-ipc.ts): - -```typescript -electron.ipcMain.handle("capture-screenshot", async (event, rect) => { - const tabView = getWaveTabViewByWebContentsId(event.sender.id); - if (!tabView) throw new Error("No tab view found"); - const image = await tabView.webContents.capturePage(rect); - return `data:image/png;base64,${image.toPNG().toString("base64")}`; -}); -``` - -### 4. Add Preview Stub - -In [`frontend/preview/preview-electron-api.ts`](frontend/preview/preview-electron-api.ts): - -```typescript -captureScreenshot: (_rect: Electron.Rectangle) => Promise.resolve(""), -``` - -### 5. Call from Frontend - -```typescript -import { getApi } from "@/store/global"; - -const dataUrl = await getApi().captureScreenshot({ x: 0, y: 0, width: 800, height: 600 }); -``` - -## Example: Sync Method - -### 1. Define - -```typescript -type ElectronApi = { - getUserName: () => string; // get-user-name -}; -``` - -### 2. Preload - -```typescript -getUserName: () => ipcRenderer.sendSync("get-user-name"), -``` - -### 3. Handler (⚠️ MUST set event.returnValue or browser hangs) - -```typescript -electron.ipcMain.on("get-user-name", (event) => { - event.returnValue = process.env.USER || "unknown"; -}); -``` - -### 4. Call - -```typescript -import { getApi } from "@/store/global"; - -const userName = getApi().getUserName(); // blocks until returns -``` - -## Example: Fire-and-Forget - -### 1. 
Define - -```typescript -type ElectronApi = { - openExternal: (url: string) => void; // open-external -}; -``` - -### 2. Preload - -```typescript -openExternal: (url) => ipcRenderer.send("open-external", url), -``` - -### 3. Handler - -```typescript -electron.ipcMain.on("open-external", (event, url) => { - electron.shell.openExternal(url); -}); -``` - -## Example: Event Listener - -### 1. Define - -```typescript -type ElectronApi = { - onZoomFactorChange: (callback: (zoomFactor: number) => void) => void; // zoom-factor-change -}; -``` - -### 2. Preload - -```typescript -onZoomFactorChange: (callback) => - ipcRenderer.on("zoom-factor-change", (_event, zoomFactor) => callback(zoomFactor)), -``` - -### 3. Send from Main - -```typescript -webContents.send("zoom-factor-change", newZoomFactor); -``` - -## Quick Reference - -**Use Sync when:** -- Getting config/env vars -- Quick lookups, no I/O -- ⚠️ **CRITICAL**: Always set `event.returnValue` or browser hangs - -**Use Async when:** -- File operations -- Network requests -- Can fail or take time - -**Use Fire-and-forget when:** -- No return value needed -- Triggering actions - -**Electron API vs RPC:** -- Electron API: Native OS features, window management, Electron APIs -- RPC: Database, backend logic, remote servers - -## Checklist - -- [ ] Add to [`ElectronApi`](frontend/types/custom.d.ts:82) in [`custom.d.ts`](frontend/types/custom.d.ts) -- [ ] Include IPC channel name in comment -- [ ] Expose in [`preload.ts`](emain/preload.ts) -- [ ] Implement in [`emain-ipc.ts`](emain/emain-ipc.ts) -- [ ] Add no-op stub to [`preview-electron-api.ts`](frontend/preview/preview-electron-api.ts) -- [ ] IPC channel names match exactly -- [ ] **For sync**: Set `event.returnValue` (or browser hangs!) 
-- [ ] Test end-to-end diff --git a/.kilocode/skills/openspec-apply-change/SKILL.md b/.kilocode/skills/openspec-apply-change/SKILL.md deleted file mode 100644 index 70fbdb8569..0000000000 --- a/.kilocode/skills/openspec-apply-change/SKILL.md +++ /dev/null @@ -1,156 +0,0 @@ ---- -name: openspec-apply-change -description: Implement tasks from an OpenSpec change. Use when the user wants to start implementing, continue implementation, or work through tasks. -license: MIT -compatibility: Requires openspec CLI. -metadata: - author: openspec - version: "1.0" - generatedBy: "1.3.1" ---- - -Implement tasks from an OpenSpec change. - -**Input**: Optionally specify a change name. If omitted, check if it can be inferred from conversation context. If vague or ambiguous you MUST prompt for available changes. - -**Steps** - -1. **Select the change** - - If a name is provided, use it. Otherwise: - - Infer from conversation context if the user mentioned a change - - Auto-select if only one active change exists - - If ambiguous, run `openspec list --json` to get available changes and use the **AskUserQuestion tool** to let the user select - - Always announce: "Using change: " and how to override (e.g., `/opsx:apply `). - -2. **Check status to understand the schema** - ```bash - openspec status --change "" --json - ``` - Parse the JSON to understand: - - `schemaName`: The workflow being used (e.g., "spec-driven") - - Which artifact contains the tasks (typically "tasks" for spec-driven, check status for others) - -3. 
**Get apply instructions** - - ```bash - openspec instructions apply --change "" --json - ``` - - This returns: - - `contextFiles`: artifact ID -> array of concrete file paths (varies by schema - could be proposal/specs/design/tasks or spec/tests/implementation/docs) - - Progress (total, complete, remaining) - - Task list with status - - Dynamic instruction based on current state - - **Handle states:** - - If `state: "blocked"` (missing artifacts): show message, suggest using openspec-continue-change - - If `state: "all_done"`: congratulate, suggest archive - - Otherwise: proceed to implementation - -4. **Read context files** - - Read every file path listed under `contextFiles` from the apply instructions output. - The files depend on the schema being used: - - **spec-driven**: proposal, specs, design, tasks - - Other schemas: follow the contextFiles from CLI output - -5. **Show current progress** - - Display: - - Schema being used - - Progress: "N/M tasks complete" - - Remaining tasks overview - - Dynamic instruction from CLI - -6. **Implement tasks (loop until done or blocked)** - - For each pending task: - - Show which task is being worked on - - Make the code changes required - - Keep changes minimal and focused - - Mark task complete in the tasks file: `- [ ]` → `- [x]` - - Continue to next task - - **Pause if:** - - Task is unclear → ask for clarification - - Implementation reveals a design issue → suggest updating artifacts - - Error or blocker encountered → report and wait for guidance - - User interrupts - -7. **On completion or pause, show status** - - Display: - - Tasks completed this session - - Overall progress: "N/M tasks complete" - - If all done: suggest archive - - If paused: explain why and wait for guidance - -**Output During Implementation** - -``` -## Implementing: (schema: ) - -Working on task 3/7: -[...implementation happening...] -✓ Task complete - -Working on task 4/7: -[...implementation happening...] 
-✓ Task complete -``` - -**Output On Completion** - -``` -## Implementation Complete - -**Change:** -**Schema:** -**Progress:** 7/7 tasks complete ✓ - -### Completed This Session -- [x] Task 1 -- [x] Task 2 -... - -All tasks complete! Ready to archive this change. -``` - -**Output On Pause (Issue Encountered)** - -``` -## Implementation Paused - -**Change:** -**Schema:** -**Progress:** 4/7 tasks complete - -### Issue Encountered - - -**Options:** -1.
); diff --git a/frontend/app/view/term/term-model.ts b/frontend/app/view/term/term-model.ts index c9094849b2..4999628f54 100644 --- a/frontend/app/view/term/term-model.ts +++ b/frontend/app/view/term/term-model.ts @@ -766,7 +766,7 @@ export class TermViewModel implements ViewModel { return false; } const shellProcStatus = globalStore.get(this.shellProcStatus); - if ((shellProcStatus == "done" || shellProcStatus == "init") && keyutil.checkKeyPressed(waveEvent, "Enter")) { + if (shellProcStatus == "done" && keyutil.checkKeyPressed(waveEvent, "Enter")) { fireAndForget(() => this.forceRestartController()); return false; } diff --git a/frontend/app/view/term/term.tsx b/frontend/app/view/term/term.tsx index 4428889353..b19ceff00b 100644 --- a/frontend/app/view/term/term.tsx +++ b/frontend/app/view/term/term.tsx @@ -370,12 +370,17 @@ const TerminalView = ({ blockId, model }: ViewComponentProps) => let cancelled = false; fireAndForget(async () => { try { + if (cancelled) { + return; + } const info = await RpcApi.SessionInfoCommand(TabRpcClient, { daemonid: daemonId }); if (!cancelled && info.jobid) { await termWrap.attachToDaemon(info.jobid); } } catch (e) { - console.log("error attaching terminal to session daemon", daemonId, e); + if (!cancelled) { + console.log("error attaching terminal to session daemon", daemonId, e); + } } }); return () => { diff --git a/pkg/blockcontroller/blockcontroller.go b/pkg/blockcontroller/blockcontroller.go index e2bd9e8b89..5a4af19f08 100644 --- a/pkg/blockcontroller/blockcontroller.go +++ b/pkg/blockcontroller/blockcontroller.go @@ -9,6 +9,7 @@ import ( "fmt" "io/fs" "log" + "runtime" "strings" "sync" "time" @@ -18,6 +19,7 @@ import ( "github.com/wavetermdev/waveterm/pkg/filestore" "github.com/wavetermdev/waveterm/pkg/remote" "github.com/wavetermdev/waveterm/pkg/remote/conncontroller" + "github.com/wavetermdev/waveterm/pkg/sessiondaemon" "github.com/wavetermdev/waveterm/pkg/util/ds" "github.com/wavetermdev/waveterm/pkg/util/shellutil" 
"github.com/wavetermdev/waveterm/pkg/wavebase" @@ -171,8 +173,16 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts if existing != nil { existingConnName := existing.GetConnName() if existingConnName != connName { + // For non-local connections, check readiness before switching + if !conncontroller.IsLocalConnName(connName) && !conncontroller.IsWslConnName(connName) && existingConnName == "" { + err = CheckConnStatus(blockId) + if err != nil { + log.Printf("not stopping blockcontroller %s due to conn change (from %q to %q): new connection not ready\n", blockId, existingConnName, connName) + return fmt.Errorf("cannot start shellproc: %w", err) + } + } log.Printf("stopping blockcontroller %s due to conn change (from %q to %q)\n", blockId, existingConnName, connName) - DestroyBlockController(blockId) + stopBlockController(blockId) time.Sleep(100 * time.Millisecond) existing = nil } @@ -191,6 +201,10 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts // Auto-create anonymous daemon for SSH blocks without daemonid if daemonId == "" && controllerName == BlockController_Shell && !conncontroller.IsLocalConnName(connName) && !conncontroller.IsWslConnName(connName) { + err = CheckConnStatus(blockId) + if err != nil { + return fmt.Errorf("cannot start shellproc: %w", err) + } newDaemonId, err := autoCreateSessionDaemon(ctx, blockId, blockData.Meta, connName, rtOpts) if err != nil { return fmt.Errorf("auto-create session daemon: %w", err) @@ -198,11 +212,29 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts daemonId = newDaemonId } + // For local/WSL connections, session daemon is not applicable — clear and fall through to ShellController + if daemonId != "" && controllerName == BlockController_Shell && (conncontroller.IsLocalConnName(connName) || conncontroller.IsWslConnName(connName)) { + if existing != nil { + DestroyBlockController(blockId) + time.Sleep(100 * time.Millisecond) + 
existing = nil + } + _ = wstore.DBUpdateFn(ctx, blockId, func(block *waveobj.Block) { + delete(block.Meta, waveobj.MetaKey_SessionDaemonId) + }) + daemonId = "" + } + // Validate existing daemon: if stale (done/not found), clear and auto-create if daemonId != "" && controllerName == BlockController_Shell { dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, daemonId) if err != nil || dbDaemon.Status == "done" { log.Printf("[sessiondaemon] stale daemon=%s block=%s status=%s err=%v, clearing and recreating", daemonId, blockId, func() string { if dbDaemon != nil { return dbDaemon.Status }; return "db_load_error" }(), err) + if existing != nil { + DestroyBlockController(blockId) + time.Sleep(100 * time.Millisecond) + existing = nil + } _ = wstore.DBUpdateFn(ctx, blockId, func(block *waveobj.Block) { delete(block.Meta, waveobj.MetaKey_SessionDaemonId) }) @@ -225,7 +257,7 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts needsReplace = true } case *SessionDaemonController: - if daemonId == "" { + if daemonId == "" || conncontroller.IsLocalConnName(connName) || conncontroller.IsWslConnName(connName) { needsReplace = true } case *TsunamiController: @@ -236,7 +268,7 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts if needsReplace { log.Printf("stopping blockcontroller %s due to controller type change\n", blockId) - DestroyBlockController(blockId) + stopBlockController(blockId) time.Sleep(100 * time.Millisecond) existing = nil } @@ -244,9 +276,12 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts // Force restart if requested if force && existing != nil { - DestroyBlockController(blockId) - time.Sleep(100 * time.Millisecond) - existing = nil + status := existing.GetRuntimeStatus() + if status.ShellProcStatus != Status_Running { + stopBlockController(blockId) + time.Sleep(100 * time.Millisecond) + existing = nil + } } // Destroy done controllers before restarting @@ 
-254,7 +289,7 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts status := existing.GetRuntimeStatus() if status.ShellProcStatus == Status_Done { log.Printf("destroying blockcontroller %s with done status before restart\n", blockId) - DestroyBlockController(blockId) + stopBlockController(blockId) time.Sleep(100 * time.Millisecond) existing = nil } @@ -271,6 +306,14 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts sdc.DaemonId = daemonId controller = sdc registerController(blockId, controller) + // Ensure the daemon is in memory before attaching the block. + // On restart, the daemon exists in DB but not in the in-memory + // manager – AttachBlock silently no-ops if not found. + dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, daemonId) + if err == nil { + sessiondaemon.Manager.GetOrCreate(ctx, dbDaemon) + } + sessiondaemon.Manager.AttachBlock(ctx, daemonId, blockId) case controllerName == BlockController_Shell || controllerName == BlockController_Cmd: controller = MakeShellController(tabId, blockId, controllerName, connName) @@ -316,13 +359,23 @@ func GetBlockControllerRuntimeStatus(blockId string) *BlockControllerRuntimeStat return controller.GetRuntimeStatus() } -func DestroyBlockController(blockId string) { +func stopBlockController(blockId string) { controller := getController(blockId) if controller == nil { return } + stackBuf := make([]byte, 4096) + stackLen := runtime.Stack(stackBuf, false) + log.Printf("[sessiondaemon] stopBlockController: block=%s stack:\n%s", blockId, string(stackBuf[:stackLen])) controller.Stop(true, Status_Done, true) wstore.DeleteRTInfo(waveobj.MakeORef(waveobj.OType_Block, blockId)) +} + +func DestroyBlockController(blockId string) { + stackBuf := make([]byte, 4096) + stackLen := runtime.Stack(stackBuf, false) + log.Printf("[sessiondaemon] DestroyBlockController: block=%s stack:\n%s", blockId, string(stackBuf[:stackLen])) + stopBlockController(blockId) 
deleteController(blockId) } diff --git a/pkg/blockcontroller/sessiondaemoncontroller.go b/pkg/blockcontroller/sessiondaemoncontroller.go index 0d4b84a77a..ce95684a8c 100644 --- a/pkg/blockcontroller/sessiondaemoncontroller.go +++ b/pkg/blockcontroller/sessiondaemoncontroller.go @@ -16,6 +16,7 @@ import ( "github.com/wavetermdev/waveterm/pkg/sessiondaemon" "github.com/wavetermdev/waveterm/pkg/shellexec" "github.com/wavetermdev/waveterm/pkg/util/shellutil" + "github.com/wavetermdev/waveterm/pkg/utilds" "github.com/wavetermdev/waveterm/pkg/wavebase" "github.com/wavetermdev/waveterm/pkg/waveobj" "github.com/wavetermdev/waveterm/pkg/wps" @@ -34,7 +35,7 @@ type SessionDaemonController struct { TabId string InputSessionId string inputSeqNum int - versionTs int64 + versionTs utilds.VersionTs } func MakeSessionDaemonController(tabId string, blockId string, connName string) *SessionDaemonController { @@ -44,7 +45,6 @@ func MakeSessionDaemonController(tabId string, blockId string, connName string) ConnName: connName, TabId: tabId, InputSessionId: uuid.New().String(), - versionTs: 1, } } @@ -198,9 +198,7 @@ func (sdc *SessionDaemonController) Stop(graceful bool, newStatus string, destro daemon.Stop(ctx) } sessiondaemon.Manager.Remove(sdc.DaemonId) - wstore.DBUpdateFn(ctx, sdc.DaemonId, func(sd *waveobj.SessionDaemon) { - sd.Status = "done" - }) + wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, sdc.DaemonId) } else { log.Printf("[sessiondaemon] stop: daemon=%s remaining blocks=%d anonymous=%v", sdc.DaemonId, remaining, dbDaemon.IsAnonymous) } @@ -222,7 +220,7 @@ func (sdc *SessionDaemonController) GetRuntimeStatus() *BlockControllerRuntimeSt sdc.WithLock(func() { rtn.BlockId = sdc.BlockId rtn.ShellProcConnName = sdc.ConnName - rtn.Version = sdc.versionTs + rtn.Version = sdc.versionTs.GetVersionTs() daemon := sessiondaemon.Manager.Get(sdc.DaemonId) if daemon != nil { if daemon.JobId == "" { @@ -239,9 +237,7 @@ func (sdc *SessionDaemonController) GetRuntimeStatus() 
*BlockControllerRuntimeSt } func (sdc *SessionDaemonController) incrementVersion() { - sdc.Lock.Lock() - defer sdc.Lock.Unlock() - sdc.versionTs++ + sdc.versionTs.GetVersionTs() } func (sdc *SessionDaemonController) GetConnName() string { diff --git a/pkg/jobcontroller/jobcontroller.go b/pkg/jobcontroller/jobcontroller.go index 0e415d1105..59eca080eb 100644 --- a/pkg/jobcontroller/jobcontroller.go +++ b/pkg/jobcontroller/jobcontroller.go @@ -760,6 +760,17 @@ func StartJob(ctx context.Context, params StartJobParams) (string, error) { }, }) + routeId := wshutil.MakeJobRouteId(jobId) + waitCtx, cancelFn := context.WithTimeout(ctx, 5*time.Second) + err = wshutil.DefaultRouter.WaitForRegister(waitCtx, routeId) + cancelFn() + if err != nil { + log.Printf("[job:%s] warning: route not established after start: %v", jobId, err) + } else { + SetJobConnStatus(jobId, JobConnStatus_Connected) + log.Printf("[job:%s] route established, job connected", jobId) + } + go func() { defer func() { panichandler.PanicHandler("jobcontroller:runOutputLoop", recover()) diff --git a/pkg/sessiondaemon/sessiondaemon.go b/pkg/sessiondaemon/sessiondaemon.go index 6c7e7beed4..845da176dd 100644 --- a/pkg/sessiondaemon/sessiondaemon.go +++ b/pkg/sessiondaemon/sessiondaemon.go @@ -5,6 +5,7 @@ import ( "encoding/base64" "fmt" "log" + "runtime" "sync" "time" @@ -16,7 +17,7 @@ import ( ) const ( - DefaultAnonymousIdleTimeout = 3600 // 1h + DefaultAnonymousIdleTimeout = 600 // 10min DefaultNamedIdleTimeout = 86400 // 24h IdleCheckInterval = 60 // 检查间隔(秒) ) @@ -169,13 +170,16 @@ func (sd *SessionDaemonManager) AttachBlock(ctx context.Context, daemonId string sd.Lock.Unlock() defer daemon.Lock.Unlock() daemon.Blocks[blockId] = true + // Reset idle countdown: block attached, daemon is no longer idle. 
wstore.DBUpdateFn(ctx, daemonId, func(dbD *waveobj.SessionDaemon) { dbD.IdleSince = 0 }) } func (sd *SessionDaemonManager) DetachBlock(ctx context.Context, daemonId string, blockId string) { - log.Printf("[sessiondaemon] DetachBlock: daemon=%s block=%s", daemonId, blockId) + stackBuf := make([]byte, 4096) + stackLen := runtime.Stack(stackBuf, false) + log.Printf("[sessiondaemon] DetachBlock: daemon=%s block=%s stack:\n%s", daemonId, blockId, string(stackBuf[:stackLen])) sd.Lock.Lock() daemon, ok := sd.Daemons[daemonId] if !ok { @@ -187,8 +191,11 @@ func (sd *SessionDaemonManager) DetachBlock(ctx context.Context, daemonId string defer daemon.Lock.Unlock() delete(daemon.Blocks, blockId) if len(daemon.Blocks) == 0 { + // Start idle countdown (IdleTimeout in seconds). + // Survives app restart: if daemon was idle before shutdown, + // it resumes counting down from where it left off. wstore.DBUpdateFn(ctx, daemonId, func(dbD *waveobj.SessionDaemon) { - dbD.IdleSince = time.Now().UnixMilli() + dbD.IdleSince = dbD.IdleTimeout }) } } @@ -258,6 +265,35 @@ func (sd *SessionDaemonManager) StartIdleReaper(ctx context.Context) { }() } +// cleanupDeadBlocks removes block IDs from the daemon's in-memory +// Blocks map that no longer exist in the database. This handles the +// case where a block was deleted without calling DetachBlock. 
+func (sd *SessionDaemonManager) cleanupDeadBlocks(ctx context.Context, daemonId string, memDaemon *SessionDaemon) { + memDaemon.Lock.Lock() + var deadBlocks []string + for blockId := range memDaemon.Blocks { + _, err := wstore.DBMustGet[*waveobj.Block](ctx, blockId) + if err != nil { + deadBlocks = append(deadBlocks, blockId) + } + } + for _, blockId := range deadBlocks { + delete(memDaemon.Blocks, blockId) + } + memDaemon.Lock.Unlock() + + if len(deadBlocks) > 0 { + log.Printf("[sessiondaemon] cleanupDeadBlocks: daemon=%s removed %d dead blocks: %v", daemonId, len(deadBlocks), deadBlocks) + remaining := len(memDaemon.Blocks) + if remaining == 0 { + // All blocks are dead, start idle countdown. + wstore.DBUpdateFn(ctx, daemonId, func(dbD *waveobj.SessionDaemon) { + dbD.IdleSince = dbD.IdleTimeout + }) + } + } +} + func (sd *SessionDaemonManager) reapIdleDaemons(ctx context.Context) { allDaemons, err := wstore.DBGetAllObjsByType[*waveobj.SessionDaemon](ctx, waveobj.OType_SessionDaemon) if err != nil { @@ -274,22 +310,36 @@ func (sd *SessionDaemonManager) reapIdleDaemons(ctx context.Context) { sd.Lock.Unlock() if hasMem && memDaemon.HasAttachedBlocks() { + // Verify all attached blocks are still alive. If a block + // was deleted without proper detach, clean it up here + // to prevent the daemon from holding onto a dead block forever. + sd.cleanupDeadBlocks(ctx, dbDaemon.OID, memDaemon) + if memDaemon.HasAttachedBlocks() { + continue + } + } + + if dbDaemon.IdleTimeout <= 0 { continue } - if dbDaemon.IdleTimeout <= 0 || dbDaemon.IdleSince == 0 { + // IdleSince is a countdown in seconds (set to IdleTimeout when idle starts). + // Decrement inside the DB update closure to avoid race with concurrent AttachBlock + // which resets IdleSince to 0. 
+ var newRemaining int64 + wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbD *waveobj.SessionDaemon) { + dbD.IdleSince -= IdleCheckInterval + newRemaining = dbD.IdleSince + }) + if newRemaining > 0 { continue } - if time.Since(time.UnixMilli(dbDaemon.IdleSince)) > time.Duration(dbDaemon.IdleTimeout)*time.Second { - log.Printf("[sessiondaemon:%s] idle timeout reached, terminating", dbDaemon.OID) - if hasMem { - memDaemon.Stop(ctx) - sd.Remove(dbDaemon.OID) - } - wstore.DBUpdateFn(ctx, dbDaemon.OID, func(sdDb *waveobj.SessionDaemon) { - sdDb.Status = "done" - }) + log.Printf("[sessiondaemon:%s] idle timeout reached, terminating", dbDaemon.OID) + if hasMem { + memDaemon.Stop(ctx) + sd.Remove(dbDaemon.OID) } + wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, dbDaemon.OID) } } diff --git a/pkg/wshrpc/wshserver/wshserver.go b/pkg/wshrpc/wshserver/wshserver.go index 4333661c18..88553eed4f 100644 --- a/pkg/wshrpc/wshserver/wshserver.go +++ b/pkg/wshrpc/wshserver/wshserver.go @@ -309,6 +309,10 @@ func (ws *WshServer) CreateSubBlockCommand(ctx context.Context, data wshrpc.Comm } func (ws *WshServer) ControllerDestroyCommand(ctx context.Context, blockId string) error { + status := blockcontroller.GetBlockControllerRuntimeStatus(blockId) + if status != nil && status.ShellProcStatus == blockcontroller.Status_Running { + return nil + } blockcontroller.DestroyBlockController(blockId) return nil } @@ -1607,7 +1611,7 @@ func (ws *WshServer) SessionCreateCommand(ctx context.Context, data wshrpc.Comma } func (ws *WshServer) SessionDeleteCommand(ctx context.Context, data wshrpc.CommandSessionDeleteData) error { - _, err := wstore.DBGet[*waveobj.SessionDaemon](ctx, data.DaemonId) + _, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) if err != nil { return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) } @@ -1618,11 +1622,9 @@ func (ws *WshServer) SessionDeleteCommand(ctx context.Context, data wshrpc.Comma sessiondaemon.Manager.Remove(data.DaemonId) } 
- err = wstore.DBUpdateFn(ctx, data.DaemonId, func(sd *waveobj.SessionDaemon) { - sd.Status = "done" - }) + err = wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, data.DaemonId) if err != nil { - return fmt.Errorf("update session daemon status: %w", err) + return fmt.Errorf("delete session daemon: %w", err) } return nil } @@ -1633,7 +1635,7 @@ func (ws *WshServer) SessionListCommand(ctx context.Context, data wshrpc.Command return nil, fmt.Errorf("list session daemons: %w", err) } - var rtn []wshrpc.SessionInfoRtnData + rtn := make([]wshrpc.SessionInfoRtnData, 0) for _, dbDaemon := range allDaemons { if dbDaemon.IsAnonymous && !data.ShowAll { continue @@ -1651,7 +1653,7 @@ func (ws *WshServer) SessionListCommand(ctx context.Context, data wshrpc.Command } func (ws *WshServer) SessionAttachCommand(ctx context.Context, data wshrpc.CommandSessionAttachData) error { - dbDaemon, err := wstore.DBGet[*waveobj.SessionDaemon](ctx, data.DaemonId) + dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) if err != nil { return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) } @@ -1675,7 +1677,7 @@ func (ws *WshServer) SessionAttachCommand(ctx context.Context, data wshrpc.Comma } func (ws *WshServer) SessionDetachCommand(ctx context.Context, data wshrpc.CommandSessionDetachData) error { - _, err := wstore.DBGet[*waveobj.SessionDaemon](ctx, data.DaemonId) + _, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) if err != nil { return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) } @@ -1701,7 +1703,7 @@ func (ws *WshServer) SessionDetachCommand(ctx context.Context, data wshrpc.Comma } func (ws *WshServer) SessionInfoCommand(ctx context.Context, data wshrpc.CommandSessionInfoData) (*wshrpc.SessionInfoRtnData, error) { - dbDaemon, err := wstore.DBGet[*waveobj.SessionDaemon](ctx, data.DaemonId) + dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) if err != nil { return nil, 
fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) } @@ -1709,7 +1711,7 @@ func (ws *WshServer) SessionInfoCommand(ctx context.Context, data wshrpc.Command } func (ws *WshServer) SessionTagCommand(ctx context.Context, data wshrpc.CommandSessionTagData) error { - _, err := wstore.DBGet[*waveobj.SessionDaemon](ctx, data.DaemonId) + _, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) if err != nil { return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) } @@ -1732,6 +1734,9 @@ func (ws *WshServer) SessionTagCommand(ctx context.Context, data wshrpc.CommandS } func buildSessionInfoRtnData(ctx context.Context, dbDaemon *waveobj.SessionDaemon) (*wshrpc.SessionInfoRtnData, error) { + if dbDaemon == nil { + return nil, fmt.Errorf("session daemon is nil") + } blocks := sessiondaemon.Manager.GetBlocksForDaemon(dbDaemon.OID) return &wshrpc.SessionInfoRtnData{ DaemonId: dbDaemon.OID, diff --git a/pkg/wstore/wstore_dbsessionmigration.go b/pkg/wstore/wstore_dbsessionmigration.go index bf32abd88a..85c6a78a96 100644 --- a/pkg/wstore/wstore_dbsessionmigration.go +++ b/pkg/wstore/wstore_dbsessionmigration.go @@ -49,7 +49,7 @@ func runSessionDaemonMigration(ctx context.Context) error { IsAnonymous: true, Status: "running", CreatedAt: time.Now().UnixMilli(), - IdleTimeout: 3600, + IdleTimeout: 60, } err = DBInsert(ctx, dbDaemon) From 7ba5ba5324b6b3b2c4311163424fc38419d9bd9b Mon Sep 17 00:00:00 2001 From: lyx-tec Date: Sun, 14 Jun 2026 09:26:25 +0800 Subject: [PATCH 18/36] fix: session switching output duplication, stale job recovery, and session list UX Backend fixes: - ResyncController: detect stale session daemon job (dead but JobId still set) and clear it to allow restart on next resync (blockcontroller.go) - doReconnectJob: on JobManagerGone, also clear session daemon JobId in DB so daemons don't hold stale references across restart (jobcontroller.go) - Add jobcontroller import to blockcontroller Frontend fixes: - attachToDaemon 
race: track subscription with _mainFileSub, unsubscribe old before creating new. Add _attachSeq to prevent interleaved concurrent calls from both subscribing to the same file subject (termwrap.ts) - TermResyncHandler: update lastConnStatus even when hasResized=false to prevent stale initial state from blocking resync on conn changes (term.tsx) - Session list: show daemon ID (first 8 chars, monospace) instead of SSH connection URL and CWD (session-daemon-indicator.tsx) --- .../app/block/session-daemon-indicator.tsx | 36 +++++++++++++------ frontend/app/view/term/term.tsx | 1 + frontend/app/view/term/termwrap.ts | 21 +++++++++-- frontend/types/gotypes.d.ts | 2 ++ pkg/blockcontroller/blockcontroller.go | 30 +++++++++++++++- .../sessiondaemoncontroller.go | 25 ++++--------- pkg/jobcontroller/jobcontroller.go | 12 +++++++ pkg/wshrpc/wshrpctypes.go | 6 ++-- pkg/wshrpc/wshserver/wshserver.go | 19 ++++++++++ 9 files changed, 118 insertions(+), 34 deletions(-) diff --git a/frontend/app/block/session-daemon-indicator.tsx b/frontend/app/block/session-daemon-indicator.tsx index 98acc842a8..48ecf7320c 100644 --- a/frontend/app/block/session-daemon-indicator.tsx +++ b/frontend/app/block/session-daemon-indicator.tsx @@ -10,12 +10,27 @@ import * as jotai from "jotai"; import { useEffect, useRef, useState } from "react"; import { BlockEnv } from "./blockenv"; +function formatCreatedTime(ms: number): string { + const d = new Date(ms); + const now = new Date(); + const diffMs = now.getTime() - d.getTime(); + const diffMin = Math.floor(diffMs / 60000); + if (diffMin < 1) return "just now"; + if (diffMin < 60) return `${diffMin}m ago`; + const diffHr = Math.floor(diffMin / 60); + if (diffHr < 24) return `${diffHr}h ago`; + const diffDay = Math.floor(diffHr / 24); + if (diffDay < 7) return `${diffDay}d ago`; + return d.toLocaleDateString(undefined, { month: "short", day: "numeric", year: "numeric" }); +} + interface SessionInfo { daemonid: string; name: string; connection: string; 
status: string; isanonymous: boolean; + createdat?: number; blocks?: string[]; jobid?: string; } @@ -125,10 +140,7 @@ export function SessionDaemonIndicator({ blockId, useTermHeader }: SessionDaemon if (targetDaemonId === daemonId) return; fireAndForget(async () => { try { - if (daemonId) { - await RpcApi.SessionDetachCommand(TabRpcClient, { daemonid: daemonId, blockid: blockId }); - } - await RpcApi.SessionAttachCommand(TabRpcClient, { daemonid: targetDaemonId, blockid: blockId }); + await RpcApi.SessionAttachCommand(TabRpcClient, { daemonid: targetDaemonId, blockid: blockId, currentdaemonid: daemonId ?? undefined }); setShowPopup(false); } catch (e) { console.log("error switching session:", e); @@ -270,7 +282,7 @@ export function SessionDaemonIndicator({ blockId, useTermHeader }: SessionDaemon lineHeight: "18px", }} > - {s.name || s.connection || "Unnamed session"} + {s.name || "Unnamed session"}
- {s.name ? s.connection : s.daemonid.slice(0, 8)} + {s.daemonid.slice(0, 8)}
@@ -293,6 +306,11 @@ export function SessionDaemonIndicator({ blockId, useTermHeader }: SessionDaemon }} > + + {formatCreatedTime(s.createdat)} + {canClose ? ( { @@ -323,10 +341,8 @@ export function SessionDaemonIndicator({ blockId, useTermHeader }: SessionDaemon Close ) : ( - - {isActive ? "Active" : `${blockCount} block${blockCount === 1 ? "" : "s"}`} + + {isActive ? "active" : `${blockCount} block${blockCount === 1 ? "" : "s"}`} )} diff --git a/frontend/app/view/term/term.tsx b/frontend/app/view/term/term.tsx index b19ceff00b..55efe32e4a 100644 --- a/frontend/app/view/term/term.tsx +++ b/frontend/app/view/term/term.tsx @@ -52,6 +52,7 @@ const TermResyncHandler = React.memo(({ blockId, model }: TerminalViewProps) => React.useEffect(() => { if (!model.termRef.current?.hasResized) { console.log("[TermResyncHandler] hasResized=false, skipping resync", blockId); + setLastConnStatus(connStatus); return; } const isConnected = connStatus?.status == "connected"; diff --git a/frontend/app/view/term/termwrap.ts b/frontend/app/view/term/termwrap.ts index 76ab3030eb..d0588ab924 100644 --- a/frontend/app/view/term/termwrap.ts +++ b/frontend/app/view/term/termwrap.ts @@ -24,6 +24,7 @@ import { SearchAddon } from "@xterm/addon-search"; import { SerializeAddon } from "@xterm/addon-serialize"; import { WebLinksAddon } from "@xterm/addon-web-links"; import { WebglAddon } from "@xterm/addon-webgl"; +import { Subscription } from "rxjs"; import * as TermTypes from "@xterm/xterm"; import { Terminal } from "@xterm/xterm"; import debug from "debug"; @@ -86,6 +87,8 @@ export class TermWrap { searchAddon: SearchAddon; serializeAddon: SerializeAddon; mainFileSubject: SubjectWithRef; + _mainFileSub: Subscription | null = null; + _attachSeq: number = 0; loaded: boolean; heldData: Uint8Array[]; handleResize_debounced: () => void; @@ -331,9 +334,15 @@ export class TermWrap { } async attachToDaemon(jobId: string): Promise { + this._attachSeq++; + const mySeq = this._attachSeq; if (this.zoneId 
=== jobId) { return; } + if (this._mainFileSub) { + this._mainFileSub.unsubscribe(); + this._mainFileSub = null; + } if (this.mainFileSubject) { this.mainFileSubject.release(); } @@ -342,14 +351,20 @@ export class TermWrap { this.heldData = []; this.zoneId = jobId; this.mainFileSubject = getFileSubject(this.getZoneId(), TermFileName); - this.mainFileSubject.subscribe(this.handleNewFileSubjectData.bind(this)); + this._mainFileSub = this.mainFileSubject.subscribe(this.handleNewFileSubjectData.bind(this)); await this.loadInitialTerminalData(); } async detachFromDaemon(): Promise { + this._attachSeq++; + const mySeq = this._attachSeq; if (this.zoneId === this.blockId) { return; } + if (this._mainFileSub) { + this._mainFileSub.unsubscribe(); + this._mainFileSub = null; + } if (this.mainFileSubject) { this.mainFileSubject.release(); } @@ -358,7 +373,7 @@ export class TermWrap { this.heldData = []; this.zoneId = this.blockId; this.mainFileSubject = getFileSubject(this.getZoneId(), TermFileName); - this.mainFileSubject.subscribe(this.handleNewFileSubjectData.bind(this)); + this._mainFileSub = this.mainFileSubject.subscribe(this.handleNewFileSubjectData.bind(this)); await this.loadInitialTerminalData(); } @@ -443,7 +458,7 @@ export class TermWrap { } this.mainFileSubject = getFileSubject(this.getZoneId(), TermFileName); - this.mainFileSubject.subscribe(this.handleNewFileSubjectData.bind(this)); + this._mainFileSub = this.mainFileSubject.subscribe(this.handleNewFileSubjectData.bind(this)); console.log("[termwrap] initTerminal: zoneId=", this.getZoneId(), "blockId=", this.blockId); try { diff --git a/frontend/types/gotypes.d.ts b/frontend/types/gotypes.d.ts index da0bb147b5..e6e5c5fa40 100644 --- a/frontend/types/gotypes.d.ts +++ b/frontend/types/gotypes.d.ts @@ -639,6 +639,7 @@ declare global { type CommandSessionAttachData = { daemonid: string; blockid: string; + currentdaemonid?: string; }; // wshrpc.CommandSessionCreateData @@ -1436,6 +1437,7 @@ declare global { jobid?: 
string; isanonymous: boolean; status: string; + cwd?: string; createdat: number; idletimeout: number; idlesince?: number; diff --git a/pkg/blockcontroller/blockcontroller.go b/pkg/blockcontroller/blockcontroller.go index 5a4af19f08..d2be11a06d 100644 --- a/pkg/blockcontroller/blockcontroller.go +++ b/pkg/blockcontroller/blockcontroller.go @@ -17,6 +17,7 @@ import ( "github.com/google/uuid" "github.com/wavetermdev/waveterm/pkg/blocklogger" "github.com/wavetermdev/waveterm/pkg/filestore" + "github.com/wavetermdev/waveterm/pkg/jobcontroller" "github.com/wavetermdev/waveterm/pkg/remote" "github.com/wavetermdev/waveterm/pkg/remote/conncontroller" "github.com/wavetermdev/waveterm/pkg/sessiondaemon" @@ -257,8 +258,11 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts needsReplace = true } case *SessionDaemonController: + sdc := existing.(*SessionDaemonController) if daemonId == "" || conncontroller.IsLocalConnName(connName) || conncontroller.IsWslConnName(connName) { needsReplace = true + } else if daemonId != sdc.DaemonId { + needsReplace = true } case *TsunamiController: if controllerName != BlockController_Tsunami { @@ -330,7 +334,31 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts // Check if we need to start/restart status := controller.GetRuntimeStatus() - if status.ShellProcStatus == Status_Init { + if status.ShellProcStatus == Status_Running { + // For SessionDaemonController, verify the job is still alive. + // The remote job manager may have died, leaving the daemon with a stale JobId. + // If so, clear the JobId so Start() runs again on the next ResyncController call. 
+ if sdc, ok := controller.(*SessionDaemonController); ok { + if daemon := sessiondaemon.Manager.Get(sdc.DaemonId); daemon != nil && daemon.JobId != "" { + jobStatus, jErr := jobcontroller.GetJobManagerStatus(ctx, daemon.JobId) + if jErr != nil || jobStatus != jobcontroller.JobManagerStatus_Running { + log.Printf("[sessiondaemon] resync: job %s not running (status=%s err=%v), recreating controller", daemon.JobId, jobStatus, jErr) + daemon.Lock.Lock() + daemon.JobId = "" + daemon.Lock.Unlock() + wstore.DBUpdateFn(ctx, sdc.DaemonId, func(dbSd *waveobj.SessionDaemon) { + dbSd.JobId = "" + dbSd.Status = "init" + }) + stopBlockController(blockId) + time.Sleep(100 * time.Millisecond) + existing = nil + // Fall through to controller recreation + Start below + } + } + } + } + if status.ShellProcStatus == Status_Init || existing == nil { // For shell/cmd, check connection status first (for non-local connections) if controllerName == BlockController_Shell || controllerName == BlockController_Cmd { if !conncontroller.IsLocalConnName(connName) { diff --git a/pkg/blockcontroller/sessiondaemoncontroller.go b/pkg/blockcontroller/sessiondaemoncontroller.go index ce95684a8c..cd6ad8cc43 100644 --- a/pkg/blockcontroller/sessiondaemoncontroller.go +++ b/pkg/blockcontroller/sessiondaemoncontroller.go @@ -84,13 +84,13 @@ func (sdc *SessionDaemonController) Start(ctx context.Context, blockMeta waveobj log.Printf("[sessiondaemon] start: reconnecting to existing job %s", dbDaemon.JobId) err = jobcontroller.ReconnectJob(ctx, dbDaemon.JobId, rtOpts) if err != nil { - log.Printf("[sessiondaemon] start: reconnect failed job=%s err=%v", dbDaemon.JobId, err) - return fmt.Errorf("error reconnecting to existing job %q: %w", dbDaemon.JobId, err) + log.Printf("[sessiondaemon] start: reconnect failed job=%s err=%v, starting new job", dbDaemon.JobId, err) + } else { + log.Printf("[sessiondaemon] start: reconnect ok job=%s", dbDaemon.JobId) + sdc.incrementVersion() + sdc.sendControllerStatus() + return 
nil } - log.Printf("[sessiondaemon] start: reconnect ok job=%s", dbDaemon.JobId) - sdc.incrementVersion() - sdc.sendControllerStatus() - return nil } } @@ -190,18 +190,7 @@ func (sdc *SessionDaemonController) Stop(graceful bool, newStatus string, destro log.Printf("[sessiondaemon] stop: db lookup failed daemon=%s err=%v", sdc.DaemonId, err) return } - remaining := len(sessiondaemon.Manager.GetBlocksForDaemon(sdc.DaemonId)) - if dbDaemon.IsAnonymous && remaining == 0 { - log.Printf("[sessiondaemon] stop: stopping anonymous daemon %s (no blocks remaining)", sdc.DaemonId) - daemon := sessiondaemon.Manager.Get(sdc.DaemonId) - if daemon != nil { - daemon.Stop(ctx) - } - sessiondaemon.Manager.Remove(sdc.DaemonId) - wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, sdc.DaemonId) - } else { - log.Printf("[sessiondaemon] stop: daemon=%s remaining blocks=%d anonymous=%v", sdc.DaemonId, remaining, dbDaemon.IsAnonymous) - } + log.Printf("[sessiondaemon] stop: daemon=%s remaining blocks=%d anonymous=%v", sdc.DaemonId, len(sessiondaemon.Manager.GetBlocksForDaemon(sdc.DaemonId)), dbDaemon.IsAnonymous) } func (sdc *SessionDaemonController) SendInput(inputUnion *BlockInputUnion) error { diff --git a/pkg/jobcontroller/jobcontroller.go b/pkg/jobcontroller/jobcontroller.go index 59eca080eb..4ded5b7e6b 100644 --- a/pkg/jobcontroller/jobcontroller.go +++ b/pkg/jobcontroller/jobcontroller.go @@ -1140,6 +1140,18 @@ func doReconnectJob(ctx context.Context, jobId string, rtOpts *waveobj.RuntimeOp } else { sendBlockJobStatusEventByJob(ctx, updatedJob) } + // Clear session daemon references to this job so daemons can be restarted + sessionDaemons, qErr := wstore.DBGetAllObjsByType[*waveobj.SessionDaemon](ctx, waveobj.OType_SessionDaemon) + if qErr == nil { + for _, sd := range sessionDaemons { + if sd.JobId == jobId { + wstore.DBUpdateFn(ctx, sd.OID, func(dbSd *waveobj.SessionDaemon) { + dbSd.JobId = "" + dbSd.Status = "init" + }) + } + } + } 
telemetry.GoRecordTEventWrap(&telemetrydata.TEvent{ Event: "job:done", Props: telemetrydata.TEventProps{ diff --git a/pkg/wshrpc/wshrpctypes.go b/pkg/wshrpc/wshrpctypes.go index f8a94e928b..eeed88e6ef 100644 --- a/pkg/wshrpc/wshrpctypes.go +++ b/pkg/wshrpc/wshrpctypes.go @@ -951,8 +951,9 @@ type CommandSessionListData struct { } type CommandSessionAttachData struct { - DaemonId string `json:"daemonid"` - BlockId string `json:"blockid"` + DaemonId string `json:"daemonid"` + BlockId string `json:"blockid"` + CurrentDaemonId string `json:"currentdaemonid,omitempty"` } type CommandSessionDetachData struct { @@ -976,6 +977,7 @@ type SessionInfoRtnData struct { JobId string `json:"jobid,omitempty"` IsAnonymous bool `json:"isanonymous"` Status string `json:"status"` + Cwd string `json:"cwd,omitempty"` CreatedAt int64 `json:"createdat"` IdleTimeout int64 `json:"idletimeout"` IdleSince int64 `json:"idlesince,omitempty"` diff --git a/pkg/wshrpc/wshserver/wshserver.go b/pkg/wshrpc/wshserver/wshserver.go index 88553eed4f..19323d262f 100644 --- a/pkg/wshrpc/wshserver/wshserver.go +++ b/pkg/wshrpc/wshserver/wshserver.go @@ -1653,8 +1653,19 @@ func (ws *WshServer) SessionListCommand(ctx context.Context, data wshrpc.Command } func (ws *WshServer) SessionAttachCommand(ctx context.Context, data wshrpc.CommandSessionAttachData) error { + if data.CurrentDaemonId != "" && data.CurrentDaemonId == data.DaemonId { + return nil + } + + if data.CurrentDaemonId != "" { + sessiondaemon.Manager.DetachBlock(ctx, data.CurrentDaemonId, data.BlockId) + } + dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) if err != nil { + if data.CurrentDaemonId != "" { + sessiondaemon.Manager.AttachBlock(ctx, data.CurrentDaemonId, data.BlockId) + } return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) } @@ -1670,9 +1681,15 @@ func (ws *WshServer) SessionAttachCommand(ctx context.Context, data wshrpc.Comma block.JobId = dbDaemon.JobId }) if err != nil { + 
sessiondaemon.Manager.DetachBlock(ctx, data.DaemonId, data.BlockId) + if data.CurrentDaemonId != "" { + sessiondaemon.Manager.AttachBlock(ctx, data.CurrentDaemonId, data.BlockId) + } return fmt.Errorf("update block meta: %w", err) } + resyncBlockController(ctx, data.BlockId) + wcore.SendWaveObjUpdate(waveobj.MakeORef(waveobj.OType_Block, data.BlockId)) return nil } @@ -1698,6 +1715,7 @@ func (ws *WshServer) SessionDetachCommand(ctx context.Context, data wshrpc.Comma return fmt.Errorf("update block meta: %w", err) } resyncBlockController(ctx, blockId) + wcore.SendWaveObjUpdate(waveobj.MakeORef(waveobj.OType_Block, blockId)) } return nil } @@ -1745,6 +1763,7 @@ func buildSessionInfoRtnData(ctx context.Context, dbDaemon *waveobj.SessionDaemo JobId: dbDaemon.JobId, IsAnonymous: dbDaemon.IsAnonymous, Status: dbDaemon.Status, + Cwd: dbDaemon.Cwd, CreatedAt: dbDaemon.CreatedAt, IdleTimeout: dbDaemon.IdleTimeout, IdleSince: dbDaemon.IdleSince, From 553a3d1162cb2057063863487ec227a93845396a Mon Sep 17 00:00:00 2001 From: lyx-tec Date: Sun, 14 Jun 2026 18:55:35 +0800 Subject: [PATCH 19/36] session: name display, rename, sorting, and cross-block sync - SessionDaemon: add LastActiveAt field for recently-active sort ordering - RecordSessionActivityCommand RPC: updates lastactiveat on block focus - FocusManager: subscribe to blockFocusAtom, call RecordSessionActivity - SessionListCommand: sort by LastActiveAt desc (fallback CreatedAt) - Session list popup: name as main title, SSH addr, Sess/Job ID labels - Block header: show session name for named sessions, daemon ID for anon - Inline rename of session name via SessionTagCommand in popup - Shared jotai atom family for cross-block header sync on rename --- .../app/block/session-daemon-indicator.tsx | 151 ++++++++++++++++-- frontend/app/store/focusManager.ts | 21 ++- frontend/app/store/wshclientapi.ts | 6 + frontend/types/gotypes.d.ts | 7 + pkg/waveobj/wtype.go | 3 +- pkg/wshrpc/wshclient/wshclient.go | 6 + 
pkg/wshrpc/wshrpctypes.go | 6 + pkg/wshrpc/wshserver/wshserver.go | 38 +++-- 8 files changed, 212 insertions(+), 26 deletions(-) diff --git a/frontend/app/block/session-daemon-indicator.tsx b/frontend/app/block/session-daemon-indicator.tsx index 48ecf7320c..2f427da453 100644 --- a/frontend/app/block/session-daemon-indicator.tsx +++ b/frontend/app/block/session-daemon-indicator.tsx @@ -3,13 +3,30 @@ import { RpcApi } from "@/app/store/wshclientapi"; import { TabRpcClient } from "@/app/store/wshrpcutil"; +import { globalStore } from "@/app/store/jotaiStore"; import { useWaveEnv } from "@/app/waveenv/waveenv"; import { fireAndForget } from "@/util/util"; import { autoUpdate, flip, FloatingPortal, offset, shift, useFloating } from "@floating-ui/react"; import * as jotai from "jotai"; -import { useEffect, useRef, useState } from "react"; +import { useCallback, useEffect, useRef, useState } from "react"; import { BlockEnv } from "./blockenv"; +interface SessionDisplayData { + name: string | null; + isanonymous: boolean; +} + +const sessionDisplayAtomMap = new Map>(); + +function getSessionDisplayAtom(daemonId: string): jotai.PrimitiveAtom { + let a = sessionDisplayAtomMap.get(daemonId); + if (!a) { + a = jotai.atom({ name: null, isanonymous: true }); + sessionDisplayAtomMap.set(daemonId, a); + } + return a; +} + function formatCreatedTime(ms: number): string { const d = new Date(ms); const now = new Date(); @@ -33,6 +50,7 @@ interface SessionInfo { createdat?: number; blocks?: string[]; jobid?: string; + lastactiveat?: number; } interface SessionDaemonIndicatorProps { @@ -97,6 +115,11 @@ export function SessionDaemonIndicator({ blockId, useTermHeader }: SessionDaemon const daemonId = jotai.useAtomValue(waveEnv.getBlockMetaKeyAtom(blockId, "session:daemonid")); const [showPopup, setShowPopup] = useState(false); const [sessions, setSessions] = useState([]); + const [editingId, setEditingId] = useState(null); + const [editName, setEditName] = useState(""); + const 
sessionDisplayAtom = daemonId ? getSessionDisplayAtom(daemonId) : null; + const sessionDisplay = jotai.useAtomValue(sessionDisplayAtom ?? jotai.atom({ name: null, isanonymous: true })); + const editInputRef = useRef(null); const popupRef = useRef(null); const iconRef = useRef(null); const { refs, floatingStyles } = useFloating({ @@ -119,6 +142,19 @@ export function SessionDaemonIndicator({ blockId, useTermHeader }: SessionDaemon }); }, [showPopup]); + useEffect(() => { + if (!daemonId) return; + fireAndForget(async () => { + try { + const info = await RpcApi.SessionInfoCommand(TabRpcClient, { daemonid: daemonId }); + if (info) { + const atom = getSessionDisplayAtom(daemonId); + globalStore.set(atom, { name: info.name || null, isanonymous: info.isanonymous }); + } + } catch (_) {} + }); + }, [daemonId]); + useEffect(() => { function handleClick(e: MouseEvent) { if ( @@ -138,6 +174,7 @@ export function SessionDaemonIndicator({ blockId, useTermHeader }: SessionDaemon const handleAttach = (targetDaemonId: string) => { if (targetDaemonId === daemonId) return; + if (editingId) return; fireAndForget(async () => { try { await RpcApi.SessionAttachCommand(TabRpcClient, { daemonid: targetDaemonId, blockid: blockId, currentdaemonid: daemonId ?? 
undefined }); @@ -148,6 +185,34 @@ export function SessionDaemonIndicator({ blockId, useTermHeader }: SessionDaemon }); }; + const handleStartEdit = useCallback((daemonId: string, currentName: string) => { + setEditingId(daemonId); + setEditName(currentName || ""); + setTimeout(() => editInputRef.current?.focus(), 0); + }, []); + + const handleSaveEdit = useCallback(() => { + const id = editingId; + const name = editName.trim(); + if (!id) return; + setEditingId(null); + const atom = getSessionDisplayAtom(id); + globalStore.set(atom, { name: name || null, isanonymous: !name }); + fireAndForget(async () => { + try { + await RpcApi.SessionTagCommand(TabRpcClient, { daemonid: id, name: name || "Unnamed session" }); + const list = await RpcApi.SessionListCommand(TabRpcClient, { showall: true }); + setSessions((list ?? []) as SessionInfo[]); + } catch (e) { + console.log("error renaming session:", e); + } + }); + }, [editingId, editName]); + + const handleCancelEdit = useCallback(() => { + setEditingId(null); + }, []); + if (!useTermHeader) { return null; } @@ -167,7 +232,7 @@ export function SessionDaemonIndicator({ blockId, useTermHeader }: SessionDaemon {daemonId && ( - {daemonId.slice(0, 8)} + {sessionDisplay.isanonymous ? daemonId.slice(0, 8) : (sessionDisplay.name || daemonId.slice(0, 8))} )} @@ -274,16 +339,63 @@ export function SessionDaemonIndicator({ blockId, useTermHeader }: SessionDaemon />
-
- {s.name || "Unnamed session"} -
+ {editingId === s.daemonid ? ( + setEditName(e.target.value)} + onKeyDown={(e) => { + if (e.key === "Enter") handleSaveEdit(); + if (e.key === "Escape") handleCancelEdit(); + }} + onBlur={handleSaveEdit} + onClick={(e) => e.stopPropagation()} + style={{ + width: "100%", + fontWeight: 650, + color: "var(--text-primary)", + fontSize: 14, + lineHeight: "20px", + background: "rgba(148, 163, 184, 0.12)", + border: "1px solid rgba(56, 189, 248, 0.3)", + borderRadius: 4, + padding: "1px 6px", + outline: "none", + }} + /> + ) : ( +
{ + e.stopPropagation(); + handleStartEdit(s.daemonid, s.name); + }} + style={{ + ...truncateStyle, + fontWeight: 650, + color: "var(--text-primary)", + lineHeight: "20px", + fontSize: 14, + cursor: "text", + }} + title="Click to rename" + > + {s.name || "Unnamed session"} +
+ )} + {s.connection && ( +
+ {s.connection} +
+ )}
- {s.daemonid.slice(0, 8)} + Sess: {s.daemonid.slice(0, 8)}
+ {s.jobid && ( +
+ Job: {s.jobid.slice(0, 8)} +
+ )}
{ + const blockId = globalStore.get(this.blockFocusAtom); + if (blockId && blockId !== prevBlockId) { + prevBlockId = blockId; + try { + const daemonId = globalStore.get(getBlockMetaKeyAtom(blockId, "session:daemonid")); + if (daemonId) { + fireAndForget(() => RpcApi.RecordSessionActivityCommand(TabRpcClient, { daemonid: daemonId })); + } + } catch (_) {} + } else if (!blockId) { + prevBlockId = null; + } + }); } static getInstance(): FocusManager { diff --git a/frontend/app/store/wshclientapi.ts b/frontend/app/store/wshclientapi.ts index d5cad46c67..5155a22652 100644 --- a/frontend/app/store/wshclientapi.ts +++ b/frontend/app/store/wshclientapi.ts @@ -672,6 +672,12 @@ export class RpcApiType { return client.wshRpcCall("readappfile", data, opts); } + // command "recordsessionactivity" [call] + RecordSessionActivityCommand(client: WshClient, data: CommandRecordSessionActivityData, opts?: RpcOpts): Promise { + if (this.mockClient) return this.mockClient.mockWshRpcCall(client, "recordsessionactivity", data, opts); + return client.wshRpcCall("recordsessionactivity", data, opts); + } + // command "recordtevent" [call] RecordTEventCommand(client: WshClient, data: TEvent, opts?: RpcOpts): Promise { if (this.mockClient) return this.mockClient.mockWshRpcCall(client, "recordtevent", data, opts); diff --git a/frontend/types/gotypes.d.ts b/frontend/types/gotypes.d.ts index e6e5c5fa40..d93f65b6d9 100644 --- a/frontend/types/gotypes.d.ts +++ b/frontend/types/gotypes.d.ts @@ -528,6 +528,11 @@ declare global { modts?: number; }; + // wshrpc.CommandRecordSessionActivityData + type CommandRecordSessionActivityData = { + daemonid: string; + }; + // wshrpc.CommandRemoteDisconnectFromJobManagerData type CommandRemoteDisconnectFromJobManagerData = { jobid: string; @@ -1427,6 +1432,7 @@ declare global { createdat?: number; idletimeout?: number; idlesince?: number; + lastactiveat?: number; }; // wshrpc.SessionInfoRtnData @@ -1441,6 +1447,7 @@ declare global { createdat: number; 
idletimeout: number; idlesince?: number; + lastactiveat?: number; blocks?: string[]; }; diff --git a/pkg/waveobj/wtype.go b/pkg/waveobj/wtype.go index 4e50a20ae8..01323fa143 100644 --- a/pkg/waveobj/wtype.go +++ b/pkg/waveobj/wtype.go @@ -368,7 +368,8 @@ type SessionDaemon struct { Cwd string `json:"cwd,omitempty"` CreatedAt int64 `json:"createdat,omitempty"` IdleTimeout int64 `json:"idletimeout,omitempty"` - IdleSince int64 `json:"idlesince,omitempty"` // ms timestamp when last block detached (0 = has attached blocks) + IdleSince int64 `json:"idlesince,omitempty"` // ms timestamp when last block detached (0 = has attached blocks) + LastActiveAt int64 `json:"lastactiveat,omitempty"` // ms timestamp when a block last gained focus in this session Meta MetaMapType `json:"meta"` } diff --git a/pkg/wshrpc/wshclient/wshclient.go b/pkg/wshrpc/wshclient/wshclient.go index c8f11e6122..24f7535f1a 100644 --- a/pkg/wshrpc/wshclient/wshclient.go +++ b/pkg/wshrpc/wshclient/wshclient.go @@ -670,6 +670,12 @@ func ReadAppFileCommand(w *wshutil.WshRpc, data wshrpc.CommandReadAppFileData, o return resp, err } +// command "recordsessionactivity", wshserver.RecordSessionActivityCommand +func RecordSessionActivityCommand(w *wshutil.WshRpc, data wshrpc.CommandRecordSessionActivityData, opts *wshrpc.RpcOpts) error { + _, err := sendRpcRequestCallHelper[any](w, "recordsessionactivity", data, opts) + return err +} + // command "recordtevent", wshserver.RecordTEventCommand func RecordTEventCommand(w *wshutil.WshRpc, data telemetrydata.TEvent, opts *wshrpc.RpcOpts) error { _, err := sendRpcRequestCallHelper[any](w, "recordtevent", data, opts) diff --git a/pkg/wshrpc/wshrpctypes.go b/pkg/wshrpc/wshrpctypes.go index eeed88e6ef..80cf8b13bc 100644 --- a/pkg/wshrpc/wshrpctypes.go +++ b/pkg/wshrpc/wshrpctypes.go @@ -220,6 +220,7 @@ type WshRpcInterface interface { SessionDetachCommand(ctx context.Context, data CommandSessionDetachData) error SessionInfoCommand(ctx context.Context, data 
CommandSessionInfoData) (*SessionInfoRtnData, error) SessionTagCommand(ctx context.Context, data CommandSessionTagData) error + RecordSessionActivityCommand(ctx context.Context, data CommandRecordSessionActivityData) error } // for frontend @@ -970,6 +971,10 @@ type CommandSessionTagData struct { Name string `json:"name"` } +type CommandRecordSessionActivityData struct { + DaemonId string `json:"daemonid"` +} + type SessionInfoRtnData struct { DaemonId string `json:"daemonid"` Name string `json:"name"` @@ -981,5 +986,6 @@ type SessionInfoRtnData struct { CreatedAt int64 `json:"createdat"` IdleTimeout int64 `json:"idletimeout"` IdleSince int64 `json:"idlesince,omitempty"` + LastActiveAt int64 `json:"lastactiveat,omitempty"` Blocks []string `json:"blocks,omitempty"` } diff --git a/pkg/wshrpc/wshserver/wshserver.go b/pkg/wshrpc/wshserver/wshserver.go index 19323d262f..83c693b2ac 100644 --- a/pkg/wshrpc/wshserver/wshserver.go +++ b/pkg/wshrpc/wshserver/wshserver.go @@ -1647,6 +1647,11 @@ func (ws *WshServer) SessionListCommand(ctx context.Context, data wshrpc.Command rtn = append(rtn, *info) } sort.Slice(rtn, func(i, j int) bool { + ai := rtn[i].LastActiveAt + aj := rtn[j].LastActiveAt + if ai != aj { + return ai > aj + } return rtn[i].CreatedAt > rtn[j].CreatedAt }) return rtn, nil @@ -1751,23 +1756,34 @@ func (ws *WshServer) SessionTagCommand(ctx context.Context, data wshrpc.CommandS return nil } +func (ws *WshServer) RecordSessionActivityCommand(ctx context.Context, data wshrpc.CommandRecordSessionActivityData) error { + err := wstore.DBUpdateFn(ctx, data.DaemonId, func(sd *waveobj.SessionDaemon) { + sd.LastActiveAt = time.Now().UnixMilli() + }) + if err != nil { + return fmt.Errorf("record session activity: %w", err) + } + return nil +} + func buildSessionInfoRtnData(ctx context.Context, dbDaemon *waveobj.SessionDaemon) (*wshrpc.SessionInfoRtnData, error) { if dbDaemon == nil { return nil, fmt.Errorf("session daemon is nil") } blocks := 
sessiondaemon.Manager.GetBlocksForDaemon(dbDaemon.OID) return &wshrpc.SessionInfoRtnData{ - DaemonId: dbDaemon.OID, - Name: dbDaemon.Name, - Connection: dbDaemon.Connection, - JobId: dbDaemon.JobId, - IsAnonymous: dbDaemon.IsAnonymous, - Status: dbDaemon.Status, - Cwd: dbDaemon.Cwd, - CreatedAt: dbDaemon.CreatedAt, - IdleTimeout: dbDaemon.IdleTimeout, - IdleSince: dbDaemon.IdleSince, - Blocks: blocks, + DaemonId: dbDaemon.OID, + Name: dbDaemon.Name, + Connection: dbDaemon.Connection, + JobId: dbDaemon.JobId, + IsAnonymous: dbDaemon.IsAnonymous, + Status: dbDaemon.Status, + Cwd: dbDaemon.Cwd, + CreatedAt: dbDaemon.CreatedAt, + IdleTimeout: dbDaemon.IdleTimeout, + IdleSince: dbDaemon.IdleSince, + LastActiveAt: dbDaemon.LastActiveAt, + Blocks: blocks, }, nil } From fdf351de0e7d9c6d19db0568940bea75b500ecc1 Mon Sep 17 00:00:00 2001 From: lyx-tec Date: Sun, 14 Jun 2026 23:40:28 +0800 Subject: [PATCH 20/36] feat: named session creation, universal create button, and floating-ui refactor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove auto-create anonymous session daemon for SSH blocks in ResyncController SSH blocks now behave as plain terminals unless user explicitly creates a session. Stale daemon cleanup only clears daemonId, no longer auto-recreates. (blockcontroller.go) - Refactor floating-ui from callback refs to elements option Replaces inline ref={(elem) => refs.setFloating(elem)} with stable useRef + elements: { reference, floating } pattern. Eliminates React #185 at the architectural level. Removes all ref callback gymnastics. - Show '+ Create new session' button for all blocks Previously only shown for blocks without daemon. Now always visible. For blocks with existing daemon, creates then attaches with currentdaemonid to switch to the new session. 
- Two-step session creation with optional name prompt Click button → inline input (placeholder: Session name (optional)) Enter → create + tag (if named) + attach + close popup Escape → cancel Whitespace-only input treated as anonymous. - Cleanup: useCallback on handleAttach, creatingRef guard on handleCreateAndAttach, formatCreatedTime handles undefined, consistent deps arrays. --- .../app/block/session-daemon-indicator.tsx | 136 ++++++++++++++++-- pkg/blockcontroller/blockcontroller.go | 22 +-- 2 files changed, 123 insertions(+), 35 deletions(-) diff --git a/frontend/app/block/session-daemon-indicator.tsx b/frontend/app/block/session-daemon-indicator.tsx index 2f427da453..ab96dc9432 100644 --- a/frontend/app/block/session-daemon-indicator.tsx +++ b/frontend/app/block/session-daemon-indicator.tsx @@ -27,7 +27,8 @@ function getSessionDisplayAtom(daemonId: string): jotai.PrimitiveAtom(); const daemonId = jotai.useAtomValue(waveEnv.getBlockMetaKeyAtom(blockId, "session:daemonid")); + const connName = jotai.useAtomValue(waveEnv.getBlockMetaKeyAtom(blockId, "connection")); const [showPopup, setShowPopup] = useState(false); const [sessions, setSessions] = useState([]); const [editingId, setEditingId] = useState(null); const [editName, setEditName] = useState(""); + const [creating, setCreating] = useState(false); + const creatingRef = useRef(false); + const [showCreateInput, setShowCreateInput] = useState(false); + const [newSessionName, setNewSessionName] = useState(""); + const createInputRef = useRef(null); const sessionDisplayAtom = daemonId ? getSessionDisplayAtom(daemonId) : null; const sessionDisplay = jotai.useAtomValue(sessionDisplayAtom ?? 
jotai.atom({ name: null, isanonymous: true })); const editInputRef = useRef(null); const popupRef = useRef(null); const iconRef = useRef(null); - const { refs, floatingStyles } = useFloating({ + const { floatingStyles } = useFloating({ + elements: { + reference: iconRef.current, + floating: popupRef.current, + }, open: showPopup, onOpenChange: setShowPopup, placement: "bottom-end", @@ -172,7 +183,7 @@ export function SessionDaemonIndicator({ blockId, useTermHeader }: SessionDaemon } }, [showPopup]); - const handleAttach = (targetDaemonId: string) => { + const handleAttach = useCallback((targetDaemonId: string) => { if (targetDaemonId === daemonId) return; if (editingId) return; fireAndForget(async () => { @@ -183,7 +194,7 @@ export function SessionDaemonIndicator({ blockId, useTermHeader }: SessionDaemon console.log("error switching session:", e); } }); - }; + }, [daemonId, editingId, blockId]); const handleStartEdit = useCallback((daemonId: string, currentName: string) => { setEditingId(daemonId); @@ -213,36 +224,62 @@ export function SessionDaemonIndicator({ blockId, useTermHeader }: SessionDaemon setEditingId(null); }, []); + const handleCreateAndAttach = useCallback(async (name?: string) => { + if (!connName || creatingRef.current) return; + creatingRef.current = true; + setCreating(true); + try { + const info = await RpcApi.SessionCreateCommand(TabRpcClient, { connection: connName }); + if (info?.daemonid) { + if (name) { + await RpcApi.SessionTagCommand(TabRpcClient, { daemonid: info.daemonid, name }); + } + await RpcApi.SessionAttachCommand(TabRpcClient, { + daemonid: info.daemonid, + blockid: blockId, + currentdaemonid: daemonId ?? 
undefined, + }); + setShowPopup(false); + } + } catch (e) { + console.log("error creating session:", e); + } finally { + creatingRef.current = false; + setCreating(false); + } + }, [connName, blockId, daemonId]); + if (!useTermHeader) { return null; } + const isSshConn = connName && !connName.startsWith("local") && !connName.startsWith("wsl://"); + const visible = !!daemonId || isSshConn; + return ( <>
{ - iconRef.current = elem; - refs.setReference(elem); - }} + ref={iconRef} className="iconbutton text-[13px] ml-[-4px]" - title={daemonId ? `Session: ${daemonId}` : "Attach to Session"} + title={daemonId ? `Session: ${daemonId}` : "No session attached"} onClick={() => setShowPopup((v) => !v)} - style={{ display: "inline-flex", alignItems: "center", gap: 4 }} + style={{ display: visible ? "inline-flex" : "none", alignItems: "center", gap: 4 }} > - {daemonId && ( + {daemonId ? ( {sessionDisplay.isanonymous ? daemonId.slice(0, 8) : (sessionDisplay.name || daemonId.slice(0, 8))} + ) : ( + + non session + )}
{showPopup && (
{ - popupRef.current = elem; - refs.setFloating(elem); - }} + ref={popupRef} style={{ ...popupStyle, ...floatingStyles }} onMouseDown={(e) => e.stopPropagation()} onFocusCapture={(e) => e.stopPropagation()} @@ -278,6 +315,75 @@ export function SessionDaemonIndicator({ blockId, useTermHeader }: SessionDaemon {sessions.length}
+ {showCreateInput ? ( +
+ + setNewSessionName(e.target.value)} + onKeyDown={(e) => { + if (e.key === "Enter") { + const name = newSessionName.trim(); + handleCreateAndAttach(name || undefined); + setShowCreateInput(false); + setNewSessionName(""); + } + if (e.key === "Escape") { + setShowCreateInput(false); + setNewSessionName(""); + } + }} + placeholder="Session name (optional)" + style={{ + flex: 1, + background: "transparent", + border: "none", + outline: "none", + color: "#7dd3fc", + fontSize: 13, + fontWeight: 600, + }} + /> +
+ ) : ( +
{ + setShowCreateInput(true); + setTimeout(() => createInputRef.current?.focus(), 0); + }} + style={{ + display: "flex", + alignItems: "center", + gap: 8, + padding: "8px 10px", + marginBottom: 4, + cursor: creating ? "default" : "pointer", + borderRadius: 8, + background: "rgba(56, 189, 248, 0.08)", + border: "1px solid rgba(56, 189, 248, 0.18)", + opacity: creating ? 0.5 : 1, + }} + > + + + {creating ? "Creating..." : "Create new session"} + +
+ )} {sessions.length === 0 && (
Loading sessions... diff --git a/pkg/blockcontroller/blockcontroller.go b/pkg/blockcontroller/blockcontroller.go index d2be11a06d..b370ca62e2 100644 --- a/pkg/blockcontroller/blockcontroller.go +++ b/pkg/blockcontroller/blockcontroller.go @@ -200,19 +200,6 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts // Check for SessionDaemon controller daemonId := blockData.Meta.GetString(waveobj.MetaKey_SessionDaemonId, "") - // Auto-create anonymous daemon for SSH blocks without daemonid - if daemonId == "" && controllerName == BlockController_Shell && !conncontroller.IsLocalConnName(connName) && !conncontroller.IsWslConnName(connName) { - err = CheckConnStatus(blockId) - if err != nil { - return fmt.Errorf("cannot start shellproc: %w", err) - } - newDaemonId, err := autoCreateSessionDaemon(ctx, blockId, blockData.Meta, connName, rtOpts) - if err != nil { - return fmt.Errorf("auto-create session daemon: %w", err) - } - daemonId = newDaemonId - } - // For local/WSL connections, session daemon is not applicable — clear and fall through to ShellController if daemonId != "" && controllerName == BlockController_Shell && (conncontroller.IsLocalConnName(connName) || conncontroller.IsWslConnName(connName)) { if existing != nil { @@ -226,11 +213,11 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts daemonId = "" } - // Validate existing daemon: if stale (done/not found), clear and auto-create + // Validate existing daemon: if stale (done/not found), clear it if daemonId != "" && controllerName == BlockController_Shell { dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, daemonId) if err != nil || dbDaemon.Status == "done" { - log.Printf("[sessiondaemon] stale daemon=%s block=%s status=%s err=%v, clearing and recreating", daemonId, blockId, func() string { if dbDaemon != nil { return dbDaemon.Status }; return "db_load_error" }(), err) + log.Printf("[sessiondaemon] stale daemon=%s block=%s status=%s err=%v, 
clearing", daemonId, blockId, func() string { if dbDaemon != nil { return dbDaemon.Status }; return "db_load_error" }(), err) if existing != nil { DestroyBlockController(blockId) time.Sleep(100 * time.Millisecond) @@ -240,11 +227,6 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts delete(block.Meta, waveobj.MetaKey_SessionDaemonId) }) daemonId = "" - newDaemonId, err := autoCreateSessionDaemon(ctx, blockId, blockData.Meta, connName, rtOpts) - if err != nil { - return fmt.Errorf("auto-create session daemon after stale cleanup: %w", err) - } - daemonId = newDaemonId } } From 9f39b106ed788ecd10e0d6b5d07e456e6fd7c344 Mon Sep 17 00:00:00 2001 From: lyx-tec Date: Mon, 15 Jun 2026 15:36:05 +0800 Subject: [PATCH 21/36] feat: session daemon state machine and full DB load on startup - Add daemon state constants: init, running, disconnected, done - InitFromDB loads ALL daemons from DB (not just running/disconnected), eliminating the hasMem=false orphan job path - Reconnect() distinguishes JobManagerGone (->done) from connection errors (->disconnected), reconciling with jobcontroller's auto-cleanup - SessionDaemonController.Start(): check daemon status at entry (done/disconnected -> error, no auto job creation), use daemon.Reconnect() for state-aware reconnection, removed silent terminate+create fallthrough - ResyncController: stale daemon and dead-job checks use constants, set Status_Done instead of init when job confirmed dead - reapIdleDaemons split into reapRunning (existing logic) and reapDone (5min timeout for done daemons with no blocks) - SessionCreateCommand and autoCreateSessionDaemon use Status_Init constant --- pkg/blockcontroller/blockcontroller.go | 14 +- .../sessiondaemoncontroller.go | 53 +++--- pkg/sessiondaemon/sessiondaemon.go | 157 +++++++++++++----- pkg/wshrpc/wshserver/wshserver.go | 2 +- 4 files changed, 153 insertions(+), 73 deletions(-) diff --git a/pkg/blockcontroller/blockcontroller.go 
b/pkg/blockcontroller/blockcontroller.go index b370ca62e2..3ac7d15db1 100644 --- a/pkg/blockcontroller/blockcontroller.go +++ b/pkg/blockcontroller/blockcontroller.go @@ -216,7 +216,7 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts // Validate existing daemon: if stale (done/not found), clear it if daemonId != "" && controllerName == BlockController_Shell { dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, daemonId) - if err != nil || dbDaemon.Status == "done" { + if err != nil || dbDaemon.Status == sessiondaemon.Status_Done { log.Printf("[sessiondaemon] stale daemon=%s block=%s status=%s err=%v, clearing", daemonId, blockId, func() string { if dbDaemon != nil { return dbDaemon.Status }; return "db_load_error" }(), err) if existing != nil { DestroyBlockController(blockId) @@ -324,18 +324,18 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts if daemon := sessiondaemon.Manager.Get(sdc.DaemonId); daemon != nil && daemon.JobId != "" { jobStatus, jErr := jobcontroller.GetJobManagerStatus(ctx, daemon.JobId) if jErr != nil || jobStatus != jobcontroller.JobManagerStatus_Running { - log.Printf("[sessiondaemon] resync: job %s not running (status=%s err=%v), recreating controller", daemon.JobId, jobStatus, jErr) + log.Printf("[sessiondaemon] resync: job %s not running (status=%s err=%v), marking done", daemon.JobId, jobStatus, jErr) daemon.Lock.Lock() daemon.JobId = "" daemon.Lock.Unlock() wstore.DBUpdateFn(ctx, sdc.DaemonId, func(dbSd *waveobj.SessionDaemon) { dbSd.JobId = "" - dbSd.Status = "init" + dbSd.Status = sessiondaemon.Status_Done }) - stopBlockController(blockId) - time.Sleep(100 * time.Millisecond) - existing = nil - // Fall through to controller recreation + Start below + stopBlockController(blockId) + time.Sleep(100 * time.Millisecond) + existing = nil + // Fall through to controller recreation + Start below } } } diff --git a/pkg/blockcontroller/sessiondaemoncontroller.go 
b/pkg/blockcontroller/sessiondaemoncontroller.go index cd6ad8cc43..8b7cad5c18 100644 --- a/pkg/blockcontroller/sessiondaemoncontroller.go +++ b/pkg/blockcontroller/sessiondaemoncontroller.go @@ -3,14 +3,11 @@ package blockcontroller import ( "context" "fmt" - "io/fs" "log" "sync" "time" "github.com/google/uuid" - "github.com/wavetermdev/waveterm/pkg/filestore" - "github.com/wavetermdev/waveterm/pkg/jobcontroller" "github.com/wavetermdev/waveterm/pkg/remote" "github.com/wavetermdev/waveterm/pkg/remote/conncontroller" "github.com/wavetermdev/waveterm/pkg/sessiondaemon" @@ -77,32 +74,36 @@ func (sdc *SessionDaemonController) Start(ctx context.Context, blockMeta waveobj return fmt.Errorf("error getting session daemon: %w", err) } - if dbDaemon.JobId != "" { - status, err := jobcontroller.GetJobManagerStatus(ctx, dbDaemon.JobId) - log.Printf("[sessiondaemon] start: existing job=%s status=%s err=%v", dbDaemon.JobId, status, err) - if err == nil && status == jobcontroller.JobManagerStatus_Running { - log.Printf("[sessiondaemon] start: reconnecting to existing job %s", dbDaemon.JobId) - err = jobcontroller.ReconnectJob(ctx, dbDaemon.JobId, rtOpts) - if err != nil { - log.Printf("[sessiondaemon] start: reconnect failed job=%s err=%v, starting new job", dbDaemon.JobId, err) - } else { - log.Printf("[sessiondaemon] start: reconnect ok job=%s", dbDaemon.JobId) - sdc.incrementVersion() - sdc.sendControllerStatus() - return nil - } - } + if dbDaemon.Status == sessiondaemon.Status_Done { + return fmt.Errorf("remote job manager has exited, restart or delete the session") + } + if dbDaemon.Status == sessiondaemon.Status_Disconnected { + return fmt.Errorf("daemon is disconnected, waiting for connection to recover") } - // Terminate old job if it exists (crashed or network issue) if dbDaemon.JobId != "" { - log.Printf("[sessiondaemon] start: terminating old job %s", dbDaemon.JobId) - jobcontroller.TerminateAndDetachJob(ctx, dbDaemon.JobId) - } + log.Printf("[sessiondaemon] start: 
attempting reconnect to job=%s status=%s", dbDaemon.JobId, dbDaemon.Status) + err = daemon.Reconnect(ctx, dbDaemon, rtOpts) + if err == nil { + log.Printf("[sessiondaemon] start: reconnect ok block=%s job=%s", sdc.BlockId, dbDaemon.JobId) + sdc.incrementVersion() + sdc.sendControllerStatus() + return nil + } + log.Printf("[sessiondaemon] start: reconnect failed block=%s job=%s err=%v", sdc.BlockId, dbDaemon.JobId, err) - fsErr := filestore.WFS.MakeFile(ctx, sdc.BlockId, wavebase.BlockFile_Term, nil, wshrpc.FileOpts{MaxSize: DefaultTermMaxFileSize, Circular: true}) - if fsErr != nil && fsErr != fs.ErrExist { - return fmt.Errorf("error creating block term file: %w", fsErr) + dbDaemon, dbErr := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, sdc.DaemonId) + if dbErr != nil { + return fmt.Errorf("error reading daemon after reconnect failure: %w", dbErr) + } + switch dbDaemon.Status { + case sessiondaemon.Status_Disconnected: + return fmt.Errorf("daemon is disconnected, waiting for connection to recover") + case sessiondaemon.Status_Done: + return fmt.Errorf("remote job manager has exited, restart or delete the session") + default: + return fmt.Errorf("unexpected daemon status %q after reconnect failure", dbDaemon.Status) + } } log.Printf("[sessiondaemon] start: starting new job block=%s", sdc.BlockId) @@ -253,7 +254,7 @@ func autoCreateSessionDaemon(ctx context.Context, blockId string, blockMeta wave Name: "", Connection: connName, IsAnonymous: true, - Status: "init", + Status: sessiondaemon.Status_Init, CreatedAt: time.Now().UnixMilli(), IdleTimeout: sessiondaemon.DefaultAnonymousIdleTimeout, } diff --git a/pkg/sessiondaemon/sessiondaemon.go b/pkg/sessiondaemon/sessiondaemon.go index 845da176dd..b73d93e4e2 100644 --- a/pkg/sessiondaemon/sessiondaemon.go +++ b/pkg/sessiondaemon/sessiondaemon.go @@ -17,9 +17,17 @@ import ( ) const ( - DefaultAnonymousIdleTimeout = 600 // 10min + DefaultAnonymousIdleTimeout = 600 // 10min DefaultNamedIdleTimeout = 86400 // 24h - 
IdleCheckInterval = 60 // 检查间隔(秒) + IdleCheckInterval = 60 // 60s + DoneReapTimeout = 300 // 5min for done daemons with no blocks +) + +const ( + Status_Init = "init" + Status_Running = "running" + Status_Disconnected = "disconnected" + Status_Done = "done" ) type SessionDaemon struct { @@ -68,7 +76,7 @@ func (sd *SessionDaemon) SetJobId(ctx context.Context, dbDaemon *waveobj.Session err := wstore.DBUpdateFn(ctx, dbDaemon.OID, func(sdDb *waveobj.SessionDaemon) { sdDb.JobId = jobId - sdDb.Status = "running" + sdDb.Status = Status_Running }) if err != nil { log.Printf("[sessiondaemon:%s] warning: failed to update jobid in db: %v", sd.DaemonId, err) @@ -83,7 +91,34 @@ func (sd *SessionDaemon) Reconnect(ctx context.Context, dbDaemon *waveobj.Sessio sd.Lock.Lock() sd.JobId = dbDaemon.JobId sd.Lock.Unlock() - return jobcontroller.ReconnectJob(ctx, dbDaemon.JobId, rtOpts) + + err := jobcontroller.ReconnectJob(ctx, dbDaemon.JobId, rtOpts) + if err != nil { + var jobGone bool + wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbSd *waveobj.SessionDaemon) { + if dbSd.JobId == "" { + dbSd.Status = Status_Done + jobGone = true + } else { + dbSd.Status = Status_Disconnected + } + }) + if jobGone { + sd.Lock.Lock() + sd.JobId = "" + sd.Lock.Unlock() + log.Printf("[sessiondaemon:%s] reconnect: job manager gone, status -> done", sd.DaemonId) + return fmt.Errorf("job manager has exited") + } + log.Printf("[sessiondaemon:%s] reconnect: failed, status -> disconnected: %v", sd.DaemonId, err) + return fmt.Errorf("reconnect failed: %w", err) + } + + wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbSd *waveobj.SessionDaemon) { + dbSd.Status = Status_Running + }) + log.Printf("[sessiondaemon:%s] reconnect: success, status -> running", sd.DaemonId) + return nil } func (sd *SessionDaemon) Stop(ctx context.Context) { @@ -235,18 +270,31 @@ func (sd *SessionDaemonManager) InitFromDB(ctx context.Context) error { } for _, dbDaemon := range daemons { - if dbDaemon.Status == "running" || dbDaemon.Status == 
"disconnected" { - daemon, err := sd.GetOrCreate(ctx, dbDaemon) - if err != nil { - log.Printf("[sessiondaemon] warning: failed to load daemon %s: %v", dbDaemon.OID, err) - continue - } + daemon, err := sd.GetOrCreate(ctx, dbDaemon) + if err != nil { + log.Printf("[sessiondaemon] warning: failed to load daemon %s: %v", dbDaemon.OID, err) + continue + } + + switch dbDaemon.Status { + case Status_Running, Status_Disconnected: err = daemon.Reconnect(ctx, dbDaemon, nil) if err != nil { log.Printf("[sessiondaemon:%s] reconnect failed: %v", dbDaemon.OID, err) } + case Status_Done: + log.Printf("[sessiondaemon:%s] loaded done daemon", dbDaemon.OID) + case Status_Init: + log.Printf("[sessiondaemon:%s] loaded init daemon", dbDaemon.OID) + default: + log.Printf("[sessiondaemon:%s] unknown status %q, treating as init", dbDaemon.OID, dbDaemon.Status) + wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbSd *waveobj.SessionDaemon) { + dbSd.Status = Status_Init + }) } } + + log.Printf("[sessiondaemon] InitFromDB complete: %d daemons loaded", len(sd.Daemons)) return nil } @@ -301,45 +349,76 @@ func (sd *SessionDaemonManager) reapIdleDaemons(ctx context.Context) { } for _, dbDaemon := range allDaemons { - if dbDaemon.Status != "running" { - continue - } - sd.Lock.Lock() memDaemon, hasMem := sd.Daemons[dbDaemon.OID] sd.Lock.Unlock() - if hasMem && memDaemon.HasAttachedBlocks() { - // Verify all attached blocks are still alive. If a block - // was deleted without proper detach, clean it up here - // to prevent the daemon from holding onto a dead block forever. 
- sd.cleanupDeadBlocks(ctx, dbDaemon.OID, memDaemon) - if memDaemon.HasAttachedBlocks() { - continue - } + switch dbDaemon.Status { + case Status_Running: + sd.reapRunning(ctx, dbDaemon, memDaemon, hasMem) + case Status_Done: + sd.reapDone(ctx, dbDaemon, memDaemon, hasMem) } + } +} - if dbDaemon.IdleTimeout <= 0 { - continue +func (sd *SessionDaemonManager) reapRunning(ctx context.Context, dbDaemon *waveobj.SessionDaemon, memDaemon *SessionDaemon, hasMem bool) { + if hasMem && memDaemon.HasAttachedBlocks() { + sd.cleanupDeadBlocks(ctx, dbDaemon.OID, memDaemon) + if memDaemon.HasAttachedBlocks() { + return } + } + + if dbDaemon.IdleTimeout <= 0 { + return + } + + var newRemaining int64 + wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbD *waveobj.SessionDaemon) { + dbD.IdleSince -= IdleCheckInterval + newRemaining = dbD.IdleSince + }) + if newRemaining > 0 { + return + } + + log.Printf("[sessiondaemon:%s] idle timeout reached, terminating", dbDaemon.OID) + if hasMem { + memDaemon.Stop(ctx) + sd.Remove(dbDaemon.OID) + } + wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, dbDaemon.OID) +} + +func (sd *SessionDaemonManager) reapDone(ctx context.Context, dbDaemon *waveobj.SessionDaemon, memDaemon *SessionDaemon, hasMem bool) { + if hasMem && memDaemon.HasAttachedBlocks() { + return + } + + if dbDaemon.IdleTimeout <= 0 { + return + } - // IdleSince is a countdown in seconds (set to IdleTimeout when idle starts). - // Decrement inside the DB update closure to avoid race with concurrent AttachBlock - // which resets IdleSince to 0. 
- var newRemaining int64 + if dbDaemon.IdleSince <= 0 { wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbD *waveobj.SessionDaemon) { - dbD.IdleSince -= IdleCheckInterval - newRemaining = dbD.IdleSince + dbD.IdleSince = DoneReapTimeout }) - if newRemaining > 0 { - continue - } + return + } - log.Printf("[sessiondaemon:%s] idle timeout reached, terminating", dbDaemon.OID) - if hasMem { - memDaemon.Stop(ctx) - sd.Remove(dbDaemon.OID) - } - wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, dbDaemon.OID) + var newRemaining int64 + wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbD *waveobj.SessionDaemon) { + dbD.IdleSince -= IdleCheckInterval + newRemaining = dbD.IdleSince + }) + if newRemaining > 0 { + return + } + + log.Printf("[sessiondaemon:%s] done daemon reaped", dbDaemon.OID) + if hasMem { + sd.Remove(dbDaemon.OID) } + wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, dbDaemon.OID) } diff --git a/pkg/wshrpc/wshserver/wshserver.go b/pkg/wshrpc/wshserver/wshserver.go index 83c693b2ac..3680241150 100644 --- a/pkg/wshrpc/wshserver/wshserver.go +++ b/pkg/wshrpc/wshserver/wshserver.go @@ -1587,7 +1587,7 @@ func (ws *WshServer) SessionCreateCommand(ctx context.Context, data wshrpc.Comma Name: data.Name, Connection: data.Connection, IsAnonymous: data.Name == "", - Status: "init", + Status: sessiondaemon.Status_Init, CreatedAt: time.Now().UnixMilli(), IdleTimeout: data.IdleTimeout, } From dd23cdbd20a9486cb34dbc1010a0268bcfc90171 Mon Sep 17 00:00:00 2001 From: lyx-tec Date: Mon, 15 Jun 2026 16:18:44 +0800 Subject: [PATCH 22/36] fix: daemon Stop returns error, block DB delete on terminate failure - TerminateAndDetachJob returns error instead of void - SessionDaemon.Stop() returns error, propagates terminate failure - reapRunning: skip DB delete if Stop fails, retry next cycle - SessionDeleteCommand: return error if Stop fails, preserving daemon DB record for retry - handleBlockCloseEvent: log terminate errors --- pkg/jobcontroller/jobcontroller.go | 9 +++++++-- 
pkg/sessiondaemon/sessiondaemon.go | 15 ++++++++++++--- pkg/wshrpc/wshserver/wshserver.go | 5 ++++- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/pkg/jobcontroller/jobcontroller.go b/pkg/jobcontroller/jobcontroller.go index 4ded5b7e6b..153edaa981 100644 --- a/pkg/jobcontroller/jobcontroller.go +++ b/pkg/jobcontroller/jobcontroller.go @@ -476,7 +476,10 @@ func handleBlockCloseEvent(event *wps.WaveEvent) { } for _, jobId := range jobIds { - TerminateAndDetachJob(ctx, jobId) + err := TerminateAndDetachJob(ctx, jobId) + if err != nil { + log.Printf("[job:%s] error in handleBlockCloseEvent: %v", jobId, err) + } } } @@ -926,15 +929,17 @@ func tryTerminateJobManager(ctx context.Context, jobId string) { } } -func TerminateAndDetachJob(ctx context.Context, jobId string) { +func TerminateAndDetachJob(ctx context.Context, jobId string) error { err := TerminateJobManager(ctx, jobId) if err != nil { log.Printf("[job:%s] error terminating job manager: %v", jobId, err) + return fmt.Errorf("terminate job manager: %w", err) } err = DetachJobFromBlock(ctx, jobId, true) if err != nil { log.Printf("[job:%s] error detaching job from block: %v", jobId, err) } + return nil } func TerminateJobManager(ctx context.Context, jobId string) error { diff --git a/pkg/sessiondaemon/sessiondaemon.go b/pkg/sessiondaemon/sessiondaemon.go index b73d93e4e2..d67515ce51 100644 --- a/pkg/sessiondaemon/sessiondaemon.go +++ b/pkg/sessiondaemon/sessiondaemon.go @@ -121,14 +121,19 @@ func (sd *SessionDaemon) Reconnect(ctx context.Context, dbDaemon *waveobj.Sessio return nil } -func (sd *SessionDaemon) Stop(ctx context.Context) { +func (sd *SessionDaemon) Stop(ctx context.Context) error { sd.Lock.Lock() jobId := sd.JobId sd.Lock.Unlock() log.Printf("[sessiondaemon] stop daemon=%s job=%s", sd.DaemonId, jobId) if jobId != "" { - jobcontroller.TerminateAndDetachJob(ctx, jobId) + err := jobcontroller.TerminateAndDetachJob(ctx, jobId) + if err != nil { + log.Printf("[sessiondaemon:%s] error 
terminating remote job %s: %v", sd.DaemonId, jobId, err) + return fmt.Errorf("failed to terminate remote job: %w", err) + } } + return nil } func (sd *SessionDaemon) SendInput(ctx context.Context, inputData []byte, sigName string, termSize *waveobj.TermSize) error { @@ -385,7 +390,11 @@ func (sd *SessionDaemonManager) reapRunning(ctx context.Context, dbDaemon *waveo log.Printf("[sessiondaemon:%s] idle timeout reached, terminating", dbDaemon.OID) if hasMem { - memDaemon.Stop(ctx) + err := memDaemon.Stop(ctx) + if err != nil { + log.Printf("[sessiondaemon:%s] error stopping daemon, will retry next cycle: %v", dbDaemon.OID, err) + return + } sd.Remove(dbDaemon.OID) } wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, dbDaemon.OID) diff --git a/pkg/wshrpc/wshserver/wshserver.go b/pkg/wshrpc/wshserver/wshserver.go index 3680241150..094153ff21 100644 --- a/pkg/wshrpc/wshserver/wshserver.go +++ b/pkg/wshrpc/wshserver/wshserver.go @@ -1618,7 +1618,10 @@ func (ws *WshServer) SessionDeleteCommand(ctx context.Context, data wshrpc.Comma memDaemon := sessiondaemon.Manager.Get(data.DaemonId) if memDaemon != nil { - memDaemon.Stop(ctx) + err = memDaemon.Stop(ctx) + if err != nil { + return fmt.Errorf("failed to stop session daemon: %w", err) + } sessiondaemon.Manager.Remove(data.DaemonId) } From 04fe60f922e07cea12ac7d538d0281972cdc45e6 Mon Sep 17 00:00:00 2001 From: lyx-tec Date: Mon, 15 Jun 2026 16:24:40 +0800 Subject: [PATCH 23/36] fix: session daemon jobs use daemon: prefix for AttachedBlockId to prevent handleBlockCloseEvent from terminating shared daemon jobs --- pkg/blockcontroller/sessiondaemoncontroller.go | 5 +++++ pkg/jobcontroller/jobcontroller.go | 8 ++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/pkg/blockcontroller/sessiondaemoncontroller.go b/pkg/blockcontroller/sessiondaemoncontroller.go index 8b7cad5c18..1abbd88e68 100644 --- a/pkg/blockcontroller/sessiondaemoncontroller.go +++ b/pkg/blockcontroller/sessiondaemoncontroller.go @@ -176,6 
+176,11 @@ func (sdc *SessionDaemonController) startNewJob(ctx context.Context, blockMeta w if err != nil { return "", fmt.Errorf("failed to start remote shell job: %w", err) } + + wstore.DBUpdateFn(ctx, jobId, func(job *waveobj.Job) { + job.AttachedBlockId = "daemon:" + sdc.DaemonId + }) + return jobId, nil } diff --git a/pkg/jobcontroller/jobcontroller.go b/pkg/jobcontroller/jobcontroller.go index 153edaa981..c53cbb91a3 100644 --- a/pkg/jobcontroller/jobcontroller.go +++ b/pkg/jobcontroller/jobcontroller.go @@ -230,7 +230,7 @@ func SendBlockJobStatusEvent(ctx context.Context, blockId string) { } func sendBlockJobStatusEventByJob(ctx context.Context, job *waveobj.Job) { - if job == nil || job.AttachedBlockId == "" { + if job == nil || job.AttachedBlockId == "" || strings.HasPrefix(job.AttachedBlockId, "daemon:") { return } SendBlockJobStatusEvent(ctx, job.AttachedBlockId) @@ -813,7 +813,7 @@ func handleAppendJobFile(ctx context.Context, jobId string, fileName string, dat if err != nil { return fmt.Errorf("error getting job: %w", err) } - if job != nil && job.AttachedBlockId != "" { + if job != nil && job.AttachedBlockId != "" && !strings.HasPrefix(job.AttachedBlockId, "daemon:") { err = doWFSAppend(ctx, waveobj.MakeORef(waveobj.OType_Block, job.AttachedBlockId), fileName, data) if err != nil { return fmt.Errorf("error appending to block file: %w", err) @@ -1511,7 +1511,7 @@ func SendInput(ctx context.Context, data wshrpc.CommandJobInputData) error { } func resetTerminalState(logCtx context.Context, blockId string) { - if blockId == "" { + if blockId == "" || strings.HasPrefix(blockId, "daemon:") { return } ctx, cancelFn := context.WithTimeout(context.Background(), DefaultTimeout) @@ -1561,7 +1561,7 @@ func writeSessionSeparatorToTerminal(blockId string, termWidth int) { // msg should not have a terminating newline func writeMutedMessageToTerminal(blockId string, msg string) { - if blockId == "" { + if blockId == "" || strings.HasPrefix(blockId, "daemon:") { return 
} ctx, cancelFn := context.WithTimeout(context.Background(), DefaultTimeout) From 3f1f1a63f62b98caced2c586b2a2d7722f5cee74 Mon Sep 17 00:00:00 2001 From: lyx-tec Date: Mon, 15 Jun 2026 16:27:17 +0800 Subject: [PATCH 24/36] fix: restore MakeFile call for term file in SessionDaemonController.Start --- pkg/blockcontroller/sessiondaemoncontroller.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pkg/blockcontroller/sessiondaemoncontroller.go b/pkg/blockcontroller/sessiondaemoncontroller.go index 1abbd88e68..2f7acbf490 100644 --- a/pkg/blockcontroller/sessiondaemoncontroller.go +++ b/pkg/blockcontroller/sessiondaemoncontroller.go @@ -3,11 +3,13 @@ package blockcontroller import ( "context" "fmt" + "io/fs" "log" "sync" "time" "github.com/google/uuid" + "github.com/wavetermdev/waveterm/pkg/filestore" "github.com/wavetermdev/waveterm/pkg/remote" "github.com/wavetermdev/waveterm/pkg/remote/conncontroller" "github.com/wavetermdev/waveterm/pkg/sessiondaemon" @@ -107,6 +109,10 @@ func (sdc *SessionDaemonController) Start(ctx context.Context, blockMeta waveobj } log.Printf("[sessiondaemon] start: starting new job block=%s", sdc.BlockId) + fsErr := filestore.WFS.MakeFile(ctx, sdc.BlockId, wavebase.BlockFile_Term, nil, wshrpc.FileOpts{MaxSize: DefaultTermMaxFileSize, Circular: true}) + if fsErr != nil && fsErr != fs.ErrExist { + return fmt.Errorf("error creating block term file: %w", fsErr) + } jobId, err := sdc.startNewJob(ctx, blockMeta, rtOpts) if err != nil { log.Printf("[sessiondaemon] start: new job failed block=%s err=%v", sdc.BlockId, err) From 35f1dee8dca6dee7d84b32a565417297f5bef705 Mon Sep 17 00:00:00 2001 From: lyx-tec Date: Mon, 15 Jun 2026 16:36:40 +0800 Subject: [PATCH 25/36] feat: add pencil icon hint on session name in popup list --- .../app/block/session-daemon-indicator.tsx | 28 +++++++++++++++---- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/frontend/app/block/session-daemon-indicator.tsx 
b/frontend/app/block/session-daemon-indicator.tsx index ab96dc9432..03d791571c 100644 --- a/frontend/app/block/session-daemon-indicator.tsx +++ b/frontend/app/block/session-daemon-indicator.tsx @@ -477,16 +477,32 @@ export function SessionDaemonIndicator({ blockId, useTermHeader }: SessionDaemon handleStartEdit(s.daemonid, s.name); }} style={{ - ...truncateStyle, - fontWeight: 650, - color: "var(--text-primary)", - lineHeight: "20px", - fontSize: 14, + display: "flex", + alignItems: "center", + gap: 6, cursor: "text", }} title="Click to rename" > - {s.name || "Unnamed session"} + + {s.name || "Unnamed session"} + +
)} {s.connection && ( From cf1119210ecafa61b0a36ecb4fbb51e24f624fb1 Mon Sep 17 00:00:00 2001 From: lyx-tec Date: Mon, 15 Jun 2026 19:54:56 +0800 Subject: [PATCH 26/36] feat: remote jobmanager idle timeout with centralized disconnect manager + memory-DB consistency check P4/P6: remote idle timeout (2 days default) - CommandRemoteStartJobData/CommandRemoteReconnectToJobManagerData get RemoteIdleTimeoutSeconds field - DefaultRemoteIdleTimeoutSeconds = 172800, passed via RPC on job start/reconnect - Remote wsh tracks Pid/StartTs/RemoteIdleTimeoutSeconds in JobManagerConnection - Centralized disconnectManager: single ticker goroutine, 60s cycle - removeJobManagerConnection -> addDisconnectEntry with deadline - connectToJobManager -> removeDisconnectEntry (cancel on reconnect) - Deadline expired -> isProcessRunning -> SIGTERM P7: verifyConsistency runs in idle reaper cycle - Memory-only daemons without DB entry: removed - DB-only daemons without memory entry: loaded via GetOrCreate pattern --- pkg/jobcontroller/jobcontroller.go | 33 ++++---- pkg/sessiondaemon/sessiondaemon.go | 36 +++++++++ pkg/wshrpc/wshremote/wshremote.go | 36 ++++++--- pkg/wshrpc/wshremote/wshremote_job.go | 107 +++++++++++++++++++++++++- pkg/wshrpc/wshrpctypes.go | 32 ++++---- 5 files changed, 201 insertions(+), 43 deletions(-) diff --git a/pkg/jobcontroller/jobcontroller.go b/pkg/jobcontroller/jobcontroller.go index c53cbb91a3..067dcd18e5 100644 --- a/pkg/jobcontroller/jobcontroller.go +++ b/pkg/jobcontroller/jobcontroller.go @@ -40,6 +40,7 @@ import ( ) const DefaultTimeout = 2 * time.Second +const DefaultRemoteIdleTimeoutSeconds = 172800 // 2 days const ( JobManagerStatus_Init = "init" @@ -696,16 +697,17 @@ func StartJob(ctx context.Context, params StartJobParams) (string, error) { publicKeyBase64 := base64.StdEncoding.EncodeToString(publicKey) jobEnv := envutil.CopyAndAddToEnvMap(params.Env, "WAVETERM_JOBID", jobId) startJobData := wshrpc.CommandRemoteStartJobData{ - Cmd: params.Cmd, - Args: 
params.Args, - Env: jobEnv, - TermSize: *params.TermSize, - StreamMeta: streamMeta, - JobAuthToken: jobAuthToken, - JobId: jobId, - MainServerJwtToken: jobAccessToken, - ClientId: clientId, - PublicKeyBase64: publicKeyBase64, + Cmd: params.Cmd, + Args: params.Args, + Env: jobEnv, + TermSize: *params.TermSize, + StreamMeta: streamMeta, + JobAuthToken: jobAuthToken, + JobId: jobId, + MainServerJwtToken: jobAccessToken, + ClientId: clientId, + PublicKeyBase64: publicKeyBase64, + RemoteIdleTimeoutSeconds: DefaultRemoteIdleTimeoutSeconds, } rpcOpts := &wshrpc.RpcOpts{ @@ -1112,11 +1114,12 @@ func doReconnectJob(ctx context.Context, jobId string, rtOpts *waveobj.RuntimeOp } reconnectData := wshrpc.CommandRemoteReconnectToJobManagerData{ - JobId: jobId, - JobAuthToken: job.JobAuthToken, - MainServerJwtToken: jobAccessToken, - JobManagerPid: job.JobManagerPid, - JobManagerStartTs: job.JobManagerStartTs, + JobId: jobId, + JobAuthToken: job.JobAuthToken, + MainServerJwtToken: jobAccessToken, + JobManagerPid: job.JobManagerPid, + JobManagerStartTs: job.JobManagerStartTs, + RemoteIdleTimeoutSeconds: DefaultRemoteIdleTimeoutSeconds, } rpcOpts := &wshrpc.RpcOpts{ diff --git a/pkg/sessiondaemon/sessiondaemon.go b/pkg/sessiondaemon/sessiondaemon.go index d67515ce51..2887870d76 100644 --- a/pkg/sessiondaemon/sessiondaemon.go +++ b/pkg/sessiondaemon/sessiondaemon.go @@ -313,6 +313,7 @@ func (sd *SessionDaemonManager) StartIdleReaper(ctx context.Context) { return case <-ticker.C: sd.reapIdleDaemons(ctx) + sd.verifyConsistency(ctx) } } }() @@ -431,3 +432,38 @@ func (sd *SessionDaemonManager) reapDone(ctx context.Context, dbDaemon *waveobj. 
} wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, dbDaemon.OID) } + +func (sd *SessionDaemonManager) verifyConsistency(ctx context.Context) { + daemons, err := wstore.DBGetAllObjsByType[*waveobj.SessionDaemon](ctx, waveobj.OType_SessionDaemon) + if err != nil { + return + } + + dbIds := make(map[string]bool) + for _, dbDaemon := range daemons { + dbIds[dbDaemon.OID] = true + } + + sd.Lock.Lock() + defer sd.Lock.Unlock() + + for id := range sd.Daemons { + if !dbIds[id] { + log.Printf("[sessiondaemon] consistency: daemon %s in memory but not in DB, removing from memory", id) + delete(sd.Daemons, id) + } + } + + for _, dbDaemon := range daemons { + if _, exists := sd.Daemons[dbDaemon.OID]; !exists { + log.Printf("[sessiondaemon] consistency: daemon %s in DB but not in memory, loading", dbDaemon.OID) + sd.Daemons[dbDaemon.OID] = &SessionDaemon{ + DaemonId: dbDaemon.OID, + Name: dbDaemon.Name, + JobId: dbDaemon.JobId, + InputSessionId: uuid.New().String(), + Blocks: make(map[string]bool), + } + } + } +} diff --git a/pkg/wshrpc/wshremote/wshremote.go b/pkg/wshrpc/wshremote/wshremote.go index 35dc92d5bb..6e892c9ec2 100644 --- a/pkg/wshrpc/wshremote/wshremote.go +++ b/pkg/wshrpc/wshremote/wshremote.go @@ -26,10 +26,20 @@ import ( ) type JobManagerConnection struct { - JobId string - Conn net.Conn - WshRpc *wshutil.WshRpc - CleanupFn func() + JobId string + Conn net.Conn + WshRpc *wshutil.WshRpc + CleanupFn func() + Pid int + StartTs int64 + RemoteIdleTimeoutSeconds int64 +} + +type disconnectEntry struct { + Deadline time.Time + JobId string + Pid int + StartTs int64 } type ServerImpl struct { @@ -41,17 +51,21 @@ type ServerImpl struct { JobManagerMap map[string]*JobManagerConnection SockName string Lock sync.Mutex + + disconnectDeadlines map[string]*disconnectEntry + disconnectMu sync.Mutex } func MakeRemoteRpcServerImpl(logWriter io.Writer, router *wshutil.WshRouter, rpcClient *wshutil.WshRpc, isLocal bool, initialEnv map[string]string, sockName string) *ServerImpl { 
return &ServerImpl{ - LogWriter: logWriter, - Router: router, - RpcClient: rpcClient, - IsLocal: isLocal, - InitialEnv: initialEnv, - JobManagerMap: make(map[string]*JobManagerConnection), - SockName: sockName, + LogWriter: logWriter, + Router: router, + RpcClient: rpcClient, + IsLocal: isLocal, + InitialEnv: initialEnv, + JobManagerMap: make(map[string]*JobManagerConnection), + SockName: sockName, + disconnectDeadlines: make(map[string]*disconnectEntry), } } diff --git a/pkg/wshrpc/wshremote/wshremote_job.go b/pkg/wshrpc/wshremote/wshremote_job.go index b357116427..df5cf4470b 100644 --- a/pkg/wshrpc/wshremote/wshremote_job.go +++ b/pkg/wshrpc/wshremote/wshremote_job.go @@ -104,6 +104,8 @@ func (impl *ServerImpl) connectToJobManager(ctx context.Context, jobId string, m } impl.addJobManagerConnection(jobConn) + impl.removeDisconnectEntry(jobId) + log.Printf("connectToJobManager: successfully connected and authenticated\n") return jobRouteId, cleanup, nil } @@ -117,11 +119,16 @@ func (impl *ServerImpl) addJobManagerConnection(conn *JobManagerConnection) { func (impl *ServerImpl) removeJobManagerConnection(jobId string) { impl.Lock.Lock() - defer impl.Lock.Unlock() - if _, exists := impl.JobManagerMap[jobId]; exists { + conn, exists := impl.JobManagerMap[jobId] + if exists { delete(impl.JobManagerMap, jobId) log.Printf("removeJobManagerConnection: removed job manager connection for jobid=%s\n", jobId) } + impl.Lock.Unlock() + + if conn != nil && conn.Pid > 0 { + impl.addDisconnectEntry(jobId, conn.Pid, conn.StartTs, conn.RemoteIdleTimeoutSeconds) + } } func (impl *ServerImpl) getJobManagerConnection(jobId string) *JobManagerConnection { @@ -254,6 +261,17 @@ func (impl *ServerImpl) RemoteStartJobCommand(ctx context.Context, data wshrpc.C return nil, err } + impl.Lock.Lock() + if jobConn := impl.JobManagerMap[data.JobId]; jobConn != nil { + jobConn.Pid = cmd.Process.Pid + jobConn.StartTs = time.Now().UnixMilli() + jobConn.RemoteIdleTimeoutSeconds = 
data.RemoteIdleTimeoutSeconds + if jobConn.RemoteIdleTimeoutSeconds <= 0 { + jobConn.RemoteIdleTimeoutSeconds = 172800 + } + } + impl.Lock.Unlock() + combinedEnv := make(map[string]string) for k, v := range impl.InitialEnv { combinedEnv[k] = v @@ -317,6 +335,17 @@ func (impl *ServerImpl) RemoteReconnectToJobManagerCommand(ctx context.Context, }, nil } + impl.Lock.Lock() + if jobConn := impl.JobManagerMap[data.JobId]; jobConn != nil { + jobConn.Pid = data.JobManagerPid + jobConn.StartTs = data.JobManagerStartTs + jobConn.RemoteIdleTimeoutSeconds = data.RemoteIdleTimeoutSeconds + if jobConn.RemoteIdleTimeoutSeconds <= 0 { + jobConn.RemoteIdleTimeoutSeconds = 172800 + } + } + impl.Lock.Unlock() + log.Printf("RemoteReconnectToJobManagerCommand: successfully reconnected to job manager\n") return &wshrpc.CommandRemoteReconnectToJobManagerRtnData{ Success: true, @@ -357,3 +386,77 @@ func (impl *ServerImpl) RemoteTerminateJobManagerCommand(ctx context.Context, da } return nil } + +const disconnectCheckInterval = 60 // seconds + +var disconnectManagerStarted sync.Once + +func (impl *ServerImpl) ensureDisconnectManager() { + disconnectManagerStarted.Do(func() { + go impl.runDisconnectManager() + }) +} + +func (impl *ServerImpl) runDisconnectManager() { + ticker := time.NewTicker(disconnectCheckInterval * time.Second) + defer ticker.Stop() + for range ticker.C { + impl.reapDisconnectedJobs() + } +} + +func (impl *ServerImpl) reapDisconnectedJobs() { + impl.disconnectMu.Lock() + now := time.Now() + var expired []*disconnectEntry + var remaining []*disconnectEntry + for _, entry := range impl.disconnectDeadlines { + if now.After(entry.Deadline) { + expired = append(expired, entry) + } else { + remaining = append(remaining, entry) + } + } + impl.disconnectDeadlines = make(map[string]*disconnectEntry) + for _, entry := range remaining { + impl.disconnectDeadlines[entry.JobId] = entry + } + impl.disconnectMu.Unlock() + + for _, entry := range expired { + proc, err := 
isProcessRunning(entry.Pid, entry.StartTs) + if err != nil { + log.Printf("disconnectManager: error checking process for job=%s pid=%d: %v", entry.JobId, entry.Pid, err) + continue + } + if proc != nil { + log.Printf("disconnectManager: terminating orphaned job=%s pid=%d", entry.JobId, entry.Pid) + err = proc.SendSignal(syscall.SIGTERM) + if err != nil { + log.Printf("disconnectManager: error sending SIGTERM to job=%s pid=%d: %v", entry.JobId, entry.Pid, err) + } + } + } +} + +func (impl *ServerImpl) addDisconnectEntry(jobId string, pid int, startTs int64, timeoutSeconds int64) { + impl.ensureDisconnectManager() + impl.disconnectMu.Lock() + defer impl.disconnectMu.Unlock() + impl.disconnectDeadlines[jobId] = &disconnectEntry{ + JobId: jobId, + Pid: pid, + StartTs: startTs, + Deadline: time.Now().Add(time.Duration(timeoutSeconds) * time.Second), + } + log.Printf("disconnectManager: added entry for job=%s deadline=%v", jobId, impl.disconnectDeadlines[jobId].Deadline) +} + +func (impl *ServerImpl) removeDisconnectEntry(jobId string) { + impl.disconnectMu.Lock() + defer impl.disconnectMu.Unlock() + if _, exists := impl.disconnectDeadlines[jobId]; exists { + delete(impl.disconnectDeadlines, jobId) + log.Printf("disconnectManager: removed entry for job=%s", jobId) + } +} diff --git a/pkg/wshrpc/wshrpctypes.go b/pkg/wshrpc/wshrpctypes.go index 80cf8b13bc..ed8e1ed860 100644 --- a/pkg/wshrpc/wshrpctypes.go +++ b/pkg/wshrpc/wshrpctypes.go @@ -743,24 +743,26 @@ type CommandStartJobData struct { } type CommandRemoteStartJobData struct { - Cmd string `json:"cmd"` - Args []string `json:"args"` - Env map[string]string `json:"env"` - TermSize waveobj.TermSize `json:"termsize"` - StreamMeta *StreamMeta `json:"streammeta,omitempty"` - JobAuthToken string `json:"jobauthtoken"` - JobId string `json:"jobid"` - MainServerJwtToken string `json:"mainserverjwttoken"` - ClientId string `json:"clientid"` - PublicKeyBase64 string `json:"publickeybase64"` + Cmd string `json:"cmd"` + Args 
[]string `json:"args"` + Env map[string]string `json:"env"` + TermSize waveobj.TermSize `json:"termsize"` + StreamMeta *StreamMeta `json:"streammeta,omitempty"` + JobAuthToken string `json:"jobauthtoken"` + JobId string `json:"jobid"` + MainServerJwtToken string `json:"mainserverjwttoken"` + ClientId string `json:"clientid"` + PublicKeyBase64 string `json:"publickeybase64"` + RemoteIdleTimeoutSeconds int64 `json:"remoteidletimeoutseconds,omitempty"` } type CommandRemoteReconnectToJobManagerData struct { - JobId string `json:"jobid"` - JobAuthToken string `json:"jobauthtoken"` - MainServerJwtToken string `json:"mainserverjwttoken"` - JobManagerPid int `json:"jobmanagerpid"` - JobManagerStartTs int64 `json:"jobmanagerstartts"` + JobId string `json:"jobid"` + JobAuthToken string `json:"jobauthtoken"` + MainServerJwtToken string `json:"mainserverjwttoken"` + JobManagerPid int `json:"jobmanagerpid"` + JobManagerStartTs int64 `json:"jobmanagerstartts"` + RemoteIdleTimeoutSeconds int64 `json:"remoteidletimeoutseconds,omitempty"` } type CommandRemoteReconnectToJobManagerRtnData struct { From 941b41b85db409e2192483cd11477f363d0acf25 Mon Sep 17 00:00:00 2001 From: lyx-tec Date: Tue, 16 Jun 2026 16:04:17 +0800 Subject: [PATCH 27/36] fix: session daemon reliability - race conditions, memory/DB consistency, and code quality MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug fixes: - Fix indentation bug where stopBlockController was outside the 'job not running' check, causing healthy SessionDaemonControllers to be destroyed every resync - Handle disconnected daemons in stale daemon cleanup (was only checking done) - Remove Reconnect from InitFromDB — connections not ready at startup time - Sync block.JobId after SetJobId so frontend useEffect retriggers attachToDaemon - Frontend SessionInfo retry (15x @ 200ms) when daemon status=init (job not started) - Remove debug stack trace (4KB alloc) from DetachBlock Memory/DB consistency: - 
SetJobId, MarkDone: roll back memory JobId when DB write fails - Reconnect: do not clear memory JobId when DB status update fails - Log all unchecked DBUpdateFn/DBDelete calls (8 sites) - cleanupDeadBlocks: collect block IDs outside lock, then query DB, then relock Abstraction / code quality: - Extract idle timer helpers: resetIdleTimer, startIdleCountdown, advanceIdleTimer - Decompose Start() into tryReconnect() + createJobAndSync() + syncJobIdToBlocks() - Simplify SetJobId: drop redundant dbDaemon parameter - Converge daemon ops to Manager: MarkDone, ClearJobIdFromDaemons (callback), GetMemJobId, Rename, RecordActivity - Remove redundant DB read from SessionDaemonController.Stop UI: - Remove obsolete shield icon (DurableSessionFlyover) from block header - Add detailed debug logging for session attach/detach/state transitions --- frontend/app/block/blockframe-header.tsx | 11 - frontend/app/view/term/term.tsx | 38 ++- frontend/types/gotypes.d.ts | 2 + pkg/blockcontroller/blockcontroller.go | 40 +-- .../sessiondaemoncontroller.go | 88 ++++-- pkg/jobcontroller/jobcontroller.go | 17 +- pkg/sessiondaemon/sessiondaemon.go | 263 ++++++++++++++---- pkg/wshrpc/wshserver/wshserver.go | 61 ++-- 8 files changed, 370 insertions(+), 150 deletions(-) diff --git a/frontend/app/block/blockframe-header.tsx b/frontend/app/block/blockframe-header.tsx index 4f29e7a2af..290e6cc8a3 100644 --- a/frontend/app/block/blockframe-header.tsx +++ b/frontend/app/block/blockframe-header.tsx @@ -9,7 +9,6 @@ import { renderHeaderElements, } from "@/app/block/blockutil"; import { ConnectionButton } from "@/app/block/connectionbutton"; -import { DurableSessionFlyover } from "@/app/block/durable-session-flyover"; import { SessionDaemonIndicator } from "@/app/block/session-daemon-indicator"; import { getBlockBadgeAtom } from "@/app/store/badge"; import { @@ -225,7 +224,6 @@ const BlockFrame_Header = ({ let viewIconUnion = util.useAtomValueSafe(viewModel?.viewIcon) ?? 
blockViewToIcon(metaView); const preIconButton = util.useAtomValueSafe(viewModel?.preIconButton); const useTermHeader = util.useAtomValueSafe(viewModel?.useTermHeader); - const termConfigedDurable = util.useAtomValueSafe(viewModel?.termConfigedDurable); const hideViewName = util.useAtomValueSafe(viewModel?.hideViewName); const badge = jotai.useAtomValue(getBlockBadgeAtom(useTermHeader ? nodeModel.blockId : null)); const magnified = jotai.useAtomValue(nodeModel.isMagnified); @@ -272,15 +270,6 @@ const BlockFrame_Header = ({ isTerminalBlock={isTerminalBlock} /> )} - {useTermHeader && termConfigedDurable != null && ( - - )} {useTermHeader && badge && (
diff --git a/frontend/app/view/term/term.tsx b/frontend/app/view/term/term.tsx index 55efe32e4a..3792cf821f 100644 --- a/frontend/app/view/term/term.tsx +++ b/frontend/app/view/term/term.tsx @@ -361,31 +361,57 @@ const TerminalView = ({ blockId, model }: ViewComponentProps) => React.useEffect(() => { const termWrap = model.termRef.current; const daemonId = blockData?.meta?.["session:daemonid"]; + const blockJobId = blockData?.jobid; + console.log("[term:daemon-effect] block=%s daemon=%s blockJob=%s zoneId=%s", + blockId, daemonId || "(none)", blockJobId || "(none)", termWrap?.zoneId || "(no-termwrap)"); if (termWrap == null) { return; } if (!daemonId) { + console.log("[term:daemon-effect] block=%s no daemon, detaching zoneId=%s", blockId, termWrap.zoneId); fireAndForget(termWrap.detachFromDaemon.bind(termWrap)); return undefined; } let cancelled = false; - fireAndForget(async () => { + let retryTimer: ReturnType | null = null; + const tryAttach = async (retry: number) => { + if (cancelled) return; try { - if (cancelled) { + const info = await RpcApi.SessionInfoCommand(TabRpcClient, { daemonid: daemonId }); + if (cancelled) return; + if (!info.jobid) { + // If the daemon is still initializing (job not started yet), retry. + // This handles the race where SessionAttach sends the WaveObj update + // before the job is started by the resync controller (~20ms window). 
+ if (info.status === "init" && retry < 15) { + console.log("[term:daemon-effect] block=%s daemon=%s jobId not ready, will retry (attempt %d, status=%s)", + blockId, daemonId, retry, info.status); + retryTimer = setTimeout(() => tryAttach(retry + 1), 200); + return; + } + console.log("[term:daemon-effect] block=%s daemon=%s jobId not ready after %d retries, info=%o", + blockId, daemonId, retry, info); return; } - const info = await RpcApi.SessionInfoCommand(TabRpcClient, { daemonid: daemonId }); - if (!cancelled && info.jobid) { - await termWrap.attachToDaemon(info.jobid); + if (termWrap.zoneId === info.jobid) { + console.log("[term:daemon-effect] block=%s zoneId already=%s, skipping attach", blockId, info.jobid); + return; } + console.log("[term:daemon-effect] block=%s attaching zoneId %s -> %s", + blockId, termWrap.zoneId, info.jobid); + await termWrap.attachToDaemon(info.jobid); } catch (e) { if (!cancelled) { console.log("error attaching terminal to session daemon", daemonId, e); } } - }); + }; + fireAndForget(() => tryAttach(0)); return () => { cancelled = true; + if (retryTimer != null) { + clearTimeout(retryTimer); + } }; }, [blockData?.meta?.["session:daemonid"], blockData?.jobid, termWrapInst]); diff --git a/frontend/types/gotypes.d.ts b/frontend/types/gotypes.d.ts index d93f65b6d9..dd7b12b9ce 100644 --- a/frontend/types/gotypes.d.ts +++ b/frontend/types/gotypes.d.ts @@ -587,6 +587,7 @@ declare global { mainserverjwttoken: string; jobmanagerpid: number; jobmanagerstartts: number; + remoteidletimeoutseconds?: number; }; // wshrpc.CommandRemoteReconnectToJobManagerRtnData @@ -608,6 +609,7 @@ declare global { mainserverjwttoken: string; clientid: string; publickeybase64: string; + remoteidletimeoutseconds?: number; }; // wshrpc.CommandRemoteTerminateJobManagerData diff --git a/pkg/blockcontroller/blockcontroller.go b/pkg/blockcontroller/blockcontroller.go index 3ac7d15db1..83483ada22 100644 --- a/pkg/blockcontroller/blockcontroller.go +++ 
b/pkg/blockcontroller/blockcontroller.go @@ -213,11 +213,20 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts daemonId = "" } - // Validate existing daemon: if stale (done/not found), clear it + // Validate existing daemon: if stale (done/disconnected/not found), clear it and fall through to ShellController if daemonId != "" && controllerName == BlockController_Shell { dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, daemonId) - if err != nil || dbDaemon.Status == sessiondaemon.Status_Done { - log.Printf("[sessiondaemon] stale daemon=%s block=%s status=%s err=%v, clearing", daemonId, blockId, func() string { if dbDaemon != nil { return dbDaemon.Status }; return "db_load_error" }(), err) + staleStatus := false + if err != nil { + log.Printf("[sessiondaemon] staledaemon: daemon=%s block=%s not found in DB err=%v, clearing", daemonId, blockId, err) + staleStatus = true + } else if dbDaemon.Status == sessiondaemon.Status_Done || dbDaemon.Status == sessiondaemon.Status_Disconnected { + log.Printf("[sessiondaemon] staledaemon: daemon=%s block=%s status=%s, clearing and falling back to ShellController", daemonId, blockId, dbDaemon.Status) + staleStatus = true + } else { + log.Printf("[sessiondaemon] staledaemon: daemon=%s block=%s status=%s, keeping", daemonId, blockId, dbDaemon.Status) + } + if staleStatus { if existing != nil { DestroyBlockController(blockId) time.Sleep(100 * time.Millisecond) @@ -322,20 +331,17 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts // If so, clear the JobId so Start() runs again on the next ResyncController call. 
if sdc, ok := controller.(*SessionDaemonController); ok { if daemon := sessiondaemon.Manager.Get(sdc.DaemonId); daemon != nil && daemon.JobId != "" { - jobStatus, jErr := jobcontroller.GetJobManagerStatus(ctx, daemon.JobId) - if jErr != nil || jobStatus != jobcontroller.JobManagerStatus_Running { - log.Printf("[sessiondaemon] resync: job %s not running (status=%s err=%v), marking done", daemon.JobId, jobStatus, jErr) - daemon.Lock.Lock() - daemon.JobId = "" - daemon.Lock.Unlock() - wstore.DBUpdateFn(ctx, sdc.DaemonId, func(dbSd *waveobj.SessionDaemon) { - dbSd.JobId = "" - dbSd.Status = sessiondaemon.Status_Done - }) - stopBlockController(blockId) - time.Sleep(100 * time.Millisecond) - existing = nil - // Fall through to controller recreation + Start below + jobId := daemon.JobId + jobStatus, jErr := jobcontroller.GetJobManagerStatus(ctx, jobId) + if jErr == nil && jobStatus == jobcontroller.JobManagerStatus_Running { + log.Printf("[sessiondaemon] resync: daemon=%s block=%s job=%s alive, skipping", sdc.DaemonId, blockId, jobId) + } else { + log.Printf("[sessiondaemon] resync: daemon=%s block=%s job=%s not running (status=%s err=%v), marking done and recreating controller", sdc.DaemonId, blockId, jobId, jobStatus, jErr) + sessiondaemon.Manager.MarkDone(ctx, sdc.DaemonId) + stopBlockController(blockId) + time.Sleep(100 * time.Millisecond) + existing = nil + // Fall through to controller recreation + Start below } } } diff --git a/pkg/blockcontroller/sessiondaemoncontroller.go b/pkg/blockcontroller/sessiondaemoncontroller.go index 2f7acbf490..9b6ab5529f 100644 --- a/pkg/blockcontroller/sessiondaemoncontroller.go +++ b/pkg/blockcontroller/sessiondaemoncontroller.go @@ -18,6 +18,7 @@ import ( "github.com/wavetermdev/waveterm/pkg/utilds" "github.com/wavetermdev/waveterm/pkg/wavebase" "github.com/wavetermdev/waveterm/pkg/waveobj" + "github.com/wavetermdev/waveterm/pkg/wcore" "github.com/wavetermdev/waveterm/pkg/wps" "github.com/wavetermdev/waveterm/pkg/wshrpc" 
"github.com/wavetermdev/waveterm/pkg/wshrpc/wshclient" @@ -84,30 +85,42 @@ func (sdc *SessionDaemonController) Start(ctx context.Context, blockMeta waveobj } if dbDaemon.JobId != "" { - log.Printf("[sessiondaemon] start: attempting reconnect to job=%s status=%s", dbDaemon.JobId, dbDaemon.Status) - err = daemon.Reconnect(ctx, dbDaemon, rtOpts) - if err == nil { - log.Printf("[sessiondaemon] start: reconnect ok block=%s job=%s", sdc.BlockId, dbDaemon.JobId) - sdc.incrementVersion() - sdc.sendControllerStatus() - return nil - } - log.Printf("[sessiondaemon] start: reconnect failed block=%s job=%s err=%v", sdc.BlockId, dbDaemon.JobId, err) + return sdc.tryReconnect(ctx, daemon, dbDaemon, rtOpts) + } - dbDaemon, dbErr := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, sdc.DaemonId) - if dbErr != nil { - return fmt.Errorf("error reading daemon after reconnect failure: %w", dbErr) - } - switch dbDaemon.Status { - case sessiondaemon.Status_Disconnected: - return fmt.Errorf("daemon is disconnected, waiting for connection to recover") - case sessiondaemon.Status_Done: - return fmt.Errorf("remote job manager has exited, restart or delete the session") - default: - return fmt.Errorf("unexpected daemon status %q after reconnect failure", dbDaemon.Status) - } + return sdc.createJobAndSync(ctx, blockMeta, rtOpts) +} + +// tryReconnect attempts to reconnect to the daemon's existing job. 
+func (sdc *SessionDaemonController) tryReconnect(ctx context.Context, daemon *sessiondaemon.SessionDaemon, dbDaemon *waveobj.SessionDaemon, rtOpts *waveobj.RuntimeOpts) error { + log.Printf("[sessiondaemon] start: attempting reconnect to job=%s status=%s", dbDaemon.JobId, dbDaemon.Status) + err := daemon.Reconnect(ctx, dbDaemon, rtOpts) + if err == nil { + log.Printf("[sessiondaemon] start: reconnect ok block=%s job=%s", sdc.BlockId, dbDaemon.JobId) + sdc.incrementVersion() + sdc.sendControllerStatus() + return nil } + log.Printf("[sessiondaemon] start: reconnect failed block=%s job=%s err=%v", sdc.BlockId, dbDaemon.JobId, err) + + dbDaemon, dbErr := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, sdc.DaemonId) + if dbErr != nil { + return fmt.Errorf("error reading daemon after reconnect failure: %w", dbErr) + } + switch dbDaemon.Status { + case sessiondaemon.Status_Disconnected: + return fmt.Errorf("daemon is disconnected, waiting for connection to recover") + case sessiondaemon.Status_Done: + return fmt.Errorf("remote job manager has exited, restart or delete the session") + default: + return fmt.Errorf("unexpected daemon status %q after reconnect failure", dbDaemon.Status) + } +} +// createJobAndSync starts a new remote job for the daemon and syncs +// the resulting JobId to all attached blocks so the frontend can +// switch its zoneId. 
+func (sdc *SessionDaemonController) createJobAndSync(ctx context.Context, blockMeta waveobj.MetaMapType, rtOpts *waveobj.RuntimeOpts) error { log.Printf("[sessiondaemon] start: starting new job block=%s", sdc.BlockId) fsErr := filestore.WFS.MakeFile(ctx, sdc.BlockId, wavebase.BlockFile_Term, nil, wshrpc.FileOpts{MaxSize: DefaultTermMaxFileSize, Circular: true}) if fsErr != nil && fsErr != fs.ErrExist { @@ -120,18 +133,40 @@ func (sdc *SessionDaemonController) Start(ctx context.Context, blockMeta waveobj } log.Printf("[sessiondaemon] start: new job started block=%s job=%s", sdc.BlockId, jobId) - err = daemon.SetJobId(ctx, dbDaemon, jobId) + daemon := sessiondaemon.Manager.Get(sdc.DaemonId) + if daemon == nil { + return fmt.Errorf("session daemon %s not found in manager", sdc.DaemonId) + } + + err = daemon.SetJobId(ctx, jobId) if err != nil { log.Printf("[sessiondaemon] start: set job id failed daemon=%s job=%s err=%v", sdc.DaemonId, jobId, err) return fmt.Errorf("failed to set job id on daemon: %w", err) } + sdc.syncJobIdToBlocks(ctx, jobId) + log.Printf("[sessiondaemon] start: done block=%s daemon=%s job=%s", sdc.BlockId, sdc.DaemonId, jobId) sdc.incrementVersion() sdc.sendControllerStatus() return nil } +// syncJobIdToBlocks writes the daemon's JobId to every attached block's +// DB record so the frontend useEffect picks up the change and calls +// attachToDaemon, switching the terminal zoneId to the new job's output stream. 
+func (sdc *SessionDaemonController) syncJobIdToBlocks(ctx context.Context, jobId string) { + attachedBlocks := sessiondaemon.Manager.GetBlocksForDaemon(sdc.DaemonId) + log.Printf("[sessiondaemon] start: syncing jobId=%s to %d attached blocks for daemon=%s", jobId, len(attachedBlocks), sdc.DaemonId) + for _, blockId := range attachedBlocks { + wstore.DBUpdateFn(ctx, blockId, func(block *waveobj.Block) { + block.JobId = jobId + }) + wcore.SendWaveObjUpdate(waveobj.MakeORef(waveobj.OType_Block, blockId)) + log.Printf("[sessiondaemon] start: synced jobId=%s to block=%s", jobId, blockId) + } +} + func (sdc *SessionDaemonController) startNewJob(ctx context.Context, blockMeta waveobj.MetaMapType, rtOpts *waveobj.RuntimeOpts) (string, error) { log.Printf("[sessiondaemon] startNewJob: block=%s conn=%s", sdc.BlockId, sdc.ConnName) termSize := waveobj.TermSize{ @@ -191,18 +226,13 @@ func (sdc *SessionDaemonController) startNewJob(ctx context.Context, blockMeta w } func (sdc *SessionDaemonController) Stop(graceful bool, newStatus string, destroy bool) { - log.Printf("[sessiondaemon] stop: block=%s daemon=%s graceful=%v destroy=%v", sdc.BlockId, sdc.DaemonId, graceful, destroy) if !destroy { return } ctx := context.Background() sessiondaemon.Manager.DetachBlock(ctx, sdc.DaemonId, sdc.BlockId) - dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, sdc.DaemonId) - if err != nil { - log.Printf("[sessiondaemon] stop: db lookup failed daemon=%s err=%v", sdc.DaemonId, err) - return - } - log.Printf("[sessiondaemon] stop: daemon=%s remaining blocks=%d anonymous=%v", sdc.DaemonId, len(sessiondaemon.Manager.GetBlocksForDaemon(sdc.DaemonId)), dbDaemon.IsAnonymous) + log.Printf("[sessiondaemon] stop: block=%s daemon=%s remaining=%d", + sdc.BlockId, sdc.DaemonId, len(sessiondaemon.Manager.GetBlocksForDaemon(sdc.DaemonId))) } func (sdc *SessionDaemonController) SendInput(inputUnion *BlockInputUnion) error { diff --git a/pkg/jobcontroller/jobcontroller.go 
b/pkg/jobcontroller/jobcontroller.go index 067dcd18e5..6eca94db94 100644 --- a/pkg/jobcontroller/jobcontroller.go +++ b/pkg/jobcontroller/jobcontroller.go @@ -71,6 +71,11 @@ const JobOutputFileName = "term" const AutoReconnectDelay = 1 * time.Second const AutoReconnectCooldown = 30 * time.Second +// ClearSessionDaemonJobFn is set by sessiondaemon to handle cleaning +// up daemon state when a remote job manager exits. The sessiondaemon +// package cannot be imported here (import cycle), so a callback is used. +var ClearSessionDaemonJobFn func(ctx context.Context, jobId string) + type connState struct { actual bool processed bool @@ -1149,16 +1154,8 @@ func doReconnectJob(ctx context.Context, jobId string, rtOpts *waveobj.RuntimeOp sendBlockJobStatusEventByJob(ctx, updatedJob) } // Clear session daemon references to this job so daemons can be restarted - sessionDaemons, qErr := wstore.DBGetAllObjsByType[*waveobj.SessionDaemon](ctx, waveobj.OType_SessionDaemon) - if qErr == nil { - for _, sd := range sessionDaemons { - if sd.JobId == jobId { - wstore.DBUpdateFn(ctx, sd.OID, func(dbSd *waveobj.SessionDaemon) { - dbSd.JobId = "" - dbSd.Status = "init" - }) - } - } + if ClearSessionDaemonJobFn != nil { + ClearSessionDaemonJobFn(ctx, jobId) } telemetry.GoRecordTEventWrap(&telemetrydata.TEvent{ Event: "job:done", diff --git a/pkg/sessiondaemon/sessiondaemon.go b/pkg/sessiondaemon/sessiondaemon.go index 2887870d76..e9b0f53d0a 100644 --- a/pkg/sessiondaemon/sessiondaemon.go +++ b/pkg/sessiondaemon/sessiondaemon.go @@ -5,7 +5,6 @@ import ( "encoding/base64" "fmt" "log" - "runtime" "sync" "time" @@ -50,6 +49,12 @@ var Manager = &SessionDaemonManager{ Daemons: make(map[string]*SessionDaemon), } +func init() { + jobcontroller.ClearSessionDaemonJobFn = func(ctx context.Context, jobId string) { + Manager.ClearJobIdFromDaemons(ctx, jobId) + } +} + func (sd *SessionDaemon) GetNextInputSeq() (string, int) { sd.Lock.Lock() defer sd.Lock.Unlock() @@ -69,18 +74,26 @@ func (sd 
*SessionDaemon) HasBlock(blockId string) bool { return sd.Blocks[blockId] } -func (sd *SessionDaemon) SetJobId(ctx context.Context, dbDaemon *waveobj.SessionDaemon, jobId string) error { +func (sd *SessionDaemon) SetJobId(ctx context.Context, jobId string) error { sd.Lock.Lock() + oldJobId := sd.JobId sd.JobId = jobId sd.Lock.Unlock() + log.Printf("[sessiondaemon:%s] SetJobId: %s -> %s", sd.DaemonId, oldJobId, jobId) - err := wstore.DBUpdateFn(ctx, dbDaemon.OID, func(sdDb *waveobj.SessionDaemon) { + err := wstore.DBUpdateFn(ctx, sd.DaemonId, func(sdDb *waveobj.SessionDaemon) { sdDb.JobId = jobId sdDb.Status = Status_Running }) if err != nil { - log.Printf("[sessiondaemon:%s] warning: failed to update jobid in db: %v", sd.DaemonId, err) + // Roll back memory to keep it consistent with DB. + sd.Lock.Lock() + sd.JobId = oldJobId + sd.Lock.Unlock() + log.Printf("[sessiondaemon:%s] SetJobId: DB update failed, rolled back to %s: %v", sd.DaemonId, oldJobId, err) + return err } + log.Printf("[sessiondaemon:%s] SetJobId: DB updated (status=running job=%s)", sd.DaemonId, jobId) return nil } @@ -95,7 +108,7 @@ func (sd *SessionDaemon) Reconnect(ctx context.Context, dbDaemon *waveobj.Sessio err := jobcontroller.ReconnectJob(ctx, dbDaemon.JobId, rtOpts) if err != nil { var jobGone bool - wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbSd *waveobj.SessionDaemon) { + dbErr := wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbSd *waveobj.SessionDaemon) { if dbSd.JobId == "" { dbSd.Status = Status_Done jobGone = true @@ -103,6 +116,11 @@ func (sd *SessionDaemon) Reconnect(ctx context.Context, dbDaemon *waveobj.Sessio dbSd.Status = Status_Disconnected } }) + if dbErr != nil { + log.Printf("[sessiondaemon:%s] reconnect: error updating status: %v (memory may be stale)", sd.DaemonId, dbErr) + // If the DB write failed, jobGone is unreliable — do NOT clear memory JobId. 
+ return fmt.Errorf("reconnect failed: %w", err) + } if jobGone { sd.Lock.Lock() sd.JobId = "" @@ -114,9 +132,11 @@ func (sd *SessionDaemon) Reconnect(ctx context.Context, dbDaemon *waveobj.Sessio return fmt.Errorf("reconnect failed: %w", err) } - wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbSd *waveobj.SessionDaemon) { + if err := wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbSd *waveobj.SessionDaemon) { dbSd.Status = Status_Running - }) + }); err != nil { + log.Printf("[sessiondaemon:%s] reconnect: error updating status to running: %v", sd.DaemonId, err) + } log.Printf("[sessiondaemon:%s] reconnect: success, status -> running", sd.DaemonId) return nil } @@ -210,16 +230,11 @@ func (sd *SessionDaemonManager) AttachBlock(ctx context.Context, daemonId string sd.Lock.Unlock() defer daemon.Lock.Unlock() daemon.Blocks[blockId] = true - // Reset idle countdown: block attached, daemon is no longer idle. - wstore.DBUpdateFn(ctx, daemonId, func(dbD *waveobj.SessionDaemon) { - dbD.IdleSince = 0 - }) + sd.resetIdleTimer(ctx, daemonId) } func (sd *SessionDaemonManager) DetachBlock(ctx context.Context, daemonId string, blockId string) { - stackBuf := make([]byte, 4096) - stackLen := runtime.Stack(stackBuf, false) - log.Printf("[sessiondaemon] DetachBlock: daemon=%s block=%s stack:\n%s", daemonId, blockId, string(stackBuf[:stackLen])) + log.Printf("[sessiondaemon] DetachBlock: daemon=%s block=%s", daemonId, blockId) sd.Lock.Lock() daemon, ok := sd.Daemons[daemonId] if !ok { @@ -231,13 +246,45 @@ func (sd *SessionDaemonManager) DetachBlock(ctx context.Context, daemonId string defer daemon.Lock.Unlock() delete(daemon.Blocks, blockId) if len(daemon.Blocks) == 0 { - // Start idle countdown (IdleTimeout in seconds). - // Survives app restart: if daemon was idle before shutdown, - // it resumes counting down from where it left off. 
- wstore.DBUpdateFn(ctx, daemonId, func(dbD *waveobj.SessionDaemon) { - dbD.IdleSince = dbD.IdleTimeout - }) + sd.startIdleCountdown(ctx, daemonId) + } +} + +// --- idle timer helpers --- +// These centralize IdleSince management so there is a single place +// to understand the countdown mechanics. + +func (sd *SessionDaemonManager) resetIdleTimer(ctx context.Context, daemonId string) { + err := wstore.DBUpdateFn(ctx, daemonId, func(dbD *waveobj.SessionDaemon) { + dbD.IdleSince = 0 + }) + if err != nil { + log.Printf("[sessiondaemon:%s] error resetting idle timer: %v", daemonId, err) + } +} + +func (sd *SessionDaemonManager) startIdleCountdown(ctx context.Context, daemonId string) { + err := wstore.DBUpdateFn(ctx, daemonId, func(dbD *waveobj.SessionDaemon) { + dbD.IdleSince = dbD.IdleTimeout + }) + if err != nil { + log.Printf("[sessiondaemon:%s] error starting idle countdown: %v", daemonId, err) + } +} + +// advanceIdleTimer decrements IdleSince and returns the new value. +// A return value <= 0 means the timer has expired. Returns 0 on error. +func (sd *SessionDaemonManager) advanceIdleTimer(ctx context.Context, daemonId string) int64 { + var remaining int64 + err := wstore.DBUpdateFn(ctx, daemonId, func(dbD *waveobj.SessionDaemon) { + dbD.IdleSince -= IdleCheckInterval + remaining = dbD.IdleSince + }) + if err != nil { + log.Printf("[sessiondaemon:%s] error advancing idle timer: %v", daemonId, err) + return 0 } + return remaining } func (sd *SessionDaemonManager) GetBlocksForDaemon(daemonId string) []string { @@ -268,6 +315,107 @@ func (sd *SessionDaemonManager) SendInput(daemonId string, inputData []byte, sig return daemon.SendInput(ctx, inputData, sigName, termSize) } +// MarkDone clears the daemon's JobId and sets its status to Done, +// both in memory and in the database. Used when the resync controller +// detects that a daemon's remote job manager has exited. 
+func (sd *SessionDaemonManager) MarkDone(ctx context.Context, daemonId string) { + sd.Lock.Lock() + daemon, ok := sd.Daemons[daemonId] + sd.Lock.Unlock() + if !ok { + return + } + daemon.Lock.Lock() + oldJobId := daemon.JobId + daemon.JobId = "" + daemon.Lock.Unlock() + if err := wstore.DBUpdateFn(ctx, daemonId, func(dbSd *waveobj.SessionDaemon) { + dbSd.JobId = "" + dbSd.Status = Status_Done + }); err != nil { + // Roll back memory to avoid inconsistency. + daemon.Lock.Lock() + daemon.JobId = oldJobId + daemon.Lock.Unlock() + log.Printf("[sessiondaemon:%s] MarkDone: DB update failed, rolled back memory JobId: %v", daemonId, err) + return + } + log.Printf("[sessiondaemon:%s] MarkDone: job cleared, status=done", daemonId) +} + +// GetMemJobId returns the in-memory JobId for a daemon, used as a +// fallback when the DB read returns stale data (e.g., SessionInfoCommand +// called before a SetJobId transaction is visible). +func (sd *SessionDaemonManager) GetMemJobId(daemonId string) string { + sd.Lock.Lock() + daemon, ok := sd.Daemons[daemonId] + sd.Lock.Unlock() + if !ok { + return "" + } + daemon.Lock.Lock() + defer daemon.Lock.Unlock() + return daemon.JobId +} + +// Rename updates the daemon's name and marks it as non-anonymous, +// both in memory and in the database. +func (sd *SessionDaemonManager) Rename(ctx context.Context, daemonId string, name string) error { + sd.Lock.Lock() + daemon, ok := sd.Daemons[daemonId] + sd.Lock.Unlock() + if ok { + daemon.Lock.Lock() + daemon.Name = name + daemon.Lock.Unlock() + } + err := wstore.DBUpdateFn(ctx, daemonId, func(sdDb *waveobj.SessionDaemon) { + sdDb.Name = name + sdDb.IsAnonymous = false + }) + if err != nil { + return fmt.Errorf("update session daemon: %w", err) + } + return nil +} + +// RecordActivity updates the daemon's LastActiveAt timestamp in the database. 
+func (sd *SessionDaemonManager) RecordActivity(ctx context.Context, daemonId string) error { + err := wstore.DBUpdateFn(ctx, daemonId, func(sdDb *waveobj.SessionDaemon) { + sdDb.LastActiveAt = time.Now().UnixMilli() + }) + if err != nil { + return fmt.Errorf("record session activity: %w", err) + } + return nil +} + +// ClearJobIdFromDaemons clears the JobId from all daemons (memory + DB) +// whose job matches jobId. Called when a remote job manager exits so that +// the daemon can be restarted. +func (sd *SessionDaemonManager) ClearJobIdFromDaemons(ctx context.Context, jobId string) { + sd.Lock.Lock() + defer sd.Lock.Unlock() + for _, daemon := range sd.Daemons { + daemon.Lock.Lock() + if daemon.JobId == jobId { + oldDaemonJobId := daemon.JobId + daemon.JobId = "" + daemon.Lock.Unlock() + if err := wstore.DBUpdateFn(ctx, daemon.DaemonId, func(dbSd *waveobj.SessionDaemon) { + dbSd.JobId = "" + dbSd.Status = Status_Init + }); err != nil { + log.Printf("[sessiondaemon:%s] ClearJobIdFromDaemons: DB update failed, memory stale (was job=%s): %v", + daemon.DaemonId, oldDaemonJobId, err) + } + log.Printf("[sessiondaemon:%s] ClearJobIdFromDaemons: job=%s cleared, status=init", daemon.DaemonId, jobId) + continue + } + daemon.Lock.Unlock() + } +} + func (sd *SessionDaemonManager) InitFromDB(ctx context.Context) error { daemons, err := wstore.DBGetAllObjsByType[*waveobj.SessionDaemon](ctx, waveobj.OType_SessionDaemon) if err != nil { @@ -275,7 +423,7 @@ func (sd *SessionDaemonManager) InitFromDB(ctx context.Context) error { } for _, dbDaemon := range daemons { - daemon, err := sd.GetOrCreate(ctx, dbDaemon) + _, err := sd.GetOrCreate(ctx, dbDaemon) if err != nil { log.Printf("[sessiondaemon] warning: failed to load daemon %s: %v", dbDaemon.OID, err) continue @@ -283,19 +431,21 @@ func (sd *SessionDaemonManager) InitFromDB(ctx context.Context) error { switch dbDaemon.Status { case Status_Running, Status_Disconnected: - err = daemon.Reconnect(ctx, dbDaemon, nil) - if err != nil 
{ - log.Printf("[sessiondaemon:%s] reconnect failed: %v", dbDaemon.OID, err) - } + // Do NOT call Reconnect here — connections may not be established yet. + // Reconnection is deferred to SessionDaemonController.Start() when a + // block referencing this daemon is resynced and the connection is ready. + log.Printf("[sessiondaemon:%s] loaded daemon status=%s job=%s (reconnect deferred)", dbDaemon.OID, dbDaemon.Status, dbDaemon.JobId) case Status_Done: log.Printf("[sessiondaemon:%s] loaded done daemon", dbDaemon.OID) case Status_Init: log.Printf("[sessiondaemon:%s] loaded init daemon", dbDaemon.OID) default: log.Printf("[sessiondaemon:%s] unknown status %q, treating as init", dbDaemon.OID, dbDaemon.Status) - wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbSd *waveobj.SessionDaemon) { + if err := wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbSd *waveobj.SessionDaemon) { dbSd.Status = Status_Init - }) + }); err != nil { + log.Printf("[sessiondaemon:%s] error fixing unknown status: %v", dbDaemon.OID, err) + } } } @@ -323,28 +473,37 @@ func (sd *SessionDaemonManager) StartIdleReaper(ctx context.Context) { // Blocks map that no longer exist in the database. This handles the // case where a block was deleted without calling DetachBlock. func (sd *SessionDaemonManager) cleanupDeadBlocks(ctx context.Context, daemonId string, memDaemon *SessionDaemon) { + // Collect block IDs under the daemon lock, then release it for DB queries. 
memDaemon.Lock.Lock() - var deadBlocks []string + blockIds := make([]string, 0, len(memDaemon.Blocks)) for blockId := range memDaemon.Blocks { + blockIds = append(blockIds, blockId) + } + memDaemon.Lock.Unlock() + + var deadBlocks []string + for _, blockId := range blockIds { _, err := wstore.DBMustGet[*waveobj.Block](ctx, blockId) if err != nil { deadBlocks = append(deadBlocks, blockId) } } + + if len(deadBlocks) == 0 { + return + } + + log.Printf("[sessiondaemon] cleanupDeadBlocks: daemon=%s removing %d dead blocks: %v", daemonId, len(deadBlocks), deadBlocks) + + memDaemon.Lock.Lock() for _, blockId := range deadBlocks { delete(memDaemon.Blocks, blockId) } + remaining := len(memDaemon.Blocks) memDaemon.Lock.Unlock() - if len(deadBlocks) > 0 { - log.Printf("[sessiondaemon] cleanupDeadBlocks: daemon=%s removed %d dead blocks: %v", daemonId, len(deadBlocks), deadBlocks) - remaining := len(memDaemon.Blocks) - if remaining == 0 { - // All blocks are dead, start idle countdown. - wstore.DBUpdateFn(ctx, daemonId, func(dbD *waveobj.SessionDaemon) { - dbD.IdleSince = dbD.IdleTimeout - }) - } + if remaining == 0 { + sd.startIdleCountdown(ctx, daemonId) } } @@ -380,12 +539,8 @@ func (sd *SessionDaemonManager) reapRunning(ctx context.Context, dbDaemon *waveo return } - var newRemaining int64 - wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbD *waveobj.SessionDaemon) { - dbD.IdleSince -= IdleCheckInterval - newRemaining = dbD.IdleSince - }) - if newRemaining > 0 { + remaining := sd.advanceIdleTimer(ctx, dbDaemon.OID) + if remaining > 0 { return } @@ -398,7 +553,9 @@ func (sd *SessionDaemonManager) reapRunning(ctx context.Context, dbDaemon *waveo } sd.Remove(dbDaemon.OID) } - wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, dbDaemon.OID) + if err := wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, dbDaemon.OID); err != nil { + log.Printf("[sessiondaemon:%s] reapRunning: error deleting from DB: %v", dbDaemon.OID, err) + } } func (sd *SessionDaemonManager) reapDone(ctx 
context.Context, dbDaemon *waveobj.SessionDaemon, memDaemon *SessionDaemon, hasMem bool) { @@ -411,18 +568,16 @@ func (sd *SessionDaemonManager) reapDone(ctx context.Context, dbDaemon *waveobj. } if dbDaemon.IdleSince <= 0 { - wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbD *waveobj.SessionDaemon) { + if err := wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbD *waveobj.SessionDaemon) { dbD.IdleSince = DoneReapTimeout - }) + }); err != nil { + log.Printf("[sessiondaemon:%s] reapDone: error setting done reap timeout: %v", dbDaemon.OID, err) + } return } - var newRemaining int64 - wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbD *waveobj.SessionDaemon) { - dbD.IdleSince -= IdleCheckInterval - newRemaining = dbD.IdleSince - }) - if newRemaining > 0 { + remaining := sd.advanceIdleTimer(ctx, dbDaemon.OID) + if remaining > 0 { return } @@ -430,7 +585,9 @@ func (sd *SessionDaemonManager) reapDone(ctx context.Context, dbDaemon *waveobj. if hasMem { sd.Remove(dbDaemon.OID) } - wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, dbDaemon.OID) + if err := wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, dbDaemon.OID); err != nil { + log.Printf("[sessiondaemon:%s] reapDone: error deleting from DB: %v", dbDaemon.OID, err) + } } func (sd *SessionDaemonManager) verifyConsistency(ctx context.Context) { diff --git a/pkg/wshrpc/wshserver/wshserver.go b/pkg/wshrpc/wshserver/wshserver.go index 094153ff21..c90cd71284 100644 --- a/pkg/wshrpc/wshserver/wshserver.go +++ b/pkg/wshrpc/wshserver/wshserver.go @@ -1661,11 +1661,23 @@ func (ws *WshServer) SessionListCommand(ctx context.Context, data wshrpc.Command } func (ws *WshServer) SessionAttachCommand(ctx context.Context, data wshrpc.CommandSessionAttachData) error { + log.Printf("[sessiondaemon] SessionAttach: block=%s old_daemon=%s new_daemon=%s new_daemon_job=%s", + data.BlockId, data.CurrentDaemonId, data.DaemonId, func() string { + if data.DaemonId != "" { + if db, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId); err == nil 
&& db != nil { + return db.JobId + } + } + return "" + }()) + if data.CurrentDaemonId != "" && data.CurrentDaemonId == data.DaemonId { + log.Printf("[sessiondaemon] SessionAttach: block=%s already attached to daemon=%s, skipping", data.BlockId, data.DaemonId) return nil } if data.CurrentDaemonId != "" { + log.Printf("[sessiondaemon] SessionAttach: detaching block=%s from old_daemon=%s", data.BlockId, data.CurrentDaemonId) sessiondaemon.Manager.DetachBlock(ctx, data.CurrentDaemonId, data.BlockId) } @@ -1688,6 +1700,14 @@ func (ws *WshServer) SessionAttachCommand(ctx context.Context, data wshrpc.Comma block.Meta[waveobj.MetaKey_SessionDaemonId] = data.DaemonId block.JobId = dbDaemon.JobId }) + log.Printf("[sessiondaemon] SessionAttach: block=%s daemon=%s meta_updated daemon_job=%s block_job=%s", + data.BlockId, data.DaemonId, dbDaemon.JobId, func() string { + if b, err := wstore.DBMustGet[*waveobj.Block](ctx, data.BlockId); err == nil && b != nil { + return b.JobId + } + return "?" + }()) + if err != nil { sessiondaemon.Manager.DetachBlock(ctx, data.DaemonId, data.BlockId) if data.CurrentDaemonId != "" { @@ -1696,6 +1716,7 @@ func (ws *WshServer) SessionAttachCommand(ctx context.Context, data wshrpc.Comma return fmt.Errorf("update block meta: %w", err) } + log.Printf("[sessiondaemon] SessionAttach: triggering resync for block=%s", data.BlockId) resyncBlockController(ctx, data.BlockId) wcore.SendWaveObjUpdate(waveobj.MakeORef(waveobj.OType_Block, data.BlockId)) return nil @@ -1733,7 +1754,20 @@ func (ws *WshServer) SessionInfoCommand(ctx context.Context, data wshrpc.Command if err != nil { return nil, fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) } - return buildSessionInfoRtnData(ctx, dbDaemon) + // If the DB daemon has no JobId yet, check the in-memory daemon which + // may be more current (SetJobId updates memory before DB, so the block + // JobId sync can trigger a frontend SessionInfo call before the daemon + // DB write is visible). 
+ if dbDaemon.JobId == "" { + if memJobId := sessiondaemon.Manager.GetMemJobId(dbDaemon.OID); memJobId != "" { + log.Printf("[sessiondaemon] SessionInfo: daemon=%s DB jobId empty, using in-memory jobId=%s", dbDaemon.OID, memJobId) + dbDaemon.JobId = memJobId + } + } + info, err := buildSessionInfoRtnData(ctx, dbDaemon) + log.Printf("[sessiondaemon] SessionInfo: daemon=%s job=%s status=%s blocks=%d err=%v", + data.DaemonId, dbDaemon.JobId, dbDaemon.Status, len(info.Blocks), err) + return info, err } func (ws *WshServer) SessionTagCommand(ctx context.Context, data wshrpc.CommandSessionTagData) error { @@ -1741,32 +1775,11 @@ func (ws *WshServer) SessionTagCommand(ctx context.Context, data wshrpc.CommandS if err != nil { return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) } - - memDaemon := sessiondaemon.Manager.Get(data.DaemonId) - if memDaemon != nil { - memDaemon.Lock.Lock() - memDaemon.Name = data.Name - memDaemon.Lock.Unlock() - } - - err = wstore.DBUpdateFn(ctx, data.DaemonId, func(sd *waveobj.SessionDaemon) { - sd.Name = data.Name - sd.IsAnonymous = false - }) - if err != nil { - return fmt.Errorf("update session daemon: %w", err) - } - return nil + return sessiondaemon.Manager.Rename(ctx, data.DaemonId, data.Name) } func (ws *WshServer) RecordSessionActivityCommand(ctx context.Context, data wshrpc.CommandRecordSessionActivityData) error { - err := wstore.DBUpdateFn(ctx, data.DaemonId, func(sd *waveobj.SessionDaemon) { - sd.LastActiveAt = time.Now().UnixMilli() - }) - if err != nil { - return fmt.Errorf("record session activity: %w", err) - } - return nil + return sessiondaemon.Manager.RecordActivity(ctx, data.DaemonId) } func buildSessionInfoRtnData(ctx context.Context, dbDaemon *waveobj.SessionDaemon) (*wshrpc.SessionInfoRtnData, error) { From b15cbbcd3eba19b3cafcc787708ce02c87a4f259 Mon Sep 17 00:00:00 2001 From: lyx-tec Date: Wed, 17 Jun 2026 07:02:35 +0800 Subject: [PATCH 28/36] feat: session list filter by connection, cross-connection 
attach guard, OnConnectionUp lifecycle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Session list now only shows sessions on the current block's SSH connection - SessionAttachCommand rejects cross-connection attach (block conn != daemon conn) - OnConnectionUp: when SSH connection becomes ready, check all daemon job managers via SSH exec (ps/tasklist). Alive → reconnect. Dead → clean up to init for restart. - CheckRemoteProcessAlive in conncontroller: cross-platform (Unix/Win) process liveness check --- .../app/block/session-daemon-indicator.tsx | 10 +-- pkg/jobcontroller/jobcontroller.go | 9 +++ pkg/remote/conncontroller/conncontroller.go | 49 ++++++++++++++ pkg/sessiondaemon/sessiondaemon.go | 64 +++++++++++++++++++ pkg/wshrpc/wshserver/wshserver.go | 12 ++++ 5 files changed, 140 insertions(+), 4 deletions(-) diff --git a/frontend/app/block/session-daemon-indicator.tsx b/frontend/app/block/session-daemon-indicator.tsx index 03d791571c..7742ff6707 100644 --- a/frontend/app/block/session-daemon-indicator.tsx +++ b/frontend/app/block/session-daemon-indicator.tsx @@ -8,7 +8,7 @@ import { useWaveEnv } from "@/app/waveenv/waveenv"; import { fireAndForget } from "@/util/util"; import { autoUpdate, flip, FloatingPortal, offset, shift, useFloating } from "@floating-ui/react"; import * as jotai from "jotai"; -import { useCallback, useEffect, useRef, useState } from "react"; +import { useCallback, useEffect, useMemo, useRef, useState } from "react"; import { BlockEnv } from "./blockenv"; interface SessionDisplayData { @@ -256,6 +256,8 @@ export function SessionDaemonIndicator({ blockId, useTermHeader }: SessionDaemon const isSshConn = connName && !connName.startsWith("local") && !connName.startsWith("wsl://"); const visible = !!daemonId || isSshConn; + const sameConnSessions = useMemo(() => sessions.filter((s) => s.connection === connName), [sessions, connName]); + return ( <>
)} - {sessions.length === 0 && ( + {sameConnSessions.length === 0 && (
- Loading sessions... + No sessions on this connection
)} - {sessions.map((s) => { + {sameConnSessions.map((s) => { const isActive = s.daemonid === daemonId; const blockCount = s.blocks?.length ?? 0; const canClose = blockCount === 0; diff --git a/pkg/jobcontroller/jobcontroller.go b/pkg/jobcontroller/jobcontroller.go index 6eca94db94..cd7c1cf19e 100644 --- a/pkg/jobcontroller/jobcontroller.go +++ b/pkg/jobcontroller/jobcontroller.go @@ -76,6 +76,10 @@ const AutoReconnectCooldown = 30 * time.Second // package cannot be imported here (import cycle), so a callback is used. var ClearSessionDaemonJobFn func(ctx context.Context, jobId string) +// OnConnectionUpFn is set by sessiondaemon to handle session daemon +// state reconciliation when an SSH connection becomes ready. +var OnConnectionUpFn func(ctx context.Context, connName string) + type connState struct { actual bool processed bool @@ -520,6 +524,11 @@ func onConnectionUp(connName string) { } log.Printf("[conn:%s] finished reconnecting jobs: %d/%d successful", connName, successCount, len(jobsToReconnect)) + + // Reconcile session daemon state for this connection. + if OnConnectionUpFn != nil { + OnConnectionUpFn(ctx, connName) + } } func onConnectionDown(connName string) { diff --git a/pkg/remote/conncontroller/conncontroller.go b/pkg/remote/conncontroller/conncontroller.go index a24a789009..66f45f7e2c 100644 --- a/pkg/remote/conncontroller/conncontroller.go +++ b/pkg/remote/conncontroller/conncontroller.go @@ -13,6 +13,7 @@ import ( "net" "os" "path/filepath" + "strconv" "strings" "sync" "sync/atomic" @@ -1259,3 +1260,51 @@ func GetConnectionsFromConfig() ([]string, error) { return resolveSshConfigPatterns(sshConfigFiles) } + +// runSSHCommand executes a command over the SSH connection and returns stdout. 
+func runSSHCommand(ctx context.Context, client *ssh.Client, cmd string) (string, error) { + session, err := client.NewSession() + if err != nil { + return "", err + } + defer session.Close() + var outBuf strings.Builder + session.Stdout = &outBuf + session.Stderr = &outBuf + err = runSessionWithContext(ctx, session, cmd) + return strings.TrimSpace(outBuf.String()), err +} + +// CheckRemoteProcessAlive verifies whether a process with the given PID +// is still running on the remote host. It tries Unix (ps) first, then +// falls back to Windows (tasklist). Returns true if the process exists. +func CheckRemoteProcessAlive(ctx context.Context, connName string, pid int) (bool, error) { + opts, err := remote.ParseOpts(connName) + if err != nil { + return false, err + } + conn := MaybeGetConn(opts) + if conn == nil { + return false, fmt.Errorf("connection %q not found", connName) + } + client := conn.GetClient() + if client == nil { + return false, fmt.Errorf("connection %q not connected", connName) + } + + pidStr := strconv.Itoa(pid) + + // Unix: "ps -p <pid> -o pid=" returns the PID if process exists, empty otherwise. + out, _ := runSSHCommand(ctx, client, fmt.Sprintf("ps -p %s -o pid= 2>/dev/null", pidStr)) + if strings.TrimSpace(out) == pidStr { + return true, nil + } + + // Windows: "tasklist /FI ..." lists matching processes.
+ out, _ = runSSHCommand(ctx, client, fmt.Sprintf("tasklist /FI \"PID eq %s\" /NH 2>nul", pidStr)) + if strings.Contains(out, pidStr) { + return true, nil + } + + return false, nil +} diff --git a/pkg/sessiondaemon/sessiondaemon.go b/pkg/sessiondaemon/sessiondaemon.go index e9b0f53d0a..fac9236b29 100644 --- a/pkg/sessiondaemon/sessiondaemon.go +++ b/pkg/sessiondaemon/sessiondaemon.go @@ -10,6 +10,7 @@ import ( "github.com/google/uuid" "github.com/wavetermdev/waveterm/pkg/jobcontroller" + "github.com/wavetermdev/waveterm/pkg/remote/conncontroller" "github.com/wavetermdev/waveterm/pkg/waveobj" "github.com/wavetermdev/waveterm/pkg/wshrpc" "github.com/wavetermdev/waveterm/pkg/wstore" @@ -53,6 +54,9 @@ func init() { jobcontroller.ClearSessionDaemonJobFn = func(ctx context.Context, jobId string) { Manager.ClearJobIdFromDaemons(ctx, jobId) } + jobcontroller.OnConnectionUpFn = func(ctx context.Context, connName string) { + Manager.OnConnectionUp(ctx, connName) + } } func (sd *SessionDaemon) GetNextInputSeq() (string, int) { @@ -453,6 +457,66 @@ func (sd *SessionDaemonManager) InitFromDB(ctx context.Context) error { return nil } +// OnConnectionUp is called when an SSH connection becomes ready. +// It checks all daemons on that connection: reconnects live jobs and +// cleans up daemons whose remote job manager has died. +func (sd *SessionDaemonManager) OnConnectionUp(ctx context.Context, connName string) { + daemons, err := wstore.DBGetAllObjsByType[*waveobj.SessionDaemon](ctx, waveobj.OType_SessionDaemon) + if err != nil { + return + } + for _, dbDaemon := range daemons { + if dbDaemon.Connection != connName { + continue + } + if dbDaemon.JobId == "" { + continue + } + + // Read JobManagerPid from the job record. 
+ job, err := wstore.DBMustGet[*waveobj.Job](ctx, dbDaemon.JobId) + if err != nil || job.JobManagerPid == 0 { + continue + } + + alive, err := conncontroller.CheckRemoteProcessAlive(ctx, connName, job.JobManagerPid) + if err != nil { + log.Printf("[sessiondaemon:%s] OnConnectionUp: error checking remote process: %v", dbDaemon.OID, err) + continue + } + if alive { + // Job manager is still running — try to reconnect and bring + // it back to running status. + log.Printf("[sessiondaemon:%s] OnConnectionUp: remote job manager alive (pid=%d), reconnecting", dbDaemon.OID, job.JobManagerPid) + sd.Lock.Lock() + memDaemon := sd.Daemons[dbDaemon.OID] + sd.Lock.Unlock() + if memDaemon != nil { + err := memDaemon.Reconnect(ctx, dbDaemon, nil) + if err != nil { + log.Printf("[sessiondaemon:%s] OnConnectionUp: reconnect failed: %v", dbDaemon.OID, err) + } + } + continue + } + // Job manager is dead — clean up the daemon so it can be + // restarted on next attach. + log.Printf("[sessiondaemon:%s] OnConnectionUp: remote job manager dead (pid=%d), cleaning up", dbDaemon.OID, job.JobManagerPid) + sd.Lock.Lock() + memDaemon := sd.Daemons[dbDaemon.OID] + sd.Lock.Unlock() + if memDaemon != nil { + memDaemon.Lock.Lock() + memDaemon.JobId = "" + memDaemon.Lock.Unlock() + } + wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbSd *waveobj.SessionDaemon) { + dbSd.JobId = "" + dbSd.Status = Status_Init + }) + } +} + func (sd *SessionDaemonManager) StartIdleReaper(ctx context.Context) { go func() { ticker := time.NewTicker(IdleCheckInterval * time.Second) diff --git a/pkg/wshrpc/wshserver/wshserver.go b/pkg/wshrpc/wshserver/wshserver.go index c90cd71284..6920073727 100644 --- a/pkg/wshrpc/wshserver/wshserver.go +++ b/pkg/wshrpc/wshserver/wshserver.go @@ -1689,6 +1689,18 @@ func (ws *WshServer) SessionAttachCommand(ctx context.Context, data wshrpc.Comma return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) } + // Refuse cross-connection attach: a block on connection A cannot + // 
share a daemon whose remote job runs on connection B. + blockData, err := wstore.DBMustGet[*waveobj.Block](ctx, data.BlockId) + if err == nil { + blockConn := blockData.Meta.GetString(waveobj.MetaKey_Connection, "") + if blockConn != "" && blockConn != dbDaemon.Connection { + log.Printf("[sessiondaemon] SessionAttach: block=%s conn=%q daemon conn=%q mismatch, refusing", + data.BlockId, blockConn, dbDaemon.Connection) + return fmt.Errorf("cannot attach to session on connection %q from connection %q", dbDaemon.Connection, blockConn) + } + } + _, err = sessiondaemon.Manager.GetOrCreate(ctx, dbDaemon) if err != nil { return fmt.Errorf("create session daemon in manager: %w", err) From 86a4d49acafc3724f6a337f56a5628a1e9e5b2f8 Mon Sep 17 00:00:00 2001 From: lyx-tec Date: Wed, 17 Jun 2026 08:25:35 +0800 Subject: [PATCH 29/36] fix: handle dead remote job gracefully - auto-recover, direct delete, OnConnectionUp split - Start(): done status auto-recovers - clears JobId and creates new job - tryReconnect: simplified, done/disconnected handling moved to Start() - OnConnectionUp: dead+0 blocks -> DBDelete; dead+has blocks -> init - SessionDeleteCommand: on Stop failure, check remote process alive; dead -> force delete; alive -> refuse - isRemoteProcessDead helper - ConfirmModal infrastructure for future use - path-specific logging for all recovery/delete/reconnect flows --- frontend/app/modals/confirmmodal.tsx | 25 ++++++++++ frontend/app/modals/modalregistry.tsx | 2 + .../sessiondaemoncontroller.go | 49 ++++++++++++------- pkg/sessiondaemon/sessiondaemon.go | 38 +++++++++----- pkg/wshrpc/wshserver/wshserver.go | 23 ++++++++- 5 files changed, 107 insertions(+), 30 deletions(-) create mode 100644 frontend/app/modals/confirmmodal.tsx diff --git a/frontend/app/modals/confirmmodal.tsx b/frontend/app/modals/confirmmodal.tsx new file mode 100644 index 0000000000..158beffdb3 --- /dev/null +++ b/frontend/app/modals/confirmmodal.tsx @@ -0,0 +1,25 @@ +// Copyright 2025, Command Line Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +import { Modal } from "@/app/modals/modal"; +import { modalsModel } from "@/app/store/modalmodel"; + +const ConfirmModal = ({ message, onOk }: { message: string; onOk?: () => void }) => { + const handleOk = () => { + modalsModel.popModal(); + if (onOk) onOk(); + }; + const handleClose = () => { + modalsModel.popModal(); + }; + + return ( + +
{message}
+
+ ); +}; + +ConfirmModal.displayName = "ConfirmModal"; + +export { ConfirmModal }; diff --git a/frontend/app/modals/modalregistry.tsx b/frontend/app/modals/modalregistry.tsx index 88d19e732c..ec583b4cbe 100644 --- a/frontend/app/modals/modalregistry.tsx +++ b/frontend/app/modals/modalregistry.tsx @@ -8,6 +8,7 @@ import { UpgradeOnboardingPatch } from "@/app/onboarding/onboarding-upgrade-patc import { DeleteFileModal, PublishAppModal, RenameFileModal } from "@/builder/builder-apppanel"; import { SetSecretDialog } from "@/builder/tabs/builder-secrettab"; import { AboutModal } from "./about"; +import { ConfirmModal } from "./confirmmodal"; import { UserInputModal } from "./userinputmodal"; const modalRegistry: { [key: string]: React.ComponentType } = { @@ -17,6 +18,7 @@ const modalRegistry: { [key: string]: React.ComponentType } = { [UserInputModal.displayName || "UserInputModal"]: UserInputModal, [AboutModal.displayName || "AboutModal"]: AboutModal, [MessageModal.displayName || "MessageModal"]: MessageModal, + [ConfirmModal.displayName || "ConfirmModal"]: ConfirmModal, [PublishAppModal.displayName || "PublishAppModal"]: PublishAppModal, [RenameFileModal.displayName || "RenameFileModal"]: RenameFileModal, [DeleteFileModal.displayName || "DeleteFileModal"]: DeleteFileModal, diff --git a/pkg/blockcontroller/sessiondaemoncontroller.go b/pkg/blockcontroller/sessiondaemoncontroller.go index 9b6ab5529f..184dbca359 100644 --- a/pkg/blockcontroller/sessiondaemoncontroller.go +++ b/pkg/blockcontroller/sessiondaemoncontroller.go @@ -78,14 +78,43 @@ func (sdc *SessionDaemonController) Start(ctx context.Context, blockMeta waveobj } if dbDaemon.Status == sessiondaemon.Status_Done { - return fmt.Errorf("remote job manager has exited, restart or delete the session") + // Job manager is dead — clear state and create a new job. 
+ log.Printf("[sessiondaemon] start: daemon=%s is done, recovering to init", sdc.DaemonId) + daemon.Lock.Lock() + daemon.JobId = "" + daemon.Lock.Unlock() + wstore.DBUpdateFn(ctx, sdc.DaemonId, func(dbSd *waveobj.SessionDaemon) { + dbSd.JobId = "" + dbSd.Status = sessiondaemon.Status_Init + }) + return sdc.createJobAndSync(ctx, blockMeta, rtOpts) } if dbDaemon.Status == sessiondaemon.Status_Disconnected { return fmt.Errorf("daemon is disconnected, waiting for connection to recover") } if dbDaemon.JobId != "" { - return sdc.tryReconnect(ctx, daemon, dbDaemon, rtOpts) + err := sdc.tryReconnect(ctx, daemon, dbDaemon, rtOpts) + if err != nil { + // Reconnect failed — check if the job manager was confirmed + // gone. If so, clear state and create a new job. + dbDaemon2, dbErr := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, sdc.DaemonId) + if dbErr == nil && dbDaemon2.Status == sessiondaemon.Status_Done { + log.Printf("[sessiondaemon] start: daemon=%s reconnect confirmed done, recovering", sdc.DaemonId) + daemon.Lock.Lock() + daemon.JobId = "" + daemon.Lock.Unlock() + wstore.DBUpdateFn(ctx, sdc.DaemonId, func(dbSd *waveobj.SessionDaemon) { + dbSd.JobId = "" + dbSd.Status = sessiondaemon.Status_Init + }) + return sdc.createJobAndSync(ctx, blockMeta, rtOpts) + } + return err + } + sdc.incrementVersion() + sdc.sendControllerStatus() + return nil } return sdc.createJobAndSync(ctx, blockMeta, rtOpts) @@ -97,24 +126,10 @@ func (sdc *SessionDaemonController) tryReconnect(ctx context.Context, daemon *se err := daemon.Reconnect(ctx, dbDaemon, rtOpts) if err == nil { log.Printf("[sessiondaemon] start: reconnect ok block=%s job=%s", sdc.BlockId, dbDaemon.JobId) - sdc.incrementVersion() - sdc.sendControllerStatus() return nil } log.Printf("[sessiondaemon] start: reconnect failed block=%s job=%s err=%v", sdc.BlockId, dbDaemon.JobId, err) - - dbDaemon, dbErr := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, sdc.DaemonId) - if dbErr != nil { - return fmt.Errorf("error reading daemon 
after reconnect failure: %w", dbErr) - } - switch dbDaemon.Status { - case sessiondaemon.Status_Disconnected: - return fmt.Errorf("daemon is disconnected, waiting for connection to recover") - case sessiondaemon.Status_Done: - return fmt.Errorf("remote job manager has exited, restart or delete the session") - default: - return fmt.Errorf("unexpected daemon status %q after reconnect failure", dbDaemon.Status) - } + return err } // createJobAndSync starts a new remote job for the daemon and syncs diff --git a/pkg/sessiondaemon/sessiondaemon.go b/pkg/sessiondaemon/sessiondaemon.go index fac9236b29..df217024c8 100644 --- a/pkg/sessiondaemon/sessiondaemon.go +++ b/pkg/sessiondaemon/sessiondaemon.go @@ -499,21 +499,37 @@ func (sd *SessionDaemonManager) OnConnectionUp(ctx context.Context, connName str } continue } - // Job manager is dead — clean up the daemon so it can be - // restarted on next attach. - log.Printf("[sessiondaemon:%s] OnConnectionUp: remote job manager dead (pid=%d), cleaning up", dbDaemon.OID, job.JobManagerPid) + // Job manager is dead. + log.Printf("[sessiondaemon:%s] OnConnectionUp: remote job manager dead (pid=%d)", dbDaemon.OID, job.JobManagerPid) sd.Lock.Lock() memDaemon := sd.Daemons[dbDaemon.OID] sd.Lock.Unlock() - if memDaemon != nil { - memDaemon.Lock.Lock() - memDaemon.JobId = "" - memDaemon.Lock.Unlock() + hasBlocks := memDaemon != nil && memDaemon.HasAttachedBlocks() + + if hasBlocks { + // Blocks are still attached — reset to init so they auto-recover + // when the block becomes active again. + if memDaemon != nil { + memDaemon.Lock.Lock() + memDaemon.JobId = "" + memDaemon.Lock.Unlock() + } + wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbSd *waveobj.SessionDaemon) { + dbSd.JobId = "" + dbSd.Status = Status_Init + }) + log.Printf("[sessiondaemon:%s] OnConnectionUp: dead, has blocks, reset to init", dbDaemon.OID) + } else { + // No blocks referencing this daemon — safe to delete. 
+ if memDaemon != nil { + sd.Remove(dbDaemon.OID) + } + if err := wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, dbDaemon.OID); err != nil { + log.Printf("[sessiondaemon:%s] OnConnectionUp: error deleting dead daemon: %v", dbDaemon.OID, err) + } else { + log.Printf("[sessiondaemon:%s] OnConnectionUp: dead, no blocks, deleted", dbDaemon.OID) + } } - wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbSd *waveobj.SessionDaemon) { - dbSd.JobId = "" - dbSd.Status = Status_Init - }) } } diff --git a/pkg/wshrpc/wshserver/wshserver.go b/pkg/wshrpc/wshserver/wshserver.go index 6920073727..c0d3f73cfa 100644 --- a/pkg/wshrpc/wshserver/wshserver.go +++ b/pkg/wshrpc/wshserver/wshserver.go @@ -1611,16 +1611,21 @@ func (ws *WshServer) SessionCreateCommand(ctx context.Context, data wshrpc.Comma } func (ws *WshServer) SessionDeleteCommand(ctx context.Context, data wshrpc.CommandSessionDeleteData) error { - _, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) + dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) if err != nil { return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) } memDaemon := sessiondaemon.Manager.Get(data.DaemonId) + forceDelete := false if memDaemon != nil { err = memDaemon.Stop(ctx) if err != nil { - return fmt.Errorf("failed to stop session daemon: %w", err) + forceDelete = isRemoteProcessDead(ctx, dbDaemon) + if !forceDelete { + return fmt.Errorf("failed to stop session daemon: %w", err) + } + log.Printf("[sessiondaemon] SessionDelete: daemon=%s remote job dead, deleting despite stop failure", data.DaemonId) } sessiondaemon.Manager.Remove(data.DaemonId) } @@ -1632,6 +1637,20 @@ func (ws *WshServer) SessionDeleteCommand(ctx context.Context, data wshrpc.Comma return nil } +// isRemoteProcessDead checks whether the daemon's remote job manager +// process has exited. Returns true if confirmed dead. 
+func isRemoteProcessDead(ctx context.Context, dbDaemon *waveobj.SessionDaemon) bool { + if dbDaemon.JobId == "" { + return false + } + job, err := wstore.DBMustGet[*waveobj.Job](ctx, dbDaemon.JobId) + if err != nil || job.JobManagerPid == 0 { + return false + } + alive, err := conncontroller.CheckRemoteProcessAlive(ctx, dbDaemon.Connection, job.JobManagerPid) + return err == nil && !alive +} + func (ws *WshServer) SessionListCommand(ctx context.Context, data wshrpc.CommandSessionListData) ([]wshrpc.SessionInfoRtnData, error) { allDaemons, err := wstore.DBGetAllObjsByType[*waveobj.SessionDaemon](ctx, waveobj.OType_SessionDaemon) if err != nil { From 8b2707b28d072f8dec429fd87982d04824f4309f Mon Sep 17 00:00:00 2001 From: lyx-tec Date: Sat, 20 Jun 2026 22:49:46 +0800 Subject: [PATCH 30/36] Handle dead shared-session job managers When a remote session job manager is confirmed gone, mark the session daemon done and detach attached blocks instead of auto-creating a replacement session. Fallback affected blocks to normal SSH shells, including both app restart recovery and runtime job-manager death paths. Keep done sessions visible as done in the session list, allow them to be closed manually, and reap detached done daemons with the done grace timeout. 
--- .../app/block/session-daemon-indicator.tsx | 6 +- pkg/blockcontroller/blockcontroller.go | 134 ++++++++++++++++-- .../sessiondaemoncontroller.go | 34 ++--- pkg/sessiondaemon/sessiondaemon.go | 37 +++-- pkg/wshrpc/wshserver/wshserver.go | 2 + 5 files changed, 170 insertions(+), 43 deletions(-) diff --git a/frontend/app/block/session-daemon-indicator.tsx b/frontend/app/block/session-daemon-indicator.tsx index 7742ff6707..b13e1b16ee 100644 --- a/frontend/app/block/session-daemon-indicator.tsx +++ b/frontend/app/block/session-daemon-indicator.tsx @@ -394,8 +394,8 @@ export function SessionDaemonIndicator({ blockId, useTermHeader }: SessionDaemon {sameConnSessions.map((s) => { const isActive = s.daemonid === daemonId; const blockCount = s.blocks?.length ?? 0; - const canClose = blockCount === 0; - const displayStatus = blockCount === 0 ? "idle" : s.status; + const canClose = blockCount === 0 || s.status === "done"; + const displayStatus = s.status === "done" ? "done" : blockCount === 0 ? "idle" : s.status; return (
Close diff --git a/pkg/blockcontroller/blockcontroller.go b/pkg/blockcontroller/blockcontroller.go index 83483ada22..bbeb05b6ce 100644 --- a/pkg/blockcontroller/blockcontroller.go +++ b/pkg/blockcontroller/blockcontroller.go @@ -25,6 +25,7 @@ import ( "github.com/wavetermdev/waveterm/pkg/util/shellutil" "github.com/wavetermdev/waveterm/pkg/wavebase" "github.com/wavetermdev/waveterm/pkg/waveobj" + "github.com/wavetermdev/waveterm/pkg/wcore" "github.com/wavetermdev/waveterm/pkg/wps" "github.com/wavetermdev/waveterm/pkg/wshrpc/wshclient" "github.com/wavetermdev/waveterm/pkg/wslconn" @@ -37,6 +38,8 @@ const ( BlockController_Tsunami = "tsunami" ) +const MetaKey_SessionNoAutoCreate = "session:noautocreate" + const ( Status_Running = "running" Status_Done = "done" @@ -137,6 +140,12 @@ func InitBlockController() { Event: wps.Event_BlockClose, AllScopes: true, }, nil) + sessiondaemon.OnDaemonJobDoneFn = func(ctx context.Context, daemonId string) { + err := fallbackSessionDaemonToShell(ctx, daemonId, "") + if err != nil { + log.Printf("[sessiondaemon] error falling back daemon=%s to shell: %v", daemonId, err) + } + } } func handleBlockCloseEvent(event *wps.WaveEvent) { @@ -209,6 +218,7 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts } _ = wstore.DBUpdateFn(ctx, blockId, func(block *waveobj.Block) { delete(block.Meta, waveobj.MetaKey_SessionDaemonId) + block.JobId = "" }) daemonId = "" } @@ -234,6 +244,8 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts } _ = wstore.DBUpdateFn(ctx, blockId, func(block *waveobj.Block) { delete(block.Meta, waveobj.MetaKey_SessionDaemonId) + block.Meta[MetaKey_SessionNoAutoCreate] = true + block.JobId = "" }) daemonId = "" } @@ -331,17 +343,43 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts // If so, clear the JobId so Start() runs again on the next ResyncController call. 
if sdc, ok := controller.(*SessionDaemonController); ok { if daemon := sessiondaemon.Manager.Get(sdc.DaemonId); daemon != nil && daemon.JobId != "" { - jobId := daemon.JobId - jobStatus, jErr := jobcontroller.GetJobManagerStatus(ctx, jobId) - if jErr == nil && jobStatus == jobcontroller.JobManagerStatus_Running { - log.Printf("[sessiondaemon] resync: daemon=%s block=%s job=%s alive, skipping", sdc.DaemonId, blockId, jobId) - } else { - log.Printf("[sessiondaemon] resync: daemon=%s block=%s job=%s not running (status=%s err=%v), marking done and recreating controller", sdc.DaemonId, blockId, jobId, jobStatus, jErr) - sessiondaemon.Manager.MarkDone(ctx, sdc.DaemonId) + dbDaemon, dbErr := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, sdc.DaemonId) + if dbErr != nil { + log.Printf("[sessiondaemon] resync: daemon=%s block=%s missing DB record, falling back to shell: %v", sdc.DaemonId, blockId, dbErr) + err = fallbackSessionDaemonToShell(ctx, sdc.DaemonId, blockId) + if err != nil { + return err + } stopBlockController(blockId) + deleteController(blockId) time.Sleep(100 * time.Millisecond) existing = nil - // Fall through to controller recreation + Start below + daemonId = "" + controller = MakeShellController(tabId, blockId, controllerName, connName) + registerController(blockId, controller) + status = controller.GetRuntimeStatus() + } else { + gone, goneErr := isSessionDaemonJobManagerGone(ctx, dbDaemon) + if goneErr != nil { + return fmt.Errorf("check session daemon job manager: %w", goneErr) + } + if gone { + log.Printf("[sessiondaemon] resync: daemon=%s block=%s job manager gone, falling back to shell", sdc.DaemonId, blockId) + err = fallbackSessionDaemonToShell(ctx, sdc.DaemonId, blockId) + if err != nil { + return err + } + stopBlockController(blockId) + deleteController(blockId) + time.Sleep(100 * time.Millisecond) + existing = nil + daemonId = "" + controller = MakeShellController(tabId, blockId, controllerName, connName) + registerController(blockId, 
controller) + status = controller.GetRuntimeStatus() + } else { + log.Printf("[sessiondaemon] resync: daemon=%s block=%s job=%s alive, skipping", sdc.DaemonId, blockId, daemon.JobId) + } } } } @@ -395,6 +433,86 @@ func DestroyBlockController(blockId string) { deleteController(blockId) } +func isSessionDaemonJobManagerGone(ctx context.Context, dbDaemon *waveobj.SessionDaemon) (bool, error) { + if dbDaemon == nil || dbDaemon.JobId == "" { + return true, nil + } + job, err := wstore.DBGet[*waveobj.Job](ctx, dbDaemon.JobId) + if err != nil { + return false, fmt.Errorf("get job %s: %w", dbDaemon.JobId, err) + } + if job == nil || job.JobManagerStatus == jobcontroller.JobManagerStatus_Done { + return true, nil + } + if job.JobManagerPid == 0 { + return false, nil + } + alive, err := conncontroller.CheckRemoteProcessAlive(ctx, dbDaemon.Connection, job.JobManagerPid) + if err != nil { + return false, err + } + return !alive, nil +} + +func fallbackSessionDaemonToShell(ctx context.Context, daemonId string, currentBlockId string) error { + log.Printf("[sessiondaemon] fallback: daemon=%s currentBlock=%s", daemonId, currentBlockId) + blockIds := sessiondaemon.Manager.GetBlocksForDaemon(daemonId) + if len(blockIds) == 0 && currentBlockId != "" { + blockIds = append(blockIds, currentBlockId) + } + + sessiondaemon.Manager.MarkDone(ctx, daemonId) + _ = wstore.DBUpdateFn(ctx, daemonId, func(dbSd *waveobj.SessionDaemon) { + dbSd.JobId = "" + dbSd.Status = sessiondaemon.Status_Done + }) + + seen := make(map[string]bool) + for _, blockId := range blockIds { + if blockId == "" || seen[blockId] { + continue + } + seen[blockId] = true + sessiondaemon.Manager.DetachBlock(ctx, daemonId, blockId) + err := wstore.DBUpdateFn(ctx, blockId, func(block *waveobj.Block) { + if block.Meta == nil { + block.Meta = make(waveobj.MetaMapType) + } + delete(block.Meta, waveobj.MetaKey_SessionDaemonId) + block.Meta[MetaKey_SessionNoAutoCreate] = true + block.JobId = "" + }) + if err != nil { + return 
fmt.Errorf("fallback block %s to shell: %w", blockId, err) + } + wcore.SendWaveObjUpdate(waveobj.MakeORef(waveobj.OType_Block, blockId)) + if blockId != currentBlockId { + DestroyBlockController(blockId) + resyncBlockController(ctx, blockId) + } + } + return nil +} + +func resyncBlockController(ctx context.Context, blockId string) { + tabs, err := wstore.DBGetAllObjsByType[*waveobj.Tab](ctx, waveobj.OType_Tab) + if err != nil { + log.Printf("[sessiondaemon] warning: error getting tabs for resync: %v", err) + return + } + for _, tab := range tabs { + for _, bid := range tab.BlockIds { + if bid == blockId { + err = ResyncController(ctx, tab.OID, blockId, nil, true) + if err != nil { + log.Printf("[sessiondaemon] warning: fallback resync failed block=%s: %v", blockId, err) + } + return + } + } + } +} + func sendConnMonitorInputNotification(controller Controller) { connName := controller.GetConnName() if connName == "" || conncontroller.IsLocalConnName(connName) || conncontroller.IsWslConnName(connName) { diff --git a/pkg/blockcontroller/sessiondaemoncontroller.go b/pkg/blockcontroller/sessiondaemoncontroller.go index 184dbca359..1b36a18a99 100644 --- a/pkg/blockcontroller/sessiondaemoncontroller.go +++ b/pkg/blockcontroller/sessiondaemoncontroller.go @@ -78,37 +78,28 @@ func (sdc *SessionDaemonController) Start(ctx context.Context, blockMeta waveobj } if dbDaemon.Status == sessiondaemon.Status_Done { - // Job manager is dead — clear state and create a new job. 
- log.Printf("[sessiondaemon] start: daemon=%s is done, recovering to init", sdc.DaemonId) - daemon.Lock.Lock() - daemon.JobId = "" - daemon.Lock.Unlock() - wstore.DBUpdateFn(ctx, sdc.DaemonId, func(dbSd *waveobj.SessionDaemon) { - dbSd.JobId = "" - dbSd.Status = sessiondaemon.Status_Init - }) - return sdc.createJobAndSync(ctx, blockMeta, rtOpts) + log.Printf("[sessiondaemon] start: daemon=%s is done, falling back block=%s to shell", sdc.DaemonId, sdc.BlockId) + return fallbackSessionDaemonToShell(ctx, sdc.DaemonId, sdc.BlockId) } if dbDaemon.Status == sessiondaemon.Status_Disconnected { return fmt.Errorf("daemon is disconnected, waiting for connection to recover") } if dbDaemon.JobId != "" { + gone, goneErr := isSessionDaemonJobManagerGone(ctx, dbDaemon) + if goneErr != nil { + return fmt.Errorf("check session daemon job manager: %w", goneErr) + } + if gone { + log.Printf("[sessiondaemon] start: daemon=%s job manager gone, falling back block=%s to shell", sdc.DaemonId, sdc.BlockId) + return fallbackSessionDaemonToShell(ctx, sdc.DaemonId, sdc.BlockId) + } err := sdc.tryReconnect(ctx, daemon, dbDaemon, rtOpts) if err != nil { - // Reconnect failed — check if the job manager was confirmed - // gone. If so, clear state and create a new job. 
dbDaemon2, dbErr := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, sdc.DaemonId) if dbErr == nil && dbDaemon2.Status == sessiondaemon.Status_Done { - log.Printf("[sessiondaemon] start: daemon=%s reconnect confirmed done, recovering", sdc.DaemonId) - daemon.Lock.Lock() - daemon.JobId = "" - daemon.Lock.Unlock() - wstore.DBUpdateFn(ctx, sdc.DaemonId, func(dbSd *waveobj.SessionDaemon) { - dbSd.JobId = "" - dbSd.Status = sessiondaemon.Status_Init - }) - return sdc.createJobAndSync(ctx, blockMeta, rtOpts) + log.Printf("[sessiondaemon] start: daemon=%s reconnect confirmed done, falling back block=%s to shell", sdc.DaemonId, sdc.BlockId) + return fallbackSessionDaemonToShell(ctx, sdc.DaemonId, sdc.BlockId) } return err } @@ -322,6 +313,7 @@ func autoCreateSessionDaemon(ctx context.Context, blockId string, blockMeta wave err = wstore.DBUpdateFn(ctx, blockId, func(block *waveobj.Block) { block.Meta[waveobj.MetaKey_SessionDaemonId] = dbDaemon.OID + delete(block.Meta, MetaKey_SessionNoAutoCreate) }) if err != nil { return "", fmt.Errorf("update block meta: %w", err) diff --git a/pkg/sessiondaemon/sessiondaemon.go b/pkg/sessiondaemon/sessiondaemon.go index df217024c8..929ace7dd7 100644 --- a/pkg/sessiondaemon/sessiondaemon.go +++ b/pkg/sessiondaemon/sessiondaemon.go @@ -50,6 +50,8 @@ var Manager = &SessionDaemonManager{ Daemons: make(map[string]*SessionDaemon), } +var OnDaemonJobDoneFn func(ctx context.Context, daemonId string) + func init() { jobcontroller.ClearSessionDaemonJobFn = func(ctx context.Context, jobId string) { Manager.ClearJobIdFromDaemons(ctx, jobId) @@ -269,6 +271,10 @@ func (sd *SessionDaemonManager) resetIdleTimer(ctx context.Context, daemonId str func (sd *SessionDaemonManager) startIdleCountdown(ctx context.Context, daemonId string) { err := wstore.DBUpdateFn(ctx, daemonId, func(dbD *waveobj.SessionDaemon) { + if dbD.Status == Status_Done { + dbD.IdleSince = DoneReapTimeout + return + } dbD.IdleSince = dbD.IdleTimeout }) if err != nil { @@ -395,29 +401,37 @@ 
func (sd *SessionDaemonManager) RecordActivity(ctx context.Context, daemonId str } // ClearJobIdFromDaemons clears the JobId from all daemons (memory + DB) -// whose job matches jobId. Called when a remote job manager exits so that -// the daemon can be restarted. +// whose job matches jobId. Called when a remote job manager exits. func (sd *SessionDaemonManager) ClearJobIdFromDaemons(ctx context.Context, jobId string) { sd.Lock.Lock() - defer sd.Lock.Unlock() + var affectedDaemonIds []string for _, daemon := range sd.Daemons { daemon.Lock.Lock() if daemon.JobId == jobId { oldDaemonJobId := daemon.JobId + daemonId := daemon.DaemonId daemon.JobId = "" daemon.Lock.Unlock() - if err := wstore.DBUpdateFn(ctx, daemon.DaemonId, func(dbSd *waveobj.SessionDaemon) { + if err := wstore.DBUpdateFn(ctx, daemonId, func(dbSd *waveobj.SessionDaemon) { dbSd.JobId = "" - dbSd.Status = Status_Init + dbSd.Status = Status_Done }); err != nil { log.Printf("[sessiondaemon:%s] ClearJobIdFromDaemons: DB update failed, memory stale (was job=%s): %v", - daemon.DaemonId, oldDaemonJobId, err) + daemonId, oldDaemonJobId, err) } - log.Printf("[sessiondaemon:%s] ClearJobIdFromDaemons: job=%s cleared, status=init", daemon.DaemonId, jobId) + affectedDaemonIds = append(affectedDaemonIds, daemonId) + log.Printf("[sessiondaemon:%s] ClearJobIdFromDaemons: job=%s cleared, status=done", daemonId, jobId) continue } daemon.Lock.Unlock() } + sd.Lock.Unlock() + + for _, daemonId := range affectedDaemonIds { + if OnDaemonJobDoneFn != nil { + OnDaemonJobDoneFn(ctx, daemonId) + } + } } func (sd *SessionDaemonManager) InitFromDB(ctx context.Context) error { @@ -507,8 +521,6 @@ func (sd *SessionDaemonManager) OnConnectionUp(ctx context.Context, connName str hasBlocks := memDaemon != nil && memDaemon.HasAttachedBlocks() if hasBlocks { - // Blocks are still attached — reset to init so they auto-recover - // when the block becomes active again. 
if memDaemon != nil { memDaemon.Lock.Lock() memDaemon.JobId = "" @@ -516,9 +528,12 @@ func (sd *SessionDaemonManager) OnConnectionUp(ctx context.Context, connName str } wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbSd *waveobj.SessionDaemon) { dbSd.JobId = "" - dbSd.Status = Status_Init + dbSd.Status = Status_Done }) - log.Printf("[sessiondaemon:%s] OnConnectionUp: dead, has blocks, reset to init", dbDaemon.OID) + log.Printf("[sessiondaemon:%s] OnConnectionUp: dead, has blocks, status -> done", dbDaemon.OID) + if OnDaemonJobDoneFn != nil { + OnDaemonJobDoneFn(ctx, dbDaemon.OID) + } } else { // No blocks referencing this daemon — safe to delete. if memDaemon != nil { diff --git a/pkg/wshrpc/wshserver/wshserver.go b/pkg/wshrpc/wshserver/wshserver.go index c0d3f73cfa..e9bc5af3ea 100644 --- a/pkg/wshrpc/wshserver/wshserver.go +++ b/pkg/wshrpc/wshserver/wshserver.go @@ -1729,6 +1729,7 @@ func (ws *WshServer) SessionAttachCommand(ctx context.Context, data wshrpc.Comma err = wstore.DBUpdateFn(ctx, data.BlockId, func(block *waveobj.Block) { block.Meta[waveobj.MetaKey_SessionDaemonId] = data.DaemonId + delete(block.Meta, blockcontroller.MetaKey_SessionNoAutoCreate) block.JobId = dbDaemon.JobId }) log.Printf("[sessiondaemon] SessionAttach: block=%s daemon=%s meta_updated daemon_job=%s block_job=%s", @@ -1770,6 +1771,7 @@ func (ws *WshServer) SessionDetachCommand(ctx context.Context, data wshrpc.Comma sessiondaemon.Manager.DetachBlock(ctx, data.DaemonId, blockId) err = wstore.DBUpdateFn(ctx, blockId, func(block *waveobj.Block) { delete(block.Meta, waveobj.MetaKey_SessionDaemonId) + block.Meta[blockcontroller.MetaKey_SessionNoAutoCreate] = true }) if err != nil { return fmt.Errorf("update block meta: %w", err) From 5bf44ef81f363f81016fb0cc55c256142e960ac5 Mon Sep 17 00:00:00 2001 From: lyx-tec Date: Sun, 21 Jun 2026 07:09:17 +0800 Subject: [PATCH 31/36] fix: converge dead shared-session job manager handling Startup/reconnect lifecycle: - classify session daemon job managers as 
alive/dead/unknown in sessiondaemon - treat SSH-not-ready and missing pid as unknown so app startup waits instead of falling back or recreating sessions - reuse the same classifier from block resync, SessionDaemonController.Start, and OnConnectionUp Runtime fallback cleanup: - keep confirmed dead job managers flowing through the shared fallback-to-shell path - make MarkDone the single daemon status update point and return DB errors - replace repeated controller teardown/recreate code with a small shell replacement helper --- pkg/blockcontroller/blockcontroller.go | 73 +++++------ .../sessiondaemoncontroller.go | 19 +-- pkg/sessiondaemon/sessiondaemon.go | 116 +++++++++--------- 3 files changed, 102 insertions(+), 106 deletions(-) diff --git a/pkg/blockcontroller/blockcontroller.go b/pkg/blockcontroller/blockcontroller.go index bbeb05b6ce..b74b1d853f 100644 --- a/pkg/blockcontroller/blockcontroller.go +++ b/pkg/blockcontroller/blockcontroller.go @@ -6,6 +6,7 @@ package blockcontroller import ( "context" "encoding/base64" + "errors" "fmt" "io/fs" "log" @@ -17,7 +18,6 @@ import ( "github.com/google/uuid" "github.com/wavetermdev/waveterm/pkg/blocklogger" "github.com/wavetermdev/waveterm/pkg/filestore" - "github.com/wavetermdev/waveterm/pkg/jobcontroller" "github.com/wavetermdev/waveterm/pkg/remote" "github.com/wavetermdev/waveterm/pkg/remote/conncontroller" "github.com/wavetermdev/waveterm/pkg/sessiondaemon" @@ -46,6 +46,8 @@ const ( Status_Init = "init" ) +var ErrSessionDaemonJobUnknown = errors.New("session daemon job state unknown") + const ( DefaultTermMaxFileSize = 2 * 1024 * 1024 DefaultHtmlMaxFileSize = 256 * 1024 @@ -223,14 +225,14 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts daemonId = "" } - // Validate existing daemon: if stale (done/disconnected/not found), clear it and fall through to ShellController + // Validate existing daemon: confirmed-done daemons fall back to ShellController; disconnected daemons wait for 
connection recovery. if daemonId != "" && controllerName == BlockController_Shell { dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, daemonId) staleStatus := false if err != nil { log.Printf("[sessiondaemon] staledaemon: daemon=%s block=%s not found in DB err=%v, clearing", daemonId, blockId, err) staleStatus = true - } else if dbDaemon.Status == sessiondaemon.Status_Done || dbDaemon.Status == sessiondaemon.Status_Disconnected { + } else if dbDaemon.Status == sessiondaemon.Status_Done { log.Printf("[sessiondaemon] staledaemon: daemon=%s block=%s status=%s, clearing and falling back to ShellController", daemonId, blockId, dbDaemon.Status) staleStatus = true } else { @@ -350,33 +352,27 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts if err != nil { return err } - stopBlockController(blockId) - deleteController(blockId) - time.Sleep(100 * time.Millisecond) existing = nil daemonId = "" - controller = MakeShellController(tabId, blockId, controllerName, connName) - registerController(blockId, controller) + controller = replaceBlockControllerWithShell(tabId, blockId, controllerName, connName) status = controller.GetRuntimeStatus() } else { - gone, goneErr := isSessionDaemonJobManagerGone(ctx, dbDaemon) - if goneErr != nil { - return fmt.Errorf("check session daemon job manager: %w", goneErr) + jobState, stateErr := sessiondaemon.ClassifyJobManagerState(ctx, dbDaemon) + if stateErr != nil { + return fmt.Errorf("check session daemon job manager: %w", stateErr) } - if gone { + if jobState == sessiondaemon.JobManagerState_Dead { log.Printf("[sessiondaemon] resync: daemon=%s block=%s job manager gone, falling back to shell", sdc.DaemonId, blockId) err = fallbackSessionDaemonToShell(ctx, sdc.DaemonId, blockId) if err != nil { return err } - stopBlockController(blockId) - deleteController(blockId) - time.Sleep(100 * time.Millisecond) - existing = nil daemonId = "" - controller = MakeShellController(tabId, blockId, controllerName, 
connName) - registerController(blockId, controller) + existing = nil + controller = replaceBlockControllerWithShell(tabId, blockId, controllerName, connName) status = controller.GetRuntimeStatus() + } else if jobState == sessiondaemon.JobManagerState_Unknown { + log.Printf("[sessiondaemon] resync: daemon=%s block=%s job state unknown, waiting", sdc.DaemonId, blockId) } else { log.Printf("[sessiondaemon] resync: daemon=%s block=%s job=%s alive, skipping", sdc.DaemonId, blockId, daemon.JobId) } @@ -398,6 +394,9 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts // Start controller err = controller.Start(ctx, blockData.Meta, rtOpts, force) if err != nil { + if errors.Is(err, ErrSessionDaemonJobUnknown) { + return nil + } return fmt.Errorf("error starting controller: %w", err) } } @@ -433,27 +432,6 @@ func DestroyBlockController(blockId string) { deleteController(blockId) } -func isSessionDaemonJobManagerGone(ctx context.Context, dbDaemon *waveobj.SessionDaemon) (bool, error) { - if dbDaemon == nil || dbDaemon.JobId == "" { - return true, nil - } - job, err := wstore.DBGet[*waveobj.Job](ctx, dbDaemon.JobId) - if err != nil { - return false, fmt.Errorf("get job %s: %w", dbDaemon.JobId, err) - } - if job == nil || job.JobManagerStatus == jobcontroller.JobManagerStatus_Done { - return true, nil - } - if job.JobManagerPid == 0 { - return false, nil - } - alive, err := conncontroller.CheckRemoteProcessAlive(ctx, dbDaemon.Connection, job.JobManagerPid) - if err != nil { - return false, err - } - return !alive, nil -} - func fallbackSessionDaemonToShell(ctx context.Context, daemonId string, currentBlockId string) error { log.Printf("[sessiondaemon] fallback: daemon=%s currentBlock=%s", daemonId, currentBlockId) blockIds := sessiondaemon.Manager.GetBlocksForDaemon(daemonId) @@ -461,11 +439,9 @@ func fallbackSessionDaemonToShell(ctx context.Context, daemonId string, currentB blockIds = append(blockIds, currentBlockId) } - 
sessiondaemon.Manager.MarkDone(ctx, daemonId) - _ = wstore.DBUpdateFn(ctx, daemonId, func(dbSd *waveobj.SessionDaemon) { - dbSd.JobId = "" - dbSd.Status = sessiondaemon.Status_Done - }) + if err := sessiondaemon.Manager.MarkDone(ctx, daemonId); err != nil { + return fmt.Errorf("mark daemon done: %w", err) + } seen := make(map[string]bool) for _, blockId := range blockIds { @@ -494,6 +470,15 @@ func fallbackSessionDaemonToShell(ctx context.Context, daemonId string, currentB return nil } +func replaceBlockControllerWithShell(tabId string, blockId string, controllerName string, connName string) Controller { + stopBlockController(blockId) + deleteController(blockId) + time.Sleep(100 * time.Millisecond) + controller := MakeShellController(tabId, blockId, controllerName, connName) + registerController(blockId, controller) + return controller +} + func resyncBlockController(ctx context.Context, blockId string) { tabs, err := wstore.DBGetAllObjsByType[*waveobj.Tab](ctx, waveobj.OType_Tab) if err != nil { diff --git a/pkg/blockcontroller/sessiondaemoncontroller.go b/pkg/blockcontroller/sessiondaemoncontroller.go index 1b36a18a99..db56f31e80 100644 --- a/pkg/blockcontroller/sessiondaemoncontroller.go +++ b/pkg/blockcontroller/sessiondaemoncontroller.go @@ -81,19 +81,20 @@ func (sdc *SessionDaemonController) Start(ctx context.Context, blockMeta waveobj log.Printf("[sessiondaemon] start: daemon=%s is done, falling back block=%s to shell", sdc.DaemonId, sdc.BlockId) return fallbackSessionDaemonToShell(ctx, sdc.DaemonId, sdc.BlockId) } - if dbDaemon.Status == sessiondaemon.Status_Disconnected { - return fmt.Errorf("daemon is disconnected, waiting for connection to recover") - } if dbDaemon.JobId != "" { - gone, goneErr := isSessionDaemonJobManagerGone(ctx, dbDaemon) - if goneErr != nil { - return fmt.Errorf("check session daemon job manager: %w", goneErr) + jobState, stateErr := sessiondaemon.ClassifyJobManagerState(ctx, dbDaemon) + if stateErr != nil { + return 
fmt.Errorf("check session daemon job manager: %w", stateErr) } - if gone { + if jobState == sessiondaemon.JobManagerState_Dead { log.Printf("[sessiondaemon] start: daemon=%s job manager gone, falling back block=%s to shell", sdc.DaemonId, sdc.BlockId) return fallbackSessionDaemonToShell(ctx, sdc.DaemonId, sdc.BlockId) } + if jobState == sessiondaemon.JobManagerState_Unknown { + log.Printf("[sessiondaemon] start: daemon=%s job state unknown, waiting block=%s", sdc.DaemonId, sdc.BlockId) + return ErrSessionDaemonJobUnknown + } err := sdc.tryReconnect(ctx, daemon, dbDaemon, rtOpts) if err != nil { dbDaemon2, dbErr := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, sdc.DaemonId) @@ -108,6 +109,10 @@ func (sdc *SessionDaemonController) Start(ctx context.Context, blockMeta waveobj return nil } + if dbDaemon.Status == sessiondaemon.Status_Disconnected { + return ErrSessionDaemonJobUnknown + } + return sdc.createJobAndSync(ctx, blockMeta, rtOpts) } diff --git a/pkg/sessiondaemon/sessiondaemon.go b/pkg/sessiondaemon/sessiondaemon.go index 929ace7dd7..99b9086703 100644 --- a/pkg/sessiondaemon/sessiondaemon.go +++ b/pkg/sessiondaemon/sessiondaemon.go @@ -30,6 +30,12 @@ const ( Status_Done = "done" ) +const ( + JobManagerState_Alive = "alive" + JobManagerState_Dead = "dead" + JobManagerState_Unknown = "unknown" +) + type SessionDaemon struct { Lock sync.Mutex @@ -328,29 +334,62 @@ func (sd *SessionDaemonManager) SendInput(daemonId string, inputData []byte, sig // MarkDone clears the daemon's JobId and sets its status to Done, // both in memory and in the database. Used when the resync controller // detects that a daemon's remote job manager has exited. 
-func (sd *SessionDaemonManager) MarkDone(ctx context.Context, daemonId string) { +func (sd *SessionDaemonManager) MarkDone(ctx context.Context, daemonId string) error { sd.Lock.Lock() daemon, ok := sd.Daemons[daemonId] sd.Lock.Unlock() - if !ok { - return + var oldJobId string + if ok { + daemon.Lock.Lock() + oldJobId = daemon.JobId + daemon.JobId = "" + daemon.Lock.Unlock() } - daemon.Lock.Lock() - oldJobId := daemon.JobId - daemon.JobId = "" - daemon.Lock.Unlock() if err := wstore.DBUpdateFn(ctx, daemonId, func(dbSd *waveobj.SessionDaemon) { dbSd.JobId = "" dbSd.Status = Status_Done }); err != nil { - // Roll back memory to avoid inconsistency. - daemon.Lock.Lock() - daemon.JobId = oldJobId - daemon.Lock.Unlock() - log.Printf("[sessiondaemon:%s] MarkDone: DB update failed, rolled back memory JobId: %v", daemonId, err) - return + if ok { + daemon.Lock.Lock() + daemon.JobId = oldJobId + daemon.Lock.Unlock() + } + log.Printf("[sessiondaemon:%s] MarkDone: DB update failed: %v", daemonId, err) + return err } log.Printf("[sessiondaemon:%s] MarkDone: job cleared, status=done", daemonId) + return nil +} + +func ClassifyJobManagerState(ctx context.Context, dbDaemon *waveobj.SessionDaemon) (string, error) { + if dbDaemon == nil || dbDaemon.JobId == "" { + return JobManagerState_Dead, nil + } + job, err := wstore.DBGet[*waveobj.Job](ctx, dbDaemon.JobId) + if err != nil { + return JobManagerState_Unknown, fmt.Errorf("get job %s: %w", dbDaemon.JobId, err) + } + if job == nil || job.JobManagerStatus == jobcontroller.JobManagerStatus_Done { + return JobManagerState_Dead, nil + } + if job.JobManagerPid == 0 { + return JobManagerState_Unknown, nil + } + connected, err := conncontroller.IsConnected(dbDaemon.Connection) + if err != nil { + return JobManagerState_Unknown, err + } + if !connected { + return JobManagerState_Unknown, nil + } + alive, err := conncontroller.CheckRemoteProcessAlive(ctx, dbDaemon.Connection, job.JobManagerPid) + if err != nil { + return 
JobManagerState_Unknown, nil + } + if alive { + return JobManagerState_Alive, nil + } + return JobManagerState_Dead, nil } // GetMemJobId returns the in-memory JobId for a daemon, used as a @@ -487,21 +526,14 @@ func (sd *SessionDaemonManager) OnConnectionUp(ctx context.Context, connName str continue } - // Read JobManagerPid from the job record. - job, err := wstore.DBMustGet[*waveobj.Job](ctx, dbDaemon.JobId) - if err != nil || job.JobManagerPid == 0 { - continue - } - - alive, err := conncontroller.CheckRemoteProcessAlive(ctx, connName, job.JobManagerPid) + jobState, err := ClassifyJobManagerState(ctx, dbDaemon) if err != nil { - log.Printf("[sessiondaemon:%s] OnConnectionUp: error checking remote process: %v", dbDaemon.OID, err) + log.Printf("[sessiondaemon:%s] OnConnectionUp: error checking job manager state: %v", dbDaemon.OID, err) continue } - if alive { - // Job manager is still running — try to reconnect and bring - // it back to running status. - log.Printf("[sessiondaemon:%s] OnConnectionUp: remote job manager alive (pid=%d), reconnecting", dbDaemon.OID, job.JobManagerPid) + switch jobState { + case JobManagerState_Alive: + log.Printf("[sessiondaemon:%s] OnConnectionUp: remote job manager alive, reconnecting", dbDaemon.OID) sd.Lock.Lock() memDaemon := sd.Daemons[dbDaemon.OID] sd.Lock.Unlock() @@ -511,39 +543,13 @@ func (sd *SessionDaemonManager) OnConnectionUp(ctx context.Context, connName str log.Printf("[sessiondaemon:%s] OnConnectionUp: reconnect failed: %v", dbDaemon.OID, err) } } - continue - } - // Job manager is dead. 
- log.Printf("[sessiondaemon:%s] OnConnectionUp: remote job manager dead (pid=%d)", dbDaemon.OID, job.JobManagerPid) - sd.Lock.Lock() - memDaemon := sd.Daemons[dbDaemon.OID] - sd.Lock.Unlock() - hasBlocks := memDaemon != nil && memDaemon.HasAttachedBlocks() - - if hasBlocks { - if memDaemon != nil { - memDaemon.Lock.Lock() - memDaemon.JobId = "" - memDaemon.Lock.Unlock() - } - wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbSd *waveobj.SessionDaemon) { - dbSd.JobId = "" - dbSd.Status = Status_Done - }) - log.Printf("[sessiondaemon:%s] OnConnectionUp: dead, has blocks, status -> done", dbDaemon.OID) + case JobManagerState_Dead: + log.Printf("[sessiondaemon:%s] OnConnectionUp: remote job manager dead, falling back", dbDaemon.OID) if OnDaemonJobDoneFn != nil { OnDaemonJobDoneFn(ctx, dbDaemon.OID) } - } else { - // No blocks referencing this daemon — safe to delete. - if memDaemon != nil { - sd.Remove(dbDaemon.OID) - } - if err := wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, dbDaemon.OID); err != nil { - log.Printf("[sessiondaemon:%s] OnConnectionUp: error deleting dead daemon: %v", dbDaemon.OID, err) - } else { - log.Printf("[sessiondaemon:%s] OnConnectionUp: dead, no blocks, deleted", dbDaemon.OID) - } + case JobManagerState_Unknown: + log.Printf("[sessiondaemon:%s] OnConnectionUp: job manager state unknown, waiting", dbDaemon.OID) } } } From c601faa0858912035dc31453142fee9894823c70 Mon Sep 17 00:00:00 2001 From: lyx-tec Date: Sun, 21 Jun 2026 08:01:20 +0800 Subject: [PATCH 32/36] chore: remove wsh session commands - drop the experimental wsh session command group for create/list/attach/detach/info/tag/delete - keep the underlying session RPCs and frontend flows intact for now - extend the remote job manager idle timeout default from 2 days to 7 days, including remote-side fallback values --- cmd/wsh/cmd/wshcmd-session.go | 277 -------------------------- pkg/jobcontroller/jobcontroller.go | 2 +- pkg/wshrpc/wshremote/wshremote_job.go | 4 +- 3 files changed, 3 
insertions(+), 280 deletions(-) delete mode 100644 cmd/wsh/cmd/wshcmd-session.go diff --git a/cmd/wsh/cmd/wshcmd-session.go b/cmd/wsh/cmd/wshcmd-session.go deleted file mode 100644 index 1c15f331d9..0000000000 --- a/cmd/wsh/cmd/wshcmd-session.go +++ /dev/null @@ -1,277 +0,0 @@ -// Copyright 2025, Command Line Inc. -// SPDX-License-Identifier: Apache-2.0 - -package cmd - -import ( - "fmt" - "time" - - "github.com/spf13/cobra" - "github.com/wavetermdev/waveterm/pkg/waveobj" - "github.com/wavetermdev/waveterm/pkg/wshrpc" - "github.com/wavetermdev/waveterm/pkg/wshrpc/wshclient" -) - -var sessionCmd = &cobra.Command{ - Use: "session", - Short: "manage session daemons", - Long: "Commands to create, list, attach to, and manage session daemons for persistent remote shells.", -} - -var sessionCreateCmd = &cobra.Command{ - Use: "create", - Short: "create a new session daemon", - Long: `Create a named session daemon. Anonymous daemons are created automatically for SSH blocks.`, - Args: cobra.NoArgs, - RunE: sessionCreateRun, - PreRunE: preRunSetupRpcClient, -} - -var sessionDeleteCmd = &cobra.Command{ - Use: "delete DAEMONID", - Short: "delete a session daemon", - Long: `Delete a session daemon, stopping any attached job and detaching all blocks.`, - Args: cobra.ExactArgs(1), - RunE: sessionDeleteRun, - PreRunE: preRunSetupRpcClient, -} - -var sessionListCmd = &cobra.Command{ - Use: "list", - Short: "list session daemons", - Long: `List all named session daemons. 
Use --all to include anonymous daemons.`, - Args: cobra.NoArgs, - RunE: sessionListRun, - PreRunE: preRunSetupRpcClient, -} - -var sessionAttachCmd = &cobra.Command{ - Use: "attach DAEMONID", - Short: "attach current block to a session daemon", - Long: `Attach the current block to the specified session daemon.`, - Args: cobra.ExactArgs(1), - RunE: sessionAttachRun, - PreRunE: preRunSetupRpcClient, -} - -var sessionDetachCmd = &cobra.Command{ - Use: "detach", - Short: "detach current block from its session daemon", - Long: `Detach the current block from its attached session daemon.`, - Args: cobra.NoArgs, - RunE: sessionDetachRun, - PreRunE: preRunSetupRpcClient, -} - -var sessionInfoCmd = &cobra.Command{ - Use: "info DAEMONID", - Short: "show session daemon info", - Long: `Show detailed information about a session daemon.`, - Args: cobra.ExactArgs(1), - RunE: sessionInfoRun, - PreRunE: preRunSetupRpcClient, -} - -var sessionTagCmd = &cobra.Command{ - Use: "tag DAEMONID", - Short: "tag an anonymous session daemon with a name", - Long: `Convert an anonymous session daemon to a named one, preventing auto-cleanup.`, - Args: cobra.ExactArgs(1), - RunE: sessionTagRun, - PreRunE: preRunSetupRpcClient, -} - -var sessionCreateFlagName string -var sessionCreateFlagConnection string -var sessionCreateFlagIdleTimeout int64 -var sessionListFlagAll bool -var sessionTagFlagName string - -func init() { - rootCmd.AddCommand(sessionCmd) - sessionCmd.AddCommand(sessionCreateCmd) - sessionCmd.AddCommand(sessionDeleteCmd) - sessionCmd.AddCommand(sessionListCmd) - sessionCmd.AddCommand(sessionAttachCmd) - sessionCmd.AddCommand(sessionDetachCmd) - sessionCmd.AddCommand(sessionInfoCmd) - sessionCmd.AddCommand(sessionTagCmd) - - sessionCreateCmd.Flags().StringVarP(&sessionCreateFlagName, "name", "n", "", "session name (creates a named daemon)") - sessionCreateCmd.Flags().StringVarP(&sessionCreateFlagConnection, "connection", "c", "", "connection name (e.g. 
ssh://host)") - sessionCreateCmd.Flags().Int64Var(&sessionCreateFlagIdleTimeout, "idle-timeout", 0, "idle timeout in seconds (default: 86400 for named, 60 for anonymous)") - - sessionListCmd.Flags().BoolVarP(&sessionListFlagAll, "all", "a", false, "include anonymous session daemons") - - sessionTagCmd.Flags().StringVarP(&sessionTagFlagName, "name", "n", "", "new name for the session daemon") - sessionTagCmd.MarkFlagRequired("name") -} - -func sessionCreateRun(cmd *cobra.Command, args []string) (rtnErr error) { - defer func() { - sendActivity("session:create", rtnErr == nil) - }() - - data := wshrpc.CommandSessionCreateData{ - Name: sessionCreateFlagName, - Connection: sessionCreateFlagConnection, - IdleTimeout: sessionCreateFlagIdleTimeout, - } - - info, err := wshclient.SessionCreateCommand(RpcClient, data, &wshrpc.RpcOpts{Timeout: 10000}) - if err != nil { - return fmt.Errorf("creating session daemon: %w", err) - } - - WriteStdout("session daemon %s created\n", info.DaemonId) - WriteStdout(" name: %s\n", info.Name) - WriteStdout(" connection: %s\n", info.Connection) - return nil -} - -func sessionDeleteRun(cmd *cobra.Command, args []string) (rtnErr error) { - defer func() { - sendActivity("session:delete", rtnErr == nil) - }() - - daemonId := args[0] - err := wshclient.SessionDeleteCommand(RpcClient, wshrpc.CommandSessionDeleteData{DaemonId: daemonId}, &wshrpc.RpcOpts{Timeout: 10000}) - if err != nil { - return fmt.Errorf("deleting session daemon: %w", err) - } - WriteStdout("session daemon %s deleted\n", daemonId) - return nil -} - -func sessionListRun(cmd *cobra.Command, args []string) (rtnErr error) { - defer func() { - sendActivity("session:list", rtnErr == nil) - }() - - data := wshrpc.CommandSessionListData{ShowAll: sessionListFlagAll} - sessions, err := wshclient.SessionListCommand(RpcClient, data, &wshrpc.RpcOpts{Timeout: 10000}) - if err != nil { - return fmt.Errorf("listing session daemons: %w", err) - } - - if len(sessions) == 0 { - WriteStdout("no 
session daemons\n") - return nil - } - - WriteStdout("%-36s %-20s %-30s %-12s %s\n", "daemonid", "name", "connection", "status", "blocks") - WriteStdout("----------------------------------------------------------------------\n") - for _, s := range sessions { - blocks := fmt.Sprintf("%d", len(s.Blocks)) - if s.IsAnonymous { - blocks += " (anon)" - } - WriteStdout("%-36s %-20s %-30s %-12s %s\n", s.DaemonId, s.Name, s.Connection, s.Status, blocks) - } - return nil -} - -func sessionAttachRun(cmd *cobra.Command, args []string) (rtnErr error) { - defer func() { - sendActivity("session:attach", rtnErr == nil) - }() - - daemonId := args[0] - fullORef, err := resolveBlockArg() - if err != nil { - return err - } - - data := wshrpc.CommandSessionAttachData{ - DaemonId: daemonId, - BlockId: fullORef.OID, - } - err = wshclient.SessionAttachCommand(RpcClient, data, &wshrpc.RpcOpts{Timeout: 10000}) - if err != nil { - return fmt.Errorf("attaching block: %w", err) - } - WriteStdout("block %s attached to session daemon %s\n", fullORef.OID, daemonId) - return nil -} - -func sessionDetachRun(cmd *cobra.Command, args []string) (rtnErr error) { - defer func() { - sendActivity("session:detach", rtnErr == nil) - }() - - fullORef, err := resolveBlockArg() - if err != nil { - return err - } - blockId := fullORef.OID - - info, err := wshclient.BlockInfoCommand(RpcClient, blockId, &wshrpc.RpcOpts{Timeout: 5000}) - if err != nil { - return fmt.Errorf("getting block info: %w", err) - } - if info.Block == nil { - return fmt.Errorf("block %s not found", blockId) - } - - daemonId := info.Block.Meta.GetString(waveobj.MetaKey_SessionDaemonId, "") - if daemonId == "" { - return fmt.Errorf("block %s is not attached to any session daemon", blockId) - } - - err = wshclient.SessionDetachCommand(RpcClient, wshrpc.CommandSessionDetachData{DaemonId: daemonId, BlockId: blockId}, &wshrpc.RpcOpts{Timeout: 10000}) - if err != nil { - return fmt.Errorf("detaching block: %w", err) - } - WriteStdout("block %s 
detached from session daemon %s\n", blockId, daemonId) - return nil -} - -func sessionInfoRun(cmd *cobra.Command, args []string) (rtnErr error) { - defer func() { - sendActivity("session:info", rtnErr == nil) - }() - - daemonId := args[0] - info, err := wshclient.SessionInfoCommand(RpcClient, wshrpc.CommandSessionInfoData{DaemonId: daemonId}, &wshrpc.RpcOpts{Timeout: 10000}) - if err != nil { - return fmt.Errorf("getting session info: %w", err) - } - - createdAt := time.UnixMilli(info.CreatedAt).Format("2006-01-02 15:04:05") - WriteStdout("daemonid: %s\n", info.DaemonId) - WriteStdout("name: %s\n", info.Name) - WriteStdout("connection: %s\n", info.Connection) - WriteStdout("jobid: %s\n", info.JobId) - WriteStdout("status: %s\n", info.Status) - WriteStdout("anonymous: %v\n", info.IsAnonymous) - WriteStdout("created: %s\n", createdAt) - WriteStdout("timeout: %ds\n", info.IdleTimeout) - if info.IdleSince > 0 { - WriteStdout("idle since: %s\n", time.UnixMilli(info.IdleSince).Format("2006-01-02 15:04:05")) - } - WriteStdout("blocks: %d\n", len(info.Blocks)) - for _, b := range info.Blocks { - WriteStdout(" - %s\n", b) - } - return nil -} - -func sessionTagRun(cmd *cobra.Command, args []string) (rtnErr error) { - defer func() { - sendActivity("session:tag", rtnErr == nil) - }() - - daemonId := args[0] - data := wshrpc.CommandSessionTagData{ - DaemonId: daemonId, - Name: sessionTagFlagName, - } - - err := wshclient.SessionTagCommand(RpcClient, data, &wshrpc.RpcOpts{Timeout: 10000}) - if err != nil { - return fmt.Errorf("tagging session daemon: %w", err) - } - WriteStdout("session daemon %s tagged as %q\n", daemonId, sessionTagFlagName) - return nil -} diff --git a/pkg/jobcontroller/jobcontroller.go b/pkg/jobcontroller/jobcontroller.go index cd7c1cf19e..70200ea671 100644 --- a/pkg/jobcontroller/jobcontroller.go +++ b/pkg/jobcontroller/jobcontroller.go @@ -40,7 +40,7 @@ import ( ) const DefaultTimeout = 2 * time.Second -const DefaultRemoteIdleTimeoutSeconds = 172800 // 2 
days +const DefaultRemoteIdleTimeoutSeconds = 604800 // 7 days const ( JobManagerStatus_Init = "init" diff --git a/pkg/wshrpc/wshremote/wshremote_job.go b/pkg/wshrpc/wshremote/wshremote_job.go index df5cf4470b..ccf540505d 100644 --- a/pkg/wshrpc/wshremote/wshremote_job.go +++ b/pkg/wshrpc/wshremote/wshremote_job.go @@ -267,7 +267,7 @@ func (impl *ServerImpl) RemoteStartJobCommand(ctx context.Context, data wshrpc.C jobConn.StartTs = time.Now().UnixMilli() jobConn.RemoteIdleTimeoutSeconds = data.RemoteIdleTimeoutSeconds if jobConn.RemoteIdleTimeoutSeconds <= 0 { - jobConn.RemoteIdleTimeoutSeconds = 172800 + jobConn.RemoteIdleTimeoutSeconds = 604800 } } impl.Lock.Unlock() @@ -341,7 +341,7 @@ func (impl *ServerImpl) RemoteReconnectToJobManagerCommand(ctx context.Context, jobConn.StartTs = data.JobManagerStartTs jobConn.RemoteIdleTimeoutSeconds = data.RemoteIdleTimeoutSeconds if jobConn.RemoteIdleTimeoutSeconds <= 0 { - jobConn.RemoteIdleTimeoutSeconds = 172800 + jobConn.RemoteIdleTimeoutSeconds = 604800 } } impl.Lock.Unlock() From d1dfae4ab63e77157ef4651df7b4ba7354ab32f9 Mon Sep 17 00:00:00 2001 From: lyx-tec Date: Mon, 22 Jun 2026 19:57:44 +0800 Subject: [PATCH 33/36] Fix session ssh fullscreen terminal history --- frontend/app/store/services.ts | 4 +- frontend/app/view/term/termwrap.ts | 169 +++++++++++++++++------ pkg/jobcontroller/jobcontroller.go | 29 +++- pkg/jobmanager/jobcmd.go | 28 +++- pkg/service/blockservice/blockservice.go | 33 ++++- pkg/sessiondaemon/sessiondaemon.go | 3 + 6 files changed, 204 insertions(+), 62 deletions(-) diff --git a/frontend/app/store/services.ts b/frontend/app/store/services.ts index 9e6e156bc3..d4898c285e 100644 --- a/frontend/app/store/services.ts +++ b/frontend/app/store/services.ts @@ -30,8 +30,8 @@ export class BlockServiceType { return callBackendService(this?.waveEnv, "block", "GetControllerStatus", Array.from(arguments)) } - // save the terminal state to a blockfile - SaveTerminalState(blockId: string, state: string, 
stateType: string, ptyOffset: number, termSize: TermSize): Promise { + // save the terminal state to a zone file + SaveTerminalState(zoneId: string, state: string, stateType: string, ptyOffset: number, termSize: TermSize): Promise { return callBackendService(this?.waveEnv, "block", "SaveTerminalState", Array.from(arguments)) } } diff --git a/frontend/app/view/term/termwrap.ts b/frontend/app/view/term/termwrap.ts index d0588ab924..8136406fb9 100644 --- a/frontend/app/view/term/termwrap.ts +++ b/frontend/app/view/term/termwrap.ts @@ -53,6 +53,13 @@ const TermCacheFileName = "cache:term:full"; const MinDataProcessedForCache = 100 * 1024; export const SupportsImageInput = true; const MaxRepaintTransactionMs = 2000; +const AltScreenEnterSeq = "\x1b[?1049h"; +const AltScreenExitSeq = "\x1b[?1049l"; +const AppCursorKeysEnterSeq = "\x1b[?1h"; +const AppCursorKeysExitSeq = "\x1b[?1l"; +const CursorShowSeq = "\x1b[?25h"; +const ClearScreenSeq = "\x1b[2J"; +const HomeAndClearScreenSeq = "\x1b[H\x1b[2J"; // detect webgl support function detectWebGLSupport(): boolean { @@ -75,6 +82,104 @@ type TermWrapOptions = { nodeModel?: BlockNodeModel; }; +// Some remote full-screen programs clear the main screen without emitting +// smcup/rmcup. Keep that temporary drawing in xterm's alternate buffer. +class SyntheticAltScreenTracker { + pendingEnter: boolean = false; + active: boolean = false; + pendingExit: boolean = false; + + process(data: string): string | null { + let changed = false; + let rtn = ""; + + for (let idx = 0; idx < data.length; ) { + const handledSeq = this.matchAndAppendSeq(data, idx); + if (handledSeq != null) { + rtn += handledSeq.data; + idx += handledSeq.seqLen; + changed ||= handledSeq.changed; + continue; + } + rtn += data[idx]; + idx++; + } + + const exitSeq = this.flushPendingExit(); + if (exitSeq != null) { + rtn += exitSeq; + changed = true; + } + return changed ? 
rtn : null; + } + + matchAndAppendSeq(data: string, idx: number): { data: string; seqLen: number; changed: boolean } | null { + if (data.startsWith(AltScreenEnterSeq, idx)) { + this.reset(); + return { data: AltScreenEnterSeq, seqLen: AltScreenEnterSeq.length, changed: false }; + } + if (data.startsWith(AltScreenExitSeq, idx)) { + this.reset(); + return { data: AltScreenExitSeq, seqLen: AltScreenExitSeq.length, changed: false }; + } + if (data.startsWith(AppCursorKeysEnterSeq, idx)) { + if (!this.active) { + this.pendingEnter = true; + } + return { data: AppCursorKeysEnterSeq, seqLen: AppCursorKeysEnterSeq.length, changed: false }; + } + if (data.startsWith(AppCursorKeysExitSeq, idx)) { + let changed = false; + if (this.active) { + this.pendingExit = true; + changed = true; + } + this.pendingEnter = false; + return { data: AppCursorKeysExitSeq, seqLen: AppCursorKeysExitSeq.length, changed }; + } + if (data.startsWith(CursorShowSeq, idx)) { + const exitSeq = this.flushPendingExit(); + return { + data: exitSeq == null ? 
CursorShowSeq : CursorShowSeq + exitSeq, + seqLen: CursorShowSeq.length, + changed: exitSeq != null, + }; + } + if (data.startsWith(HomeAndClearScreenSeq, idx)) { + return this.maybeEnterAltScreen(HomeAndClearScreenSeq); + } + if (data.startsWith(ClearScreenSeq, idx)) { + return this.maybeEnterAltScreen(ClearScreenSeq); + } + return null; + } + + maybeEnterAltScreen(seq: string): { data: string; seqLen: number; changed: boolean } { + if (!this.pendingEnter || this.active) { + return { data: seq, seqLen: seq.length, changed: false }; + } + this.active = true; + this.pendingEnter = false; + this.pendingExit = false; + return { data: AltScreenEnterSeq + seq, seqLen: seq.length, changed: true }; + } + + flushPendingExit(): string | null { + if (!this.pendingExit) { + return null; + } + this.active = false; + this.pendingExit = false; + return AltScreenExitSeq; + } + + reset() { + this.pendingEnter = false; + this.active = false; + this.pendingExit = false; + } +} + export class TermWrap { tabId: string; blockId: string; @@ -88,7 +193,6 @@ export class TermWrap { serializeAddon: SerializeAddon; mainFileSubject: SubjectWithRef; _mainFileSub: Subscription | null = null; - _attachSeq: number = 0; loaded: boolean; heldData: Uint8Array[]; handleResize_debounced: () => void; @@ -125,6 +229,7 @@ export class TermWrap { lastMode2026ResetTs: number = 0; inSyncTransaction: boolean = false; inRepaintTransaction: boolean = false; + syntheticAltScreenTracker: SyntheticAltScreenTracker = new SyntheticAltScreenTracker(); constructor( tabId: string, @@ -333,48 +438,12 @@ export class TermWrap { return this.zoneId; } - async attachToDaemon(jobId: string): Promise { - this._attachSeq++; - const mySeq = this._attachSeq; - if (this.zoneId === jobId) { - return; - } - if (this._mainFileSub) { - this._mainFileSub.unsubscribe(); - this._mainFileSub = null; - } - if (this.mainFileSubject) { - this.mainFileSubject.release(); - } - this.terminal.clear(); - this.ptyOffset = 0; - this.heldData = 
[]; - this.zoneId = jobId; - this.mainFileSubject = getFileSubject(this.getZoneId(), TermFileName); - this._mainFileSub = this.mainFileSubject.subscribe(this.handleNewFileSubjectData.bind(this)); - await this.loadInitialTerminalData(); + async attachToDaemon(_jobId: string): Promise { + this.zoneId = this.blockId; } async detachFromDaemon(): Promise { - this._attachSeq++; - const mySeq = this._attachSeq; - if (this.zoneId === this.blockId) { - return; - } - if (this._mainFileSub) { - this._mainFileSub.unsubscribe(); - this._mainFileSub = null; - } - if (this.mainFileSubject) { - this.mainFileSubject.release(); - } - this.terminal.clear(); - this.ptyOffset = 0; - this.heldData = []; this.zoneId = this.blockId; - this.mainFileSubject = getFileSubject(this.getZoneId(), TermFileName); - this._mainFileSub = this.mainFileSubject.subscribe(this.handleNewFileSubjectData.bind(this)); - await this.loadInitialTerminalData(); } setCursorStyle(cursorStyle: string) { @@ -543,6 +612,8 @@ export class TermWrap { } doTerminalWrite(data: string | Uint8Array, setPtyOffset?: number): Promise { + const rawDataLen = data.length; + const writeData = setPtyOffset == null ? this.maybeAddSyntheticAltScreen(data) : data; if (isDev() && this.loaded) { const dataStr = data instanceof Uint8Array ? 
new TextDecoder().decode(data) : data; this.recentWrites.push({ idx: this.recentWritesCounter++, ts: Date.now(), data: dataStr }); @@ -554,12 +625,12 @@ export class TermWrap { const prtn = new Promise((presolve, _) => { resolve = presolve; }); - this.terminal.write(data, () => { + this.terminal.write(writeData, () => { if (setPtyOffset != null) { this.ptyOffset = setPtyOffset; } else { - this.ptyOffset += data.length; - this.dataBytesProcessed += data.length; + this.ptyOffset += rawDataLen; + this.dataBytesProcessed += rawDataLen; } this.lastUpdated = Date.now(); resolve(); @@ -567,6 +638,18 @@ export class TermWrap { return prtn; } + maybeAddSyntheticAltScreen(data: string | Uint8Array): string | Uint8Array { + const dataStr = data instanceof Uint8Array ? new TextDecoder().decode(data) : data; + const syntheticData = this.syntheticAltScreenTracker.process(dataStr); + if (syntheticData == null) { + return data; + } + if (data instanceof Uint8Array) { + return new TextEncoder().encode(syntheticData); + } + return syntheticData; + } + async loadInitialTerminalData(): Promise { const startTs = Date.now(); const zoneId = this.getZoneId(); @@ -644,7 +727,7 @@ export class TermWrap { const termSize: TermSize = { rows: this.terminal.rows, cols: this.terminal.cols }; console.log("idle timeout term", this.dataBytesProcessed, serializedOutput.length, termSize); fireAndForget(() => - services.BlockService.SaveTerminalState(this.blockId, serializedOutput, "full", this.ptyOffset, termSize) + services.BlockService.SaveTerminalState(this.getZoneId(), serializedOutput, "full", this.ptyOffset, termSize) ); this.dataBytesProcessed = 0; } diff --git a/pkg/jobcontroller/jobcontroller.go b/pkg/jobcontroller/jobcontroller.go index 70200ea671..5e73202caa 100644 --- a/pkg/jobcontroller/jobcontroller.go +++ b/pkg/jobcontroller/jobcontroller.go @@ -80,6 +80,10 @@ var ClearSessionDaemonJobFn func(ctx context.Context, jobId string) // state reconciliation when an SSH connection becomes 
ready. var OnConnectionUpFn func(ctx context.Context, connName string) +// GetSessionDaemonBlocksFn is set by sessiondaemon so daemon-backed job +// output can still be mirrored into each attached block's terminal file. +var GetSessionDaemonBlocksFn func(daemonId string) []string + type connState struct { actual bool processed bool @@ -829,11 +833,28 @@ func handleAppendJobFile(ctx context.Context, jobId string, fileName string, dat if err != nil { return fmt.Errorf("error getting job: %w", err) } - if job != nil && job.AttachedBlockId != "" && !strings.HasPrefix(job.AttachedBlockId, "daemon:") { - err = doWFSAppend(ctx, waveobj.MakeORef(waveobj.OType_Block, job.AttachedBlockId), fileName, data) - if err != nil { - return fmt.Errorf("error appending to block file: %w", err) + if job == nil || job.AttachedBlockId == "" { + return nil + } + if strings.HasPrefix(job.AttachedBlockId, "daemon:") { + daemonId := strings.TrimPrefix(job.AttachedBlockId, "daemon:") + if GetSessionDaemonBlocksFn == nil { + return nil } + for _, blockId := range GetSessionDaemonBlocksFn(daemonId) { + if blockId == "" { + continue + } + err = doWFSAppend(ctx, waveobj.MakeORef(waveobj.OType_Block, blockId), fileName, data) + if err != nil { + return fmt.Errorf("error appending daemon job output to block file: %w", err) + } + } + return nil + } + err = doWFSAppend(ctx, waveobj.MakeORef(waveobj.OType_Block, job.AttachedBlockId), fileName, data) + if err != nil { + return fmt.Errorf("error appending to block file: %w", err) } return nil } diff --git a/pkg/jobmanager/jobcmd.go b/pkg/jobmanager/jobcmd.go index 8adfabefc4..0c82a690d6 100644 --- a/pkg/jobmanager/jobcmd.go +++ b/pkg/jobmanager/jobcmd.go @@ -7,7 +7,9 @@ import ( "encoding/base64" "fmt" "log" + "os" "os/exec" + "strings" "sync" "syscall" "time" @@ -53,12 +55,7 @@ func MakeJobCmd(jobId string, cmdDef CmdDef) (*JobCmd, error) { return nil, fmt.Errorf("invalid term size: %v", cmdDef.TermSize) } ecmd := exec.Command(cmdDef.Cmd, 
cmdDef.Args...) - if len(cmdDef.Env) > 0 { - ecmd.Env = make([]string, 0, len(cmdDef.Env)) - for key, val := range cmdDef.Env { - ecmd.Env = append(ecmd.Env, fmt.Sprintf("%s=%s", key, val)) - } - } + ecmd.Env = mergeEnv(os.Environ(), cmdDef.Env) cmdPty, err := pty.StartWithSize(ecmd, &pty.Winsize{Rows: uint16(cmdDef.TermSize.Rows), Cols: uint16(cmdDef.TermSize.Cols)}) if err != nil { return nil, fmt.Errorf("failed to start command: %w", err) @@ -72,6 +69,25 @@ func MakeJobCmd(jobId string, cmdDef CmdDef) (*JobCmd, error) { return jm, nil } +func mergeEnv(baseEnv []string, overrides map[string]string) []string { + envMap := make(map[string]string, len(baseEnv)+len(overrides)) + for _, envVar := range baseEnv { + key, val, found := strings.Cut(envVar, "=") + if !found { + continue + } + envMap[key] = val + } + for key, val := range overrides { + envMap[key] = val + } + rtn := make([]string, 0, len(envMap)) + for key, val := range envMap { + rtn = append(rtn, fmt.Sprintf("%s=%s", key, val)) + } + return rtn +} + func (jm *JobCmd) waitForProcess() { if jm.cmd == nil || jm.cmd.Process == nil { return diff --git a/pkg/service/blockservice/blockservice.go b/pkg/service/blockservice/blockservice.go index d2e6ca39da..1c1aeb204d 100644 --- a/pkg/service/blockservice/blockservice.go +++ b/pkg/service/blockservice/blockservice.go @@ -5,6 +5,7 @@ package blockservice import ( "context" + "errors" "fmt" "time" @@ -37,13 +38,13 @@ func (bs *BlockService) GetControllerStatus(ctx context.Context, blockId string) func (*BlockService) SaveTerminalState_Meta() tsgenmeta.MethodMeta { return tsgenmeta.MethodMeta{ - Desc: "save the terminal state to a blockfile", - ArgNames: []string{"ctx", "blockId", "state", "stateType", "ptyOffset", "termSize"}, + Desc: "save the terminal state to a zone file", + ArgNames: []string{"ctx", "zoneId", "state", "stateType", "ptyOffset", "termSize"}, } } -func (bs *BlockService) SaveTerminalState(ctx context.Context, blockId string, state string, stateType 
string, ptyOffset int64, termSize waveobj.TermSize) error { - _, err := wstore.DBMustGet[*waveobj.Block](ctx, blockId) +func (bs *BlockService) SaveTerminalState(ctx context.Context, zoneId string, state string, stateType string, ptyOffset int64, termSize waveobj.TermSize) error { + err := ensureTerminalStateZoneExists(ctx, zoneId) if err != nil { return err } @@ -51,8 +52,8 @@ func (bs *BlockService) SaveTerminalState(ctx context.Context, blockId string, s return fmt.Errorf("invalid state type: %q", stateType) } // ignore MakeFile error (already exists is ok) - filestore.WFS.MakeFile(ctx, blockId, "cache:term:"+stateType, nil, wshrpc.FileOpts{}) - err = filestore.WFS.WriteFile(ctx, blockId, "cache:term:"+stateType, []byte(state)) + filestore.WFS.MakeFile(ctx, zoneId, "cache:term:"+stateType, nil, wshrpc.FileOpts{}) + err = filestore.WFS.WriteFile(ctx, zoneId, "cache:term:"+stateType, []byte(state)) if err != nil { return fmt.Errorf("cannot save terminal state: %w", err) } @@ -60,13 +61,31 @@ func (bs *BlockService) SaveTerminalState(ctx context.Context, blockId string, s "ptyoffset": ptyOffset, "termsize": termSize, } - err = filestore.WFS.WriteMeta(ctx, blockId, "cache:term:"+stateType, fileMeta, true) + err = filestore.WFS.WriteMeta(ctx, zoneId, "cache:term:"+stateType, fileMeta, true) if err != nil { return fmt.Errorf("cannot save terminal state meta: %w", err) } return nil } +func ensureTerminalStateZoneExists(ctx context.Context, zoneId string) error { + _, blockErr := wstore.DBMustGet[*waveobj.Block](ctx, zoneId) + if blockErr == nil { + return nil + } + if !errors.Is(blockErr, wstore.ErrNotFound) { + return blockErr + } + _, jobErr := wstore.DBMustGet[*waveobj.Job](ctx, zoneId) + if jobErr == nil { + return nil + } + if !errors.Is(jobErr, wstore.ErrNotFound) { + return jobErr + } + return fmt.Errorf("terminal state zone %q not found", zoneId) +} + func (*BlockService) CleanupOrphanedBlocks_Meta() tsgenmeta.MethodMeta { return tsgenmeta.MethodMeta{ Desc: 
"queue a layout action to cleanup orphaned blocks in the tab", diff --git a/pkg/sessiondaemon/sessiondaemon.go b/pkg/sessiondaemon/sessiondaemon.go index 99b9086703..97624f8e49 100644 --- a/pkg/sessiondaemon/sessiondaemon.go +++ b/pkg/sessiondaemon/sessiondaemon.go @@ -65,6 +65,9 @@ func init() { jobcontroller.OnConnectionUpFn = func(ctx context.Context, connName string) { Manager.OnConnectionUp(ctx, connName) } + jobcontroller.GetSessionDaemonBlocksFn = func(daemonId string) []string { + return Manager.GetBlocksForDaemon(daemonId) + } } func (sd *SessionDaemon) GetNextInputSeq() (string, int) { From 9ad0a0bb369ac65874a1450cc2636168be917ccd Mon Sep 17 00:00:00 2001 From: lyx-tec Date: Tue, 23 Jun 2026 20:07:35 +0800 Subject: [PATCH 34/36] refactor(session): slim shared session implementation --- frontend/app/block/session-daemon-hooks.ts | 234 +++++++ .../app/block/session-daemon-indicator.tsx | 586 +----------------- frontend/app/block/session-daemon-popup.tsx | 79 +++ frontend/app/block/session-daemon-rows.tsx | 286 +++++++++ frontend/app/block/session-daemon-types.ts | 19 + frontend/app/view/term/term.tsx | 11 - pkg/blockcontroller/blockcontroller.go | 64 +- .../sessiondaemoncontroller.go | 89 +-- pkg/blockcontroller/shellcontroller.go | 12 +- pkg/sessiondaemon/reaper.go | 179 ++++++ pkg/sessiondaemon/sessiondaemon.go | 378 ++++------- pkg/wshrpc/wshserver/wshserver.go | 273 -------- pkg/wshrpc/wshserver/wshserver_session.go | 260 ++++++++ 13 files changed, 1242 insertions(+), 1228 deletions(-) create mode 100644 frontend/app/block/session-daemon-hooks.ts create mode 100644 frontend/app/block/session-daemon-popup.tsx create mode 100644 frontend/app/block/session-daemon-rows.tsx create mode 100644 frontend/app/block/session-daemon-types.ts create mode 100644 pkg/sessiondaemon/reaper.go create mode 100644 pkg/wshrpc/wshserver/wshserver_session.go diff --git a/frontend/app/block/session-daemon-hooks.ts b/frontend/app/block/session-daemon-hooks.ts new file mode 
100644 index 0000000000..2d2b82c5e3 --- /dev/null +++ b/frontend/app/block/session-daemon-hooks.ts @@ -0,0 +1,234 @@ +// Copyright 2026, Command Line Inc. +// SPDX-License-Identifier: Apache-2.0 + +import { globalStore } from "@/app/store/jotaiStore"; +import { RpcApi } from "@/app/store/wshclientapi"; +import { TabRpcClient } from "@/app/store/wshrpcutil"; +import { useWaveEnv } from "@/app/waveenv/waveenv"; +import { fireAndForget } from "@/util/util"; +import { autoUpdate, flip, offset, shift, useFloating } from "@floating-ui/react"; +import * as jotai from "jotai"; +import type * as React from "react"; +import { Dispatch, SetStateAction, useCallback, useEffect, useMemo, useRef, useState } from "react"; +import { BlockEnv } from "./blockenv"; +import { SessionDisplayData, SessionInfo } from "./session-daemon-types"; + +const EmptySessionDisplayAtom = jotai.atom({ name: null, isanonymous: true }); +const sessionDisplayAtomMap = new Map>(); + +function getSessionDisplayAtom(daemonId: string): jotai.PrimitiveAtom { + let a = sessionDisplayAtomMap.get(daemonId); + if (!a) { + a = jotai.atom({ name: null, isanonymous: true }); + sessionDisplayAtomMap.set(daemonId, a); + } + return a; +} + +export interface SessionDaemonIndicatorState { + daemonId: string; + visible: boolean; + showPopup: boolean; + setShowPopup: Dispatch>; + sessions: SessionInfo[]; + sameConnSessions: SessionInfo[]; + sessionDisplay: SessionDisplayData; + editingId: string; + editName: string; + setEditName: Dispatch>; + creating: boolean; + showCreateInput: boolean; + setShowCreateInput: Dispatch>; + newSessionName: string; + setNewSessionName: Dispatch>; + popupRef: React.RefObject; + iconRef: React.RefObject; + editInputRef: React.RefObject; + createInputRef: React.RefObject; + floatingStyles: React.CSSProperties; + handleAttach: (targetDaemonId: string) => void; + handleStartEdit: (daemonId: string, currentName: string) => void; + handleSaveEdit: () => void; + handleCancelEdit: () => void; + 
handleCreateAndAttach: (name?: string) => Promise; + handleDelete: (daemonId: string) => void; +} + +export function useSessionDaemonIndicator(blockId: string): SessionDaemonIndicatorState { + const waveEnv = useWaveEnv(); + const daemonId = jotai.useAtomValue(waveEnv.getBlockMetaKeyAtom(blockId, "session:daemonid")); + const connName = jotai.useAtomValue(waveEnv.getBlockMetaKeyAtom(blockId, "connection")); + const [showPopup, setShowPopup] = useState(false); + const [sessions, setSessions] = useState([]); + const [editingId, setEditingId] = useState(null); + const [editName, setEditName] = useState(""); + const [creating, setCreating] = useState(false); + const creatingRef = useRef(false); + const [showCreateInput, setShowCreateInput] = useState(false); + const [newSessionName, setNewSessionName] = useState(""); + const createInputRef = useRef(null); + const editInputRef = useRef(null); + const popupRef = useRef(null); + const iconRef = useRef(null); + const sessionDisplayAtom = daemonId ? getSessionDisplayAtom(daemonId) : EmptySessionDisplayAtom; + const sessionDisplay = jotai.useAtomValue(sessionDisplayAtom); + const isSshConn = connName && !connName.startsWith("local") && !connName.startsWith("wsl://"); + const visible = !!daemonId || isSshConn; + const sameConnSessions = useMemo(() => sessions.filter((s) => s.connection === connName), [sessions, connName]); + const { floatingStyles } = useFloating({ + elements: { + reference: iconRef.current, + floating: popupRef.current, + }, + open: showPopup, + onOpenChange: setShowPopup, + placement: "bottom-end", + middleware: [offset(6), flip(), shift({ padding: 12 })], + whileElementsMounted: autoUpdate, + }); + + useEffect(() => { + if (!showPopup) return; + fireAndForget(async () => { + try { + const list = await RpcApi.SessionListCommand(TabRpcClient, { showall: true }); + setSessions((list ?? 
[]) as SessionInfo[]); + } catch (e) { + console.log("error loading session list:", e); + } + }); + }, [showPopup]); + + useEffect(() => { + if (!daemonId) return; + fireAndForget(async () => { + try { + const info = await RpcApi.SessionInfoCommand(TabRpcClient, { daemonid: daemonId }); + if (info) { + const atom = getSessionDisplayAtom(daemonId); + globalStore.set(atom, { name: info.name || null, isanonymous: info.isanonymous }); + } + } catch (_) {} + }); + }, [daemonId]); + + useEffect(() => { + function handleClick(e: MouseEvent) { + if ( + popupRef.current && + !popupRef.current.contains(e.target as Node) && + iconRef.current && + !iconRef.current.contains(e.target as Node) + ) { + setShowPopup(false); + } + } + if (showPopup) { + document.addEventListener("mousedown", handleClick); + return () => document.removeEventListener("mousedown", handleClick); + } + }, [showPopup]); + + const handleAttach = useCallback((targetDaemonId: string) => { + if (targetDaemonId === daemonId) return; + if (editingId) return; + fireAndForget(async () => { + try { + await RpcApi.SessionAttachCommand(TabRpcClient, { daemonid: targetDaemonId, blockid: blockId, currentdaemonid: daemonId ?? 
undefined }); + setShowPopup(false); + } catch (e) { + console.log("error switching session:", e); + } + }); + }, [daemonId, editingId, blockId]); + + const handleStartEdit = useCallback((daemonId: string, currentName: string) => { + setEditingId(daemonId); + setEditName(currentName || ""); + setTimeout(() => editInputRef.current?.focus(), 0); + }, []); + + const handleSaveEdit = useCallback(() => { + const id = editingId; + const name = editName.trim(); + if (!id) return; + setEditingId(null); + const atom = getSessionDisplayAtom(id); + globalStore.set(atom, { name: name || null, isanonymous: !name }); + fireAndForget(async () => { + try { + await RpcApi.SessionTagCommand(TabRpcClient, { daemonid: id, name: name || "Unnamed session" }); + const list = await RpcApi.SessionListCommand(TabRpcClient, { showall: true }); + setSessions((list ?? []) as SessionInfo[]); + } catch (e) { + console.log("error renaming session:", e); + } + }); + }, [editingId, editName]); + + const handleCancelEdit = useCallback(() => { + setEditingId(null); + }, []); + + const handleCreateAndAttach = useCallback(async (name?: string) => { + if (!connName || creatingRef.current) return; + creatingRef.current = true; + setCreating(true); + try { + const info = await RpcApi.SessionCreateCommand(TabRpcClient, { connection: connName, name }); + if (info?.daemonid) { + await RpcApi.SessionAttachCommand(TabRpcClient, { + daemonid: info.daemonid, + blockid: blockId, + currentdaemonid: daemonId ?? 
undefined, + }); + setShowPopup(false); + } + } catch (e) { + console.log("error creating session:", e); + } finally { + creatingRef.current = false; + setCreating(false); + } + }, [connName, blockId, daemonId]); + + const handleDelete = useCallback((daemonId: string) => { + fireAndForget(async () => { + try { + await RpcApi.SessionDeleteCommand(TabRpcClient, { daemonid: daemonId }); + setSessions((prev) => prev.filter((x) => x.daemonid !== daemonId)); + } catch (e) { + console.log("error closing session:", e); + } + }); + }, []); + + return { + daemonId, + visible, + showPopup, + setShowPopup, + sessions, + sameConnSessions, + sessionDisplay, + editingId, + editName, + setEditName, + creating, + showCreateInput, + setShowCreateInput, + newSessionName, + setNewSessionName, + popupRef, + iconRef, + editInputRef, + createInputRef, + floatingStyles, + handleAttach, + handleStartEdit, + handleSaveEdit, + handleCancelEdit, + handleCreateAndAttach, + handleDelete, + }; +} diff --git a/frontend/app/block/session-daemon-indicator.tsx b/frontend/app/block/session-daemon-indicator.tsx index b13e1b16ee..d6c868f79c 100644 --- a/frontend/app/block/session-daemon-indicator.tsx +++ b/frontend/app/block/session-daemon-indicator.tsx @@ -1,276 +1,34 @@ // Copyright 2026, Command Line Inc. 
// SPDX-License-Identifier: Apache-2.0 -import { RpcApi } from "@/app/store/wshclientapi"; -import { TabRpcClient } from "@/app/store/wshrpcutil"; -import { globalStore } from "@/app/store/jotaiStore"; -import { useWaveEnv } from "@/app/waveenv/waveenv"; -import { fireAndForget } from "@/util/util"; -import { autoUpdate, flip, FloatingPortal, offset, shift, useFloating } from "@floating-ui/react"; -import * as jotai from "jotai"; -import { useCallback, useEffect, useMemo, useRef, useState } from "react"; -import { BlockEnv } from "./blockenv"; - -interface SessionDisplayData { - name: string | null; - isanonymous: boolean; -} - -const sessionDisplayAtomMap = new Map>(); - -function getSessionDisplayAtom(daemonId: string): jotai.PrimitiveAtom { - let a = sessionDisplayAtomMap.get(daemonId); - if (!a) { - a = jotai.atom({ name: null, isanonymous: true }); - sessionDisplayAtomMap.set(daemonId, a); - } - return a; -} - -function formatCreatedTime(ms: number | undefined): string { - if (ms == null) return ""; - const d = new Date(ms); - const now = new Date(); - const diffMs = now.getTime() - d.getTime(); - const diffMin = Math.floor(diffMs / 60000); - if (diffMin < 1) return "just now"; - if (diffMin < 60) return `${diffMin}m ago`; - const diffHr = Math.floor(diffMin / 60); - if (diffHr < 24) return `${diffHr}h ago`; - const diffDay = Math.floor(diffHr / 24); - if (diffDay < 7) return `${diffDay}d ago`; - return d.toLocaleDateString(undefined, { month: "short", day: "numeric", year: "numeric" }); -} - -interface SessionInfo { - daemonid: string; - name: string; - connection: string; - status: string; - isanonymous: boolean; - createdat?: number; - blocks?: string[]; - jobid?: string; - lastactiveat?: number; -} +import { useSessionDaemonIndicator } from "./session-daemon-hooks"; +import { SessionDaemonPopup } from "./session-daemon-popup"; interface SessionDaemonIndicatorProps { blockId: string; useTermHeader: boolean; } -const popupStyle = { - zIndex: 100, - width: 
"min(420px, calc(100vw - 24px))", - maxHeight: 360, - overflowY: "auto", - background: "color-mix(in srgb, var(--bg-secondary, #1e1e2e) 96%, black)", - border: "1px solid color-mix(in srgb, var(--border-primary, #45475a) 78%, transparent)", - borderRadius: 10, - padding: 8, - boxShadow: "0 18px 42px rgba(0,0,0,0.42), 0 2px 8px rgba(0,0,0,0.28)", -} as const; - -const truncateStyle = { - minWidth: 0, - overflow: "hidden", - textOverflow: "ellipsis", - whiteSpace: "nowrap", -} as const; - -function SessionStatusPill({ status }: { status: string }) { - const isRunning = status === "running"; - return ( - - - {status || "unknown"} - - ); -} - export function SessionDaemonIndicator({ blockId, useTermHeader }: SessionDaemonIndicatorProps) { - const waveEnv = useWaveEnv(); - const daemonId = jotai.useAtomValue(waveEnv.getBlockMetaKeyAtom(blockId, "session:daemonid")); - const connName = jotai.useAtomValue(waveEnv.getBlockMetaKeyAtom(blockId, "connection")); - const [showPopup, setShowPopup] = useState(false); - const [sessions, setSessions] = useState([]); - const [editingId, setEditingId] = useState(null); - const [editName, setEditName] = useState(""); - const [creating, setCreating] = useState(false); - const creatingRef = useRef(false); - const [showCreateInput, setShowCreateInput] = useState(false); - const [newSessionName, setNewSessionName] = useState(""); - const createInputRef = useRef(null); - const sessionDisplayAtom = daemonId ? getSessionDisplayAtom(daemonId) : null; - const sessionDisplay = jotai.useAtomValue(sessionDisplayAtom ?? 
jotai.atom({ name: null, isanonymous: true })); - const editInputRef = useRef(null); - const popupRef = useRef(null); - const iconRef = useRef(null); - const { floatingStyles } = useFloating({ - elements: { - reference: iconRef.current, - floating: popupRef.current, - }, - open: showPopup, - onOpenChange: setShowPopup, - placement: "bottom-end", - middleware: [offset(6), flip(), shift({ padding: 12 })], - whileElementsMounted: autoUpdate, - }); - - useEffect(() => { - if (!showPopup) return; - fireAndForget(async () => { - try { - const list = await RpcApi.SessionListCommand(TabRpcClient, { showall: true }); - setSessions((list ?? []) as SessionInfo[]); - } catch (e) { - console.log("error loading session list:", e); - } - }); - }, [showPopup]); - - useEffect(() => { - if (!daemonId) return; - fireAndForget(async () => { - try { - const info = await RpcApi.SessionInfoCommand(TabRpcClient, { daemonid: daemonId }); - if (info) { - const atom = getSessionDisplayAtom(daemonId); - globalStore.set(atom, { name: info.name || null, isanonymous: info.isanonymous }); - } - } catch (_) {} - }); - }, [daemonId]); - - useEffect(() => { - function handleClick(e: MouseEvent) { - if ( - popupRef.current && - !popupRef.current.contains(e.target as Node) && - iconRef.current && - !iconRef.current.contains(e.target as Node) - ) { - setShowPopup(false); - } - } - if (showPopup) { - document.addEventListener("mousedown", handleClick); - return () => document.removeEventListener("mousedown", handleClick); - } - }, [showPopup]); - - const handleAttach = useCallback((targetDaemonId: string) => { - if (targetDaemonId === daemonId) return; - if (editingId) return; - fireAndForget(async () => { - try { - await RpcApi.SessionAttachCommand(TabRpcClient, { daemonid: targetDaemonId, blockid: blockId, currentdaemonid: daemonId ?? 
undefined }); - setShowPopup(false); - } catch (e) { - console.log("error switching session:", e); - } - }); - }, [daemonId, editingId, blockId]); - - const handleStartEdit = useCallback((daemonId: string, currentName: string) => { - setEditingId(daemonId); - setEditName(currentName || ""); - setTimeout(() => editInputRef.current?.focus(), 0); - }, []); - - const handleSaveEdit = useCallback(() => { - const id = editingId; - const name = editName.trim(); - if (!id) return; - setEditingId(null); - const atom = getSessionDisplayAtom(id); - globalStore.set(atom, { name: name || null, isanonymous: !name }); - fireAndForget(async () => { - try { - await RpcApi.SessionTagCommand(TabRpcClient, { daemonid: id, name: name || "Unnamed session" }); - const list = await RpcApi.SessionListCommand(TabRpcClient, { showall: true }); - setSessions((list ?? []) as SessionInfo[]); - } catch (e) { - console.log("error renaming session:", e); - } - }); - }, [editingId, editName]); - - const handleCancelEdit = useCallback(() => { - setEditingId(null); - }, []); - - const handleCreateAndAttach = useCallback(async (name?: string) => { - if (!connName || creatingRef.current) return; - creatingRef.current = true; - setCreating(true); - try { - const info = await RpcApi.SessionCreateCommand(TabRpcClient, { connection: connName }); - if (info?.daemonid) { - if (name) { - await RpcApi.SessionTagCommand(TabRpcClient, { daemonid: info.daemonid, name }); - } - await RpcApi.SessionAttachCommand(TabRpcClient, { - daemonid: info.daemonid, - blockid: blockId, - currentdaemonid: daemonId ?? 
undefined, - }); - setShowPopup(false); - } - } catch (e) { - console.log("error creating session:", e); - } finally { - creatingRef.current = false; - setCreating(false); - } - }, [connName, blockId, daemonId]); + const state = useSessionDaemonIndicator(blockId); if (!useTermHeader) { return null; } - const isSshConn = connName && !connName.startsWith("local") && !connName.startsWith("wsl://"); - const visible = !!daemonId || isSshConn; - - const sameConnSessions = useMemo(() => sessions.filter((s) => s.connection === connName), [sessions, connName]); - return ( <>
setShowPopup((v) => !v)} - style={{ display: visible ? "inline-flex" : "none", alignItems: "center", gap: 4 }} + title={state.daemonId ? `Session: ${state.daemonId}` : "No session attached"} + onClick={() => state.setShowPopup((v) => !v)} + style={{ display: state.visible ? "inline-flex" : "none", alignItems: "center", gap: 4 }} > - - {daemonId ? ( + + {state.daemonId ? ( - {sessionDisplay.isanonymous ? daemonId.slice(0, 8) : (sessionDisplay.name || daemonId.slice(0, 8))} + {state.sessionDisplay.isanonymous ? state.daemonId.slice(0, 8) : (state.sessionDisplay.name || state.daemonId.slice(0, 8))} ) : ( @@ -278,329 +36,7 @@ export function SessionDaemonIndicator({ blockId, useTermHeader }: SessionDaemon )}
- {showPopup && ( - -
e.stopPropagation()} - onFocusCapture={(e) => e.stopPropagation()} - onClick={(e) => e.stopPropagation()} - > -
-
- - - Sessions - -
- - {sessions.length} - -
- {showCreateInput ? ( -
- - setNewSessionName(e.target.value)} - onKeyDown={(e) => { - if (e.key === "Enter") { - const name = newSessionName.trim(); - handleCreateAndAttach(name || undefined); - setShowCreateInput(false); - setNewSessionName(""); - } - if (e.key === "Escape") { - setShowCreateInput(false); - setNewSessionName(""); - } - }} - placeholder="Session name (optional)" - style={{ - flex: 1, - background: "transparent", - border: "none", - outline: "none", - color: "#7dd3fc", - fontSize: 13, - fontWeight: 600, - }} - /> -
- ) : ( -
{ - setShowCreateInput(true); - setTimeout(() => createInputRef.current?.focus(), 0); - }} - style={{ - display: "flex", - alignItems: "center", - gap: 8, - padding: "8px 10px", - marginBottom: 4, - cursor: creating ? "default" : "pointer", - borderRadius: 8, - background: "rgba(56, 189, 248, 0.08)", - border: "1px solid rgba(56, 189, 248, 0.18)", - opacity: creating ? 0.5 : 1, - }} - > - - - {creating ? "Creating..." : "Create new session"} - -
- )} - {sameConnSessions.length === 0 && ( -
- No sessions on this connection -
- )} - {sameConnSessions.map((s) => { - const isActive = s.daemonid === daemonId; - const blockCount = s.blocks?.length ?? 0; - const canClose = blockCount === 0 || s.status === "done"; - const displayStatus = s.status === "done" ? "done" : blockCount === 0 ? "idle" : s.status; - return ( -
handleAttach(s.daemonid)} - title={`${s.name || s.connection} · ${s.status}`} - style={{ - display: "grid", - gridTemplateColumns: "minmax(0, 1fr) auto", - gap: 10, - padding: "9px 10px", - marginTop: 4, - cursor: isActive ? "default" : "pointer", - borderRadius: 8, - fontSize: 13, - background: isActive ? "rgba(56, 189, 248, 0.12)" : "transparent", - border: isActive - ? "1px solid rgba(56, 189, 248, 0.24)" - : "1px solid transparent", - }} - onMouseEnter={(e) => { - if (!isActive) { - e.currentTarget.style.background = "rgba(148, 163, 184, 0.08)"; - } - }} - onMouseLeave={(e) => { - if (!isActive) { - e.currentTarget.style.background = "transparent"; - } - }} - > -
- - - -
- {editingId === s.daemonid ? ( - setEditName(e.target.value)} - onKeyDown={(e) => { - if (e.key === "Enter") handleSaveEdit(); - if (e.key === "Escape") handleCancelEdit(); - }} - onBlur={handleSaveEdit} - onClick={(e) => e.stopPropagation()} - style={{ - width: "100%", - fontWeight: 650, - color: "var(--text-primary)", - fontSize: 14, - lineHeight: "20px", - background: "rgba(148, 163, 184, 0.12)", - border: "1px solid rgba(56, 189, 248, 0.3)", - borderRadius: 4, - padding: "1px 6px", - outline: "none", - }} - /> - ) : ( -
{ - e.stopPropagation(); - handleStartEdit(s.daemonid, s.name); - }} - style={{ - display: "flex", - alignItems: "center", - gap: 6, - cursor: "text", - }} - title="Click to rename" - > - - {s.name || "Unnamed session"} - - -
- )} - {s.connection && ( -
- {s.connection} -
- )} -
- Sess: {s.daemonid.slice(0, 8)} -
- {s.jobid && ( -
- Job: {s.jobid.slice(0, 8)} -
- )} -
-
-
- - - {formatCreatedTime(s.createdat)} - - {canClose ? ( - { - e.stopPropagation(); - fireAndForget(async () => { - try { - await RpcApi.SessionDeleteCommand(TabRpcClient, { - daemonid: s.daemonid, - }); - setSessions((prev) => prev.filter((x) => x.daemonid !== s.daemonid)); - } catch (e) { - console.log("error closing session:", e); - } - }); - }} - style={{ - fontSize: 11, - color: "var(--text-muted)", - cursor: "pointer", - opacity: 0.6, - display: "inline-flex", - alignItems: "center", - gap: 3, - }} - title="Close session" - > - - Close - - ) : ( - - {isActive ? "active" : `${blockCount} block${blockCount === 1 ? "" : "s"}`} - - )} -
-
- ); - })} -
-
- )} + ); } diff --git a/frontend/app/block/session-daemon-popup.tsx b/frontend/app/block/session-daemon-popup.tsx new file mode 100644 index 0000000000..3daa92302a --- /dev/null +++ b/frontend/app/block/session-daemon-popup.tsx @@ -0,0 +1,79 @@ +// Copyright 2026, Command Line Inc. +// SPDX-License-Identifier: Apache-2.0 + +import { FloatingPortal } from "@floating-ui/react"; +import { SessionDaemonIndicatorState } from "./session-daemon-hooks"; +import { SessionCreateRow, SessionRow } from "./session-daemon-rows"; + +const popupStyle = { + zIndex: 100, + width: "min(420px, calc(100vw - 24px))", + maxHeight: 360, + overflowY: "auto", + background: "color-mix(in srgb, var(--bg-secondary, #1e1e2e) 96%, black)", + border: "1px solid color-mix(in srgb, var(--border-primary, #45475a) 78%, transparent)", + borderRadius: 10, + padding: 8, + boxShadow: "0 18px 42px rgba(0,0,0,0.42), 0 2px 8px rgba(0,0,0,0.28)", +} as const; + +interface SessionDaemonPopupProps { + state: SessionDaemonIndicatorState; +} + +export function SessionDaemonPopup({ state }: SessionDaemonPopupProps) { + if (!state.showPopup) { + return null; + } + return ( + +
e.stopPropagation()} + onFocusCapture={(e) => e.stopPropagation()} + onClick={(e) => e.stopPropagation()} + > +
+
+ + + Sessions + +
+ + {state.sessions.length} + +
+ + {state.sameConnSessions.length === 0 && ( +
+ No sessions on this connection +
+ )} + {state.sameConnSessions.map((session) => ( + + ))} +
+
+ ); +} diff --git a/frontend/app/block/session-daemon-rows.tsx b/frontend/app/block/session-daemon-rows.tsx new file mode 100644 index 0000000000..8ba52e86ca --- /dev/null +++ b/frontend/app/block/session-daemon-rows.tsx @@ -0,0 +1,286 @@ +// Copyright 2026, Command Line Inc. +// SPDX-License-Identifier: Apache-2.0 + +import { SessionDaemonIndicatorState } from "./session-daemon-hooks"; +import { SessionInfo } from "./session-daemon-types"; + +const truncateStyle = { + minWidth: 0, + overflow: "hidden", + textOverflow: "ellipsis", + whiteSpace: "nowrap", +} as const; + +function formatCreatedTime(ms: number | undefined): string { + if (ms == null) return ""; + const d = new Date(ms); + const now = new Date(); + const diffMs = now.getTime() - d.getTime(); + const diffMin = Math.floor(diffMs / 60000); + if (diffMin < 1) return "just now"; + if (diffMin < 60) return `${diffMin}m ago`; + const diffHr = Math.floor(diffMin / 60); + if (diffHr < 24) return `${diffHr}h ago`; + const diffDay = Math.floor(diffHr / 24); + if (diffDay < 7) return `${diffDay}d ago`; + return d.toLocaleDateString(undefined, { month: "short", day: "numeric", year: "numeric" }); +} + +function SessionStatusPill({ status }: { status: string }) { + const isRunning = status === "running"; + return ( + + + {status || "unknown"} + + ); +} + +interface SessionCreateRowProps { + state: SessionDaemonIndicatorState; +} + +export function SessionCreateRow({ state }: SessionCreateRowProps) { + if (state.showCreateInput) { + return ( +
+ + state.setNewSessionName(e.target.value)} + onKeyDown={(e) => { + if (e.key === "Enter") { + const name = state.newSessionName.trim(); + state.handleCreateAndAttach(name || undefined); + state.setShowCreateInput(false); + state.setNewSessionName(""); + } + if (e.key === "Escape") { + state.setShowCreateInput(false); + state.setNewSessionName(""); + } + }} + placeholder="Session name (optional)" + style={{ + flex: 1, + background: "transparent", + border: "none", + outline: "none", + color: "#7dd3fc", + fontSize: 13, + fontWeight: 600, + }} + /> +
+ ); + } + return ( +
{ + state.setShowCreateInput(true); + setTimeout(() => state.createInputRef.current?.focus(), 0); + }} + style={{ + display: "flex", + alignItems: "center", + gap: 8, + padding: "8px 10px", + marginBottom: 4, + cursor: state.creating ? "default" : "pointer", + borderRadius: 8, + background: "rgba(56, 189, 248, 0.08)", + border: "1px solid rgba(56, 189, 248, 0.18)", + opacity: state.creating ? 0.5 : 1, + }} + > + + + {state.creating ? "Creating..." : "Create new session"} + +
+ ); +} + +interface SessionRowProps { + session: SessionInfo; + state: SessionDaemonIndicatorState; +} + +export function SessionRow({ session, state }: SessionRowProps) { + const isActive = session.daemonid === state.daemonId; + const blockCount = session.blocks?.length ?? 0; + const canClose = blockCount === 0 || session.status === "done"; + const displayStatus = session.status === "done" ? "done" : blockCount === 0 ? "idle" : session.status; + return ( +
state.handleAttach(session.daemonid)} + title={`${session.name || session.connection} · ${session.status}`} + style={{ + display: "grid", + gridTemplateColumns: "minmax(0, 1fr) auto", + gap: 10, + padding: "9px 10px", + marginTop: 4, + cursor: isActive ? "default" : "pointer", + borderRadius: 8, + fontSize: 13, + background: isActive ? "rgba(56, 189, 248, 0.12)" : "transparent", + border: isActive ? "1px solid rgba(56, 189, 248, 0.24)" : "1px solid transparent", + }} + onMouseEnter={(e) => { + if (!isActive) { + e.currentTarget.style.background = "rgba(148, 163, 184, 0.08)"; + } + }} + onMouseLeave={(e) => { + if (!isActive) { + e.currentTarget.style.background = "transparent"; + } + }} + > +
+ + + +
+ {state.editingId === session.daemonid ? ( + state.setEditName(e.target.value)} + onKeyDown={(e) => { + if (e.key === "Enter") state.handleSaveEdit(); + if (e.key === "Escape") state.handleCancelEdit(); + }} + onBlur={state.handleSaveEdit} + onClick={(e) => e.stopPropagation()} + style={{ + width: "100%", + fontWeight: 650, + color: "var(--text-primary)", + fontSize: 14, + lineHeight: "20px", + background: "rgba(148, 163, 184, 0.12)", + border: "1px solid rgba(56, 189, 248, 0.3)", + borderRadius: 4, + padding: "1px 6px", + outline: "none", + }} + /> + ) : ( +
{ + e.stopPropagation(); + state.handleStartEdit(session.daemonid, session.name); + }} + style={{ display: "flex", alignItems: "center", gap: 6, cursor: "text" }} + title="Click to rename" + > + + {session.name || "Unnamed session"} + + +
+ )} + {session.connection && ( +
+ {session.connection} +
+ )} +
+ Sess: {session.daemonid.slice(0, 8)} +
+ {session.jobid && ( +
+ Job: {session.jobid.slice(0, 8)} +
+ )} +
+
+
+ + + {formatCreatedTime(session.createdat)} + + {canClose ? ( + { + e.stopPropagation(); + state.handleDelete(session.daemonid); + }} + style={{ + fontSize: 11, + color: "var(--text-muted)", + cursor: "pointer", + opacity: 0.6, + display: "inline-flex", + alignItems: "center", + gap: 3, + }} + title="Close session" + > + + Close + + ) : ( + + {isActive ? "active" : `${blockCount} block${blockCount === 1 ? "" : "s"}`} + + )} +
+
+ ); +} diff --git a/frontend/app/block/session-daemon-types.ts b/frontend/app/block/session-daemon-types.ts new file mode 100644 index 0000000000..df40712215 --- /dev/null +++ b/frontend/app/block/session-daemon-types.ts @@ -0,0 +1,19 @@ +// Copyright 2026, Command Line Inc. +// SPDX-License-Identifier: Apache-2.0 + +export interface SessionDisplayData { + name: string | null; + isanonymous: boolean; +} + +export interface SessionInfo { + daemonid: string; + name: string; + connection: string; + status: string; + isanonymous: boolean; + createdat?: number; + blocks?: string[]; + jobid?: string; + lastactiveat?: number; +} diff --git a/frontend/app/view/term/term.tsx b/frontend/app/view/term/term.tsx index 3792cf821f..fcaf8f5021 100644 --- a/frontend/app/view/term/term.tsx +++ b/frontend/app/view/term/term.tsx @@ -361,14 +361,10 @@ const TerminalView = ({ blockId, model }: ViewComponentProps) => React.useEffect(() => { const termWrap = model.termRef.current; const daemonId = blockData?.meta?.["session:daemonid"]; - const blockJobId = blockData?.jobid; - console.log("[term:daemon-effect] block=%s daemon=%s blockJob=%s zoneId=%s", - blockId, daemonId || "(none)", blockJobId || "(none)", termWrap?.zoneId || "(no-termwrap)"); if (termWrap == null) { return; } if (!daemonId) { - console.log("[term:daemon-effect] block=%s no daemon, detaching zoneId=%s", blockId, termWrap.zoneId); fireAndForget(termWrap.detachFromDaemon.bind(termWrap)); return undefined; } @@ -384,21 +380,14 @@ const TerminalView = ({ blockId, model }: ViewComponentProps) => // This handles the race where SessionAttach sends the WaveObj update // before the job is started by the resync controller (~20ms window). 
if (info.status === "init" && retry < 15) { - console.log("[term:daemon-effect] block=%s daemon=%s jobId not ready, will retry (attempt %d, status=%s)", - blockId, daemonId, retry, info.status); retryTimer = setTimeout(() => tryAttach(retry + 1), 200); return; } - console.log("[term:daemon-effect] block=%s daemon=%s jobId not ready after %d retries, info=%o", - blockId, daemonId, retry, info); return; } if (termWrap.zoneId === info.jobid) { - console.log("[term:daemon-effect] block=%s zoneId already=%s, skipping attach", blockId, info.jobid); return; } - console.log("[term:daemon-effect] block=%s attaching zoneId %s -> %s", - blockId, termWrap.zoneId, info.jobid); await termWrap.attachToDaemon(info.jobid); } catch (e) { if (!cancelled) { diff --git a/pkg/blockcontroller/blockcontroller.go b/pkg/blockcontroller/blockcontroller.go index b74b1d853f..d7e82a02f9 100644 --- a/pkg/blockcontroller/blockcontroller.go +++ b/pkg/blockcontroller/blockcontroller.go @@ -10,7 +10,6 @@ import ( "fmt" "io/fs" "log" - "runtime" "strings" "sync" "time" @@ -27,7 +26,9 @@ import ( "github.com/wavetermdev/waveterm/pkg/waveobj" "github.com/wavetermdev/waveterm/pkg/wcore" "github.com/wavetermdev/waveterm/pkg/wps" + "github.com/wavetermdev/waveterm/pkg/wshrpc" "github.com/wavetermdev/waveterm/pkg/wshrpc/wshclient" + "github.com/wavetermdev/waveterm/pkg/wshutil" "github.com/wavetermdev/waveterm/pkg/wslconn" "github.com/wavetermdev/waveterm/pkg/wstore" ) @@ -340,14 +341,11 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts // Check if we need to start/restart status := controller.GetRuntimeStatus() if status.ShellProcStatus == Status_Running { - // For SessionDaemonController, verify the job is still alive. - // The remote job manager may have died, leaving the daemon with a stale JobId. - // If so, clear the JobId so Start() runs again on the next ResyncController call. 
if sdc, ok := controller.(*SessionDaemonController); ok { if daemon := sessiondaemon.Manager.Get(sdc.DaemonId); daemon != nil && daemon.JobId != "" { - dbDaemon, dbErr := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, sdc.DaemonId) - if dbErr != nil { - log.Printf("[sessiondaemon] resync: daemon=%s block=%s missing DB record, falling back to shell: %v", sdc.DaemonId, blockId, dbErr) + ensureResult, err := sessiondaemon.Manager.EnsureJobState(ctx, sdc.DaemonId, rtOpts, false) + if err != nil { + log.Printf("[sessiondaemon] resync: daemon=%s block=%s ensure failed, falling back to shell: %v", sdc.DaemonId, blockId, err) err = fallbackSessionDaemonToShell(ctx, sdc.DaemonId, blockId) if err != nil { return err @@ -356,26 +354,18 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts daemonId = "" controller = replaceBlockControllerWithShell(tabId, blockId, controllerName, connName) status = controller.GetRuntimeStatus() - } else { - jobState, stateErr := sessiondaemon.ClassifyJobManagerState(ctx, dbDaemon) - if stateErr != nil { - return fmt.Errorf("check session daemon job manager: %w", stateErr) - } - if jobState == sessiondaemon.JobManagerState_Dead { - log.Printf("[sessiondaemon] resync: daemon=%s block=%s job manager gone, falling back to shell", sdc.DaemonId, blockId) - err = fallbackSessionDaemonToShell(ctx, sdc.DaemonId, blockId) - if err != nil { - return err - } - daemonId = "" - existing = nil - controller = replaceBlockControllerWithShell(tabId, blockId, controllerName, connName) - status = controller.GetRuntimeStatus() - } else if jobState == sessiondaemon.JobManagerState_Unknown { - log.Printf("[sessiondaemon] resync: daemon=%s block=%s job state unknown, waiting", sdc.DaemonId, blockId) - } else { - log.Printf("[sessiondaemon] resync: daemon=%s block=%s job=%s alive, skipping", sdc.DaemonId, blockId, daemon.JobId) + } else if ensureResult.Action == sessiondaemon.DaemonEnsure_Fallback { + log.Printf("[sessiondaemon] resync: 
daemon=%s block=%s job manager gone, falling back to shell", sdc.DaemonId, blockId) + err = fallbackSessionDaemonToShell(ctx, sdc.DaemonId, blockId) + if err != nil { + return err } + daemonId = "" + existing = nil + controller = replaceBlockControllerWithShell(tabId, blockId, controllerName, connName) + status = controller.GetRuntimeStatus() + } else if ensureResult.Action == sessiondaemon.DaemonEnsure_Wait { + log.Printf("[sessiondaemon] resync: daemon=%s block=%s job state unknown, waiting", sdc.DaemonId, blockId) } } } @@ -417,17 +407,11 @@ func stopBlockController(blockId string) { if controller == nil { return } - stackBuf := make([]byte, 4096) - stackLen := runtime.Stack(stackBuf, false) - log.Printf("[sessiondaemon] stopBlockController: block=%s stack:\n%s", blockId, string(stackBuf[:stackLen])) controller.Stop(true, Status_Done, true) wstore.DeleteRTInfo(waveobj.MakeORef(waveobj.OType_Block, blockId)) } func DestroyBlockController(blockId string) { - stackBuf := make([]byte, 4096) - stackLen := runtime.Stack(stackBuf, false) - log.Printf("[sessiondaemon] DestroyBlockController: block=%s stack:\n%s", blockId, string(stackBuf[:stackLen])) stopBlockController(blockId) deleteController(blockId) } @@ -688,3 +672,19 @@ func makeSwapToken(ctx context.Context, logCtx context.Context, blockId string, token.ScriptText = getCustomInitScript(logCtx, blockMeta, remoteName, shellType) return token } + +func attachRpcContextToSwapToken(swapToken *shellutil.TokenSwapEntry, blockId string, connName string, sockName string) error { + rpcContext := wshrpc.RpcContext{ + ProcRoute: true, + SockName: sockName, + BlockId: blockId, + Conn: connName, + } + jwtStr, err := wshutil.MakeClientJWTToken(rpcContext) + if err != nil { + return fmt.Errorf("error making jwt token: %w", err) + } + swapToken.RpcContext = &rpcContext + swapToken.Env[wshutil.WaveJwtTokenVarName] = jwtStr + return nil +} diff --git a/pkg/blockcontroller/sessiondaemoncontroller.go 
b/pkg/blockcontroller/sessiondaemoncontroller.go index db56f31e80..2bff0e1dc4 100644 --- a/pkg/blockcontroller/sessiondaemoncontroller.go +++ b/pkg/blockcontroller/sessiondaemoncontroller.go @@ -62,8 +62,6 @@ func (sdc *SessionDaemonController) getNextInputSeq() (string, int) { } func (sdc *SessionDaemonController) Start(ctx context.Context, blockMeta waveobj.MetaMapType, rtOpts *waveobj.RuntimeOpts, force bool) error { - log.Printf("[sessiondaemon] start: block=%s daemon=%s conn=%s force=%v", sdc.BlockId, sdc.DaemonId, sdc.ConnName, force) - daemon := sessiondaemon.Manager.Get(sdc.DaemonId) if daemon == nil { log.Printf("[sessiondaemon] start: daemon %s not found in manager", sdc.DaemonId) @@ -72,67 +70,31 @@ func (sdc *SessionDaemonController) Start(ctx context.Context, blockMeta waveobj sessiondaemon.Manager.AttachBlock(ctx, sdc.DaemonId, sdc.BlockId) - dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, sdc.DaemonId) + ensureResult, err := sessiondaemon.Manager.EnsureJobState(ctx, sdc.DaemonId, rtOpts, true) if err != nil { - return fmt.Errorf("error getting session daemon: %w", err) + return err } - - if dbDaemon.Status == sessiondaemon.Status_Done { - log.Printf("[sessiondaemon] start: daemon=%s is done, falling back block=%s to shell", sdc.DaemonId, sdc.BlockId) - return fallbackSessionDaemonToShell(ctx, sdc.DaemonId, sdc.BlockId) - } - - if dbDaemon.JobId != "" { - jobState, stateErr := sessiondaemon.ClassifyJobManagerState(ctx, dbDaemon) - if stateErr != nil { - return fmt.Errorf("check session daemon job manager: %w", stateErr) - } - if jobState == sessiondaemon.JobManagerState_Dead { - log.Printf("[sessiondaemon] start: daemon=%s job manager gone, falling back block=%s to shell", sdc.DaemonId, sdc.BlockId) - return fallbackSessionDaemonToShell(ctx, sdc.DaemonId, sdc.BlockId) - } - if jobState == sessiondaemon.JobManagerState_Unknown { - log.Printf("[sessiondaemon] start: daemon=%s job state unknown, waiting block=%s", sdc.DaemonId, 
sdc.BlockId) - return ErrSessionDaemonJobUnknown - } - err := sdc.tryReconnect(ctx, daemon, dbDaemon, rtOpts) - if err != nil { - dbDaemon2, dbErr := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, sdc.DaemonId) - if dbErr == nil && dbDaemon2.Status == sessiondaemon.Status_Done { - log.Printf("[sessiondaemon] start: daemon=%s reconnect confirmed done, falling back block=%s to shell", sdc.DaemonId, sdc.BlockId) - return fallbackSessionDaemonToShell(ctx, sdc.DaemonId, sdc.BlockId) - } - return err - } + switch ensureResult.Action { + case sessiondaemon.DaemonEnsure_Ready: sdc.incrementVersion() sdc.sendControllerStatus() return nil - } - - if dbDaemon.Status == sessiondaemon.Status_Disconnected { + case sessiondaemon.DaemonEnsure_Wait: return ErrSessionDaemonJobUnknown + case sessiondaemon.DaemonEnsure_Fallback: + log.Printf("[sessiondaemon] start: daemon=%s is done, falling back block=%s to shell", sdc.DaemonId, sdc.BlockId) + return fallbackSessionDaemonToShell(ctx, sdc.DaemonId, sdc.BlockId) + case sessiondaemon.DaemonEnsure_Start: + return sdc.createJobAndSync(ctx, blockMeta, rtOpts) } - return sdc.createJobAndSync(ctx, blockMeta, rtOpts) -} - -// tryReconnect attempts to reconnect to the daemon's existing job. 
-func (sdc *SessionDaemonController) tryReconnect(ctx context.Context, daemon *sessiondaemon.SessionDaemon, dbDaemon *waveobj.SessionDaemon, rtOpts *waveobj.RuntimeOpts) error { - log.Printf("[sessiondaemon] start: attempting reconnect to job=%s status=%s", dbDaemon.JobId, dbDaemon.Status) - err := daemon.Reconnect(ctx, dbDaemon, rtOpts) - if err == nil { - log.Printf("[sessiondaemon] start: reconnect ok block=%s job=%s", sdc.BlockId, dbDaemon.JobId) - return nil - } - log.Printf("[sessiondaemon] start: reconnect failed block=%s job=%s err=%v", sdc.BlockId, dbDaemon.JobId, err) - return err + return fmt.Errorf("unknown session daemon ensure action %q", ensureResult.Action) } // createJobAndSync starts a new remote job for the daemon and syncs // the resulting JobId to all attached blocks so the frontend can // switch its zoneId. func (sdc *SessionDaemonController) createJobAndSync(ctx context.Context, blockMeta waveobj.MetaMapType, rtOpts *waveobj.RuntimeOpts) error { - log.Printf("[sessiondaemon] start: starting new job block=%s", sdc.BlockId) fsErr := filestore.WFS.MakeFile(ctx, sdc.BlockId, wavebase.BlockFile_Term, nil, wshrpc.FileOpts{MaxSize: DefaultTermMaxFileSize, Circular: true}) if fsErr != nil && fsErr != fs.ErrExist { return fmt.Errorf("error creating block term file: %w", fsErr) @@ -142,14 +104,8 @@ func (sdc *SessionDaemonController) createJobAndSync(ctx context.Context, blockM log.Printf("[sessiondaemon] start: new job failed block=%s err=%v", sdc.BlockId, err) return fmt.Errorf("failed to start job: %w", err) } - log.Printf("[sessiondaemon] start: new job started block=%s job=%s", sdc.BlockId, jobId) - daemon := sessiondaemon.Manager.Get(sdc.DaemonId) - if daemon == nil { - return fmt.Errorf("session daemon %s not found in manager", sdc.DaemonId) - } - - err = daemon.SetJobId(ctx, jobId) + err = sessiondaemon.Manager.SetJobRunning(ctx, sdc.DaemonId, jobId) if err != nil { log.Printf("[sessiondaemon] start: set job id failed daemon=%s job=%s err=%v", 
sdc.DaemonId, jobId, err) return fmt.Errorf("failed to set job id on daemon: %w", err) @@ -157,7 +113,6 @@ func (sdc *SessionDaemonController) createJobAndSync(ctx context.Context, blockM sdc.syncJobIdToBlocks(ctx, jobId) - log.Printf("[sessiondaemon] start: done block=%s daemon=%s job=%s", sdc.BlockId, sdc.DaemonId, jobId) sdc.incrementVersion() sdc.sendControllerStatus() return nil @@ -168,18 +123,15 @@ func (sdc *SessionDaemonController) createJobAndSync(ctx context.Context, blockM // attachToDaemon, switching the terminal zoneId to the new job's output stream. func (sdc *SessionDaemonController) syncJobIdToBlocks(ctx context.Context, jobId string) { attachedBlocks := sessiondaemon.Manager.GetBlocksForDaemon(sdc.DaemonId) - log.Printf("[sessiondaemon] start: syncing jobId=%s to %d attached blocks for daemon=%s", jobId, len(attachedBlocks), sdc.DaemonId) for _, blockId := range attachedBlocks { wstore.DBUpdateFn(ctx, blockId, func(block *waveobj.Block) { block.JobId = jobId }) wcore.SendWaveObjUpdate(waveobj.MakeORef(waveobj.OType_Block, blockId)) - log.Printf("[sessiondaemon] start: synced jobId=%s to block=%s", jobId, blockId) } } func (sdc *SessionDaemonController) startNewJob(ctx context.Context, blockMeta waveobj.MetaMapType, rtOpts *waveobj.RuntimeOpts) (string, error) { - log.Printf("[sessiondaemon] startNewJob: block=%s conn=%s", sdc.BlockId, sdc.ConnName) termSize := waveobj.TermSize{ Rows: shellutil.DefaultTermRows, Cols: shellutil.DefaultTermCols, @@ -205,18 +157,10 @@ func (sdc *SessionDaemonController) startNewJob(ctx context.Context, blockMeta w shellType := shellutil.GetShellTypeFromShellPath(remoteInfo.Shell) swapToken := makeSwapToken(ctx, ctx, sdc.BlockId, blockMeta, sdc.ConnName, shellType) sockName := wavebase.GetPersistentRemoteSockName(wstore.GetClientId()) - rpcContext := wshrpc.RpcContext{ - ProcRoute: true, - SockName: sockName, - BlockId: sdc.BlockId, - Conn: sdc.ConnName, - } - jwtStr, err := wshutil.MakeClientJWTToken(rpcContext) + err 
= attachRpcContextToSwapToken(swapToken, sdc.BlockId, sdc.ConnName, sockName) if err != nil { - return "", fmt.Errorf("error making jwt token: %w", err) + return "", err } - swapToken.RpcContext = &rpcContext - swapToken.Env[wshutil.WaveJwtTokenVarName] = jwtStr cmdOpts := shellexec.CommandOptsType{ Interactive: true, Login: true, @@ -242,8 +186,6 @@ func (sdc *SessionDaemonController) Stop(graceful bool, newStatus string, destro } ctx := context.Background() sessiondaemon.Manager.DetachBlock(ctx, sdc.DaemonId, sdc.BlockId) - log.Printf("[sessiondaemon] stop: block=%s daemon=%s remaining=%d", - sdc.BlockId, sdc.DaemonId, len(sessiondaemon.Manager.GetBlocksForDaemon(sdc.DaemonId))) } func (sdc *SessionDaemonController) SendInput(inputUnion *BlockInputUnion) error { @@ -274,7 +216,6 @@ func (sdc *SessionDaemonController) GetRuntimeStatus() *BlockControllerRuntimeSt rtn.ShellProcStatus = "done" } }) - log.Printf("[sessiondaemon] GetRuntimeStatus: block=%s daemon=%s status=%s version=%d", rtn.BlockId, sdc.DaemonId, rtn.ShellProcStatus, rtn.Version) return &rtn } @@ -300,7 +241,6 @@ func (sdc *SessionDaemonController) sendControllerStatus() { } func autoCreateSessionDaemon(ctx context.Context, blockId string, blockMeta waveobj.MetaMapType, connName string, rtOpts *waveobj.RuntimeOpts) (string, error) { - log.Printf("[sessiondaemon] autoCreate: block=%s conn=%s", blockId, connName) dbDaemon := &waveobj.SessionDaemon{ OID: uuid.New().String(), Name: "", @@ -329,6 +269,5 @@ func autoCreateSessionDaemon(ctx context.Context, blockId string, blockMeta wave return "", fmt.Errorf("create session daemon in manager: %w", err) } - log.Printf("[sessiondaemon] autoCreate: done block=%s daemon=%s", blockId, dbDaemon.OID) return dbDaemon.OID, nil } diff --git a/pkg/blockcontroller/shellcontroller.go b/pkg/blockcontroller/shellcontroller.go index a410225394..7d17245aec 100644 --- a/pkg/blockcontroller/shellcontroller.go +++ b/pkg/blockcontroller/shellcontroller.go @@ -466,18 +466,10 @@ 
func (bc *ShellController) setupAndStartShellProcess(logCtx context.Context, rc } } else { sockName := conn.GetDomainSocketName() - rpcContext := wshrpc.RpcContext{ - ProcRoute: true, - SockName: sockName, - BlockId: bc.BlockId, - Conn: conn.Opts.String(), - } - jwtStr, err := wshutil.MakeClientJWTToken(rpcContext) + err = attachRpcContextToSwapToken(swapToken, bc.BlockId, conn.Opts.String(), sockName) if err != nil { - return nil, fmt.Errorf("error making jwt token: %w", err) + return nil, err } - swapToken.RpcContext = &rpcContext - swapToken.Env[wshutil.WaveJwtTokenVarName] = jwtStr shellProc, err = shellexec.StartRemoteShellProc(ctx, logCtx, rc.TermSize, cmdStr, cmdOpts, conn) if err != nil { conn.SetWshError(err) diff --git a/pkg/sessiondaemon/reaper.go b/pkg/sessiondaemon/reaper.go new file mode 100644 index 0000000000..06b85c1989 --- /dev/null +++ b/pkg/sessiondaemon/reaper.go @@ -0,0 +1,179 @@ +package sessiondaemon + +import ( + "context" + "log" + "time" + + "github.com/google/uuid" + "github.com/wavetermdev/waveterm/pkg/waveobj" + "github.com/wavetermdev/waveterm/pkg/wstore" +) + +func (sd *SessionDaemonManager) StartIdleReaper(ctx context.Context) { + go func() { + ticker := time.NewTicker(IdleCheckInterval * time.Second) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + sd.reapIdleDaemons(ctx) + sd.verifyConsistency(ctx) + } + } + }() +} + +func (sd *SessionDaemonManager) cleanupDeadBlocks(ctx context.Context, daemonId string, memDaemon *SessionDaemon) { + memDaemon.Lock.Lock() + blockIds := make([]string, 0, len(memDaemon.Blocks)) + for blockId := range memDaemon.Blocks { + blockIds = append(blockIds, blockId) + } + memDaemon.Lock.Unlock() + + var deadBlocks []string + for _, blockId := range blockIds { + _, err := wstore.DBMustGet[*waveobj.Block](ctx, blockId) + if err != nil { + deadBlocks = append(deadBlocks, blockId) + } + } + + if len(deadBlocks) == 0 { + return + } + + log.Printf("[sessiondaemon] 
cleanupDeadBlocks: daemon=%s removing %d dead blocks: %v", daemonId, len(deadBlocks), deadBlocks) + + memDaemon.Lock.Lock() + for _, blockId := range deadBlocks { + delete(memDaemon.Blocks, blockId) + } + remaining := len(memDaemon.Blocks) + memDaemon.Lock.Unlock() + + if remaining == 0 { + sd.startIdleCountdown(ctx, daemonId) + } +} + +func (sd *SessionDaemonManager) reapIdleDaemons(ctx context.Context) { + allDaemons, err := wstore.DBGetAllObjsByType[*waveobj.SessionDaemon](ctx, waveobj.OType_SessionDaemon) + if err != nil { + return + } + + for _, dbDaemon := range allDaemons { + sd.Lock.Lock() + memDaemon, hasMem := sd.Daemons[dbDaemon.OID] + sd.Lock.Unlock() + + switch dbDaemon.Status { + case Status_Running: + sd.reapRunning(ctx, dbDaemon, memDaemon, hasMem) + case Status_Done: + sd.reapDone(ctx, dbDaemon, memDaemon, hasMem) + } + } +} + +func (sd *SessionDaemonManager) reapRunning(ctx context.Context, dbDaemon *waveobj.SessionDaemon, memDaemon *SessionDaemon, hasMem bool) { + if hasMem && memDaemon.HasAttachedBlocks() { + sd.cleanupDeadBlocks(ctx, dbDaemon.OID, memDaemon) + if memDaemon.HasAttachedBlocks() { + return + } + } + + if dbDaemon.IdleTimeout <= 0 { + return + } + + remaining := sd.advanceIdleTimer(ctx, dbDaemon.OID) + if remaining > 0 { + return + } + + log.Printf("[sessiondaemon:%s] idle timeout reached, terminating", dbDaemon.OID) + if hasMem { + err := memDaemon.Stop(ctx) + if err != nil { + log.Printf("[sessiondaemon:%s] error stopping daemon, will retry next cycle: %v", dbDaemon.OID, err) + return + } + sd.Remove(dbDaemon.OID) + } + if err := wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, dbDaemon.OID); err != nil { + log.Printf("[sessiondaemon:%s] reapRunning: error deleting from DB: %v", dbDaemon.OID, err) + } +} + +func (sd *SessionDaemonManager) reapDone(ctx context.Context, dbDaemon *waveobj.SessionDaemon, memDaemon *SessionDaemon, hasMem bool) { + if hasMem && memDaemon.HasAttachedBlocks() { + return + } + + if dbDaemon.IdleTimeout 
<= 0 { + return + } + + if dbDaemon.IdleSince <= 0 { + if err := wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbD *waveobj.SessionDaemon) { + dbD.IdleSince = DoneReapTimeout + }); err != nil { + log.Printf("[sessiondaemon:%s] reapDone: error setting done reap timeout: %v", dbDaemon.OID, err) + } + return + } + + remaining := sd.advanceIdleTimer(ctx, dbDaemon.OID) + if remaining > 0 { + return + } + + log.Printf("[sessiondaemon:%s] done daemon reaped", dbDaemon.OID) + if hasMem { + sd.Remove(dbDaemon.OID) + } + if err := wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, dbDaemon.OID); err != nil { + log.Printf("[sessiondaemon:%s] reapDone: error deleting from DB: %v", dbDaemon.OID, err) + } +} + +func (sd *SessionDaemonManager) verifyConsistency(ctx context.Context) { + daemons, err := wstore.DBGetAllObjsByType[*waveobj.SessionDaemon](ctx, waveobj.OType_SessionDaemon) + if err != nil { + return + } + + dbIds := make(map[string]bool) + for _, dbDaemon := range daemons { + dbIds[dbDaemon.OID] = true + } + + sd.Lock.Lock() + defer sd.Lock.Unlock() + + for id := range sd.Daemons { + if !dbIds[id] { + log.Printf("[sessiondaemon] consistency: daemon %s in memory but not in DB, removing from memory", id) + delete(sd.Daemons, id) + } + } + + for _, dbDaemon := range daemons { + if _, exists := sd.Daemons[dbDaemon.OID]; !exists { + log.Printf("[sessiondaemon] consistency: daemon %s in DB but not in memory, loading", dbDaemon.OID) + sd.Daemons[dbDaemon.OID] = &SessionDaemon{ + DaemonId: dbDaemon.OID, + Name: dbDaemon.Name, + JobId: dbDaemon.JobId, + InputSessionId: uuid.New().String(), + Blocks: make(map[string]bool), + } + } + } +} diff --git a/pkg/sessiondaemon/sessiondaemon.go b/pkg/sessiondaemon/sessiondaemon.go index 97624f8e49..a2e3bea59e 100644 --- a/pkg/sessiondaemon/sessiondaemon.go +++ b/pkg/sessiondaemon/sessiondaemon.go @@ -36,6 +36,13 @@ const ( JobManagerState_Unknown = "unknown" ) +const ( + DaemonEnsure_Ready = "ready" + DaemonEnsure_Wait = "wait" + 
DaemonEnsure_Fallback = "fallback" + DaemonEnsure_Start = "start" +) + type SessionDaemon struct { Lock sync.Mutex @@ -52,6 +59,11 @@ type SessionDaemonManager struct { Daemons map[string]*SessionDaemon } +type EnsureResult struct { + Action string + JobId string +} + var Manager = &SessionDaemonManager{ Daemons: make(map[string]*SessionDaemon), } @@ -89,29 +101,6 @@ func (sd *SessionDaemon) HasBlock(blockId string) bool { return sd.Blocks[blockId] } -func (sd *SessionDaemon) SetJobId(ctx context.Context, jobId string) error { - sd.Lock.Lock() - oldJobId := sd.JobId - sd.JobId = jobId - sd.Lock.Unlock() - log.Printf("[sessiondaemon:%s] SetJobId: %s -> %s", sd.DaemonId, oldJobId, jobId) - - err := wstore.DBUpdateFn(ctx, sd.DaemonId, func(sdDb *waveobj.SessionDaemon) { - sdDb.JobId = jobId - sdDb.Status = Status_Running - }) - if err != nil { - // Roll back memory to keep it consistent with DB. - sd.Lock.Lock() - sd.JobId = oldJobId - sd.Lock.Unlock() - log.Printf("[sessiondaemon:%s] SetJobId: DB update failed, rolled back to %s: %v", sd.DaemonId, oldJobId, err) - return err - } - log.Printf("[sessiondaemon:%s] SetJobId: DB updated (status=running job=%s)", sd.DaemonId, jobId) - return nil -} - func (sd *SessionDaemon) Reconnect(ctx context.Context, dbDaemon *waveobj.SessionDaemon, rtOpts *waveobj.RuntimeOpts) error { if dbDaemon.JobId == "" { return fmt.Errorf("no jobid to reconnect") @@ -200,7 +189,6 @@ func (sd *SessionDaemonManager) GetOrCreate(ctx context.Context, dbDaemon *waveo defer sd.Lock.Unlock() if existing, ok := sd.Daemons[dbDaemon.OID]; ok { - log.Printf("[sessiondaemon] GetOrCreate: found existing daemon=%s job=%s", dbDaemon.OID, dbDaemon.JobId) existing.Lock.Lock() if existing.JobId == "" { existing.JobId = dbDaemon.JobId @@ -209,7 +197,6 @@ func (sd *SessionDaemonManager) GetOrCreate(ctx context.Context, dbDaemon *waveo return existing, nil } - log.Printf("[sessiondaemon] GetOrCreate: creating new daemon=%s name=%q", dbDaemon.OID, dbDaemon.Name) 
daemon := &SessionDaemon{ DaemonId: dbDaemon.OID, Name: dbDaemon.Name, @@ -233,8 +220,57 @@ func (sd *SessionDaemonManager) Remove(daemonId string) { delete(sd.Daemons, daemonId) } +func (sd *SessionDaemonManager) SetJobRunning(ctx context.Context, daemonId string, jobId string) error { + daemon := sd.Get(daemonId) + var oldJobId string + if daemon != nil { + daemon.Lock.Lock() + oldJobId = daemon.JobId + daemon.JobId = jobId + daemon.Lock.Unlock() + } + + err := wstore.DBUpdateFn(ctx, daemonId, func(sdDb *waveobj.SessionDaemon) { + sdDb.JobId = jobId + sdDb.Status = Status_Running + }) + if err != nil { + if daemon != nil { + daemon.Lock.Lock() + daemon.JobId = oldJobId + daemon.Lock.Unlock() + } + log.Printf("[sessiondaemon:%s] SetJobRunning: DB update failed: %v", daemonId, err) + return err + } + return nil +} + +func (sd *SessionDaemonManager) clearJobDone(ctx context.Context, daemonId string) error { + daemon := sd.Get(daemonId) + var oldJobId string + if daemon != nil { + daemon.Lock.Lock() + oldJobId = daemon.JobId + daemon.JobId = "" + daemon.Lock.Unlock() + } + + if err := wstore.DBUpdateFn(ctx, daemonId, func(dbSd *waveobj.SessionDaemon) { + dbSd.JobId = "" + dbSd.Status = Status_Done + }); err != nil { + if daemon != nil { + daemon.Lock.Lock() + daemon.JobId = oldJobId + daemon.Lock.Unlock() + } + return err + } + return nil +} + func (sd *SessionDaemonManager) AttachBlock(ctx context.Context, daemonId string, blockId string) { - log.Printf("[sessiondaemon] AttachBlock: daemon=%s block=%s", daemonId, blockId) sd.Lock.Lock() daemon, ok := sd.Daemons[daemonId] if !ok { @@ -249,7 +285,6 @@ func (sd *SessionDaemonManager) AttachBlock(ctx context.Context, daemonId string } func (sd *SessionDaemonManager) DetachBlock(ctx context.Context, daemonId string, blockId string) { - log.Printf("[sessiondaemon] DetachBlock: daemon=%s block=%s", daemonId, blockId) sd.Lock.Lock() daemon, ok := sd.Daemons[daemonId] if !ok { @@ -334,29 +369,8 @@ func (sd 
*SessionDaemonManager) SendInput(daemonId string, inputData []byte, sig return daemon.SendInput(ctx, inputData, sigName, termSize) } -// MarkDone clears the daemon's JobId and sets its status to Done, -// both in memory and in the database. Used when the resync controller -// detects that a daemon's remote job manager has exited. func (sd *SessionDaemonManager) MarkDone(ctx context.Context, daemonId string) error { - sd.Lock.Lock() - daemon, ok := sd.Daemons[daemonId] - sd.Lock.Unlock() - var oldJobId string - if ok { - daemon.Lock.Lock() - oldJobId = daemon.JobId - daemon.JobId = "" - daemon.Lock.Unlock() - } - if err := wstore.DBUpdateFn(ctx, daemonId, func(dbSd *waveobj.SessionDaemon) { - dbSd.JobId = "" - dbSd.Status = Status_Done - }); err != nil { - if ok { - daemon.Lock.Lock() - daemon.JobId = oldJobId - daemon.Lock.Unlock() - } + if err := sd.clearJobDone(ctx, daemonId); err != nil { log.Printf("[sessiondaemon:%s] MarkDone: DB update failed: %v", daemonId, err) return err } @@ -395,6 +409,57 @@ func ClassifyJobManagerState(ctx context.Context, dbDaemon *waveobj.SessionDaemo return JobManagerState_Dead, nil } +func (sd *SessionDaemonManager) EnsureJobState(ctx context.Context, daemonId string, rtOpts *waveobj.RuntimeOpts, reconnect bool) (*EnsureResult, error) { + dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, daemonId) + if err != nil { + return nil, fmt.Errorf("get session daemon: %w", err) + } + + memDaemon, err := sd.GetOrCreate(ctx, dbDaemon) + if err != nil { + return nil, fmt.Errorf("create session daemon in manager: %w", err) + } + + if dbDaemon.Status == Status_Done { + return &EnsureResult{Action: DaemonEnsure_Fallback}, nil + } + + if dbDaemon.JobId == "" { + if dbDaemon.Status == Status_Disconnected { + return &EnsureResult{Action: DaemonEnsure_Wait}, nil + } + return &EnsureResult{Action: DaemonEnsure_Start}, nil + } + + jobState, err := ClassifyJobManagerState(ctx, dbDaemon) + if err != nil { + return nil, fmt.Errorf("check 
session daemon job manager: %w", err) + } + switch jobState { + case JobManagerState_Dead: + if err := sd.MarkDone(ctx, daemonId); err != nil { + return nil, err + } + return &EnsureResult{Action: DaemonEnsure_Fallback}, nil + case JobManagerState_Unknown: + return &EnsureResult{Action: DaemonEnsure_Wait}, nil + } + + if !reconnect { + return &EnsureResult{Action: DaemonEnsure_Ready, JobId: dbDaemon.JobId}, nil + } + + err = memDaemon.Reconnect(ctx, dbDaemon, rtOpts) + if err != nil { + dbDaemon2, dbErr := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, daemonId) + if dbErr == nil && dbDaemon2.Status == Status_Done { + return &EnsureResult{Action: DaemonEnsure_Fallback}, nil + } + return nil, err + } + return &EnsureResult{Action: DaemonEnsure_Ready, JobId: dbDaemon.JobId}, nil +} + // GetMemJobId returns the in-memory JobId for a daemon, used as a // fallback when the DB read returns stale data (e.g., SessionInfoCommand // called before a SetJobId transaction is visible). @@ -442,34 +507,26 @@ func (sd *SessionDaemonManager) RecordActivity(ctx context.Context, daemonId str return nil } -// ClearJobIdFromDaemons clears the JobId from all daemons (memory + DB) -// whose job matches jobId. Called when a remote job manager exits. 
func (sd *SessionDaemonManager) ClearJobIdFromDaemons(ctx context.Context, jobId string) { sd.Lock.Lock() - var affectedDaemonIds []string + var daemonIds []string for _, daemon := range sd.Daemons { daemon.Lock.Lock() if daemon.JobId == jobId { - oldDaemonJobId := daemon.JobId - daemonId := daemon.DaemonId - daemon.JobId = "" + daemonIds = append(daemonIds, daemon.DaemonId) daemon.Lock.Unlock() - if err := wstore.DBUpdateFn(ctx, daemonId, func(dbSd *waveobj.SessionDaemon) { - dbSd.JobId = "" - dbSd.Status = Status_Done - }); err != nil { - log.Printf("[sessiondaemon:%s] ClearJobIdFromDaemons: DB update failed, memory stale (was job=%s): %v", - daemonId, oldDaemonJobId, err) - } - affectedDaemonIds = append(affectedDaemonIds, daemonId) - log.Printf("[sessiondaemon:%s] ClearJobIdFromDaemons: job=%s cleared, status=done", daemonId, jobId) continue } daemon.Lock.Unlock() } sd.Lock.Unlock() - for _, daemonId := range affectedDaemonIds { + for _, daemonId := range daemonIds { + if err := sd.clearJobDone(ctx, daemonId); err != nil { + log.Printf("[sessiondaemon:%s] ClearJobIdFromDaemons: DB update failed: %v", daemonId, err) + continue + } + log.Printf("[sessiondaemon:%s] ClearJobIdFromDaemons: job=%s cleared, status=done", daemonId, jobId) if OnDaemonJobDoneFn != nil { OnDaemonJobDoneFn(ctx, daemonId) } @@ -529,202 +586,19 @@ func (sd *SessionDaemonManager) OnConnectionUp(ctx context.Context, connName str continue } - jobState, err := ClassifyJobManagerState(ctx, dbDaemon) + ensureResult, err := sd.EnsureJobState(ctx, dbDaemon.OID, nil, true) if err != nil { log.Printf("[sessiondaemon:%s] OnConnectionUp: error checking job manager state: %v", dbDaemon.OID, err) continue } - switch jobState { - case JobManagerState_Alive: - log.Printf("[sessiondaemon:%s] OnConnectionUp: remote job manager alive, reconnecting", dbDaemon.OID) - sd.Lock.Lock() - memDaemon := sd.Daemons[dbDaemon.OID] - sd.Lock.Unlock() - if memDaemon != nil { - err := memDaemon.Reconnect(ctx, dbDaemon, nil) 
- if err != nil { - log.Printf("[sessiondaemon:%s] OnConnectionUp: reconnect failed: %v", dbDaemon.OID, err) - } - } - case JobManagerState_Dead: + switch ensureResult.Action { + case DaemonEnsure_Fallback: log.Printf("[sessiondaemon:%s] OnConnectionUp: remote job manager dead, falling back", dbDaemon.OID) if OnDaemonJobDoneFn != nil { OnDaemonJobDoneFn(ctx, dbDaemon.OID) } - case JobManagerState_Unknown: + case DaemonEnsure_Wait: log.Printf("[sessiondaemon:%s] OnConnectionUp: job manager state unknown, waiting", dbDaemon.OID) } } } - -func (sd *SessionDaemonManager) StartIdleReaper(ctx context.Context) { - go func() { - ticker := time.NewTicker(IdleCheckInterval * time.Second) - defer ticker.Stop() - for { - select { - case <-ctx.Done(): - return - case <-ticker.C: - sd.reapIdleDaemons(ctx) - sd.verifyConsistency(ctx) - } - } - }() -} - -// cleanupDeadBlocks removes block IDs from the daemon's in-memory -// Blocks map that no longer exist in the database. This handles the -// case where a block was deleted without calling DetachBlock. -func (sd *SessionDaemonManager) cleanupDeadBlocks(ctx context.Context, daemonId string, memDaemon *SessionDaemon) { - // Collect block IDs under the daemon lock, then release it for DB queries. 
- memDaemon.Lock.Lock() - blockIds := make([]string, 0, len(memDaemon.Blocks)) - for blockId := range memDaemon.Blocks { - blockIds = append(blockIds, blockId) - } - memDaemon.Lock.Unlock() - - var deadBlocks []string - for _, blockId := range blockIds { - _, err := wstore.DBMustGet[*waveobj.Block](ctx, blockId) - if err != nil { - deadBlocks = append(deadBlocks, blockId) - } - } - - if len(deadBlocks) == 0 { - return - } - - log.Printf("[sessiondaemon] cleanupDeadBlocks: daemon=%s removing %d dead blocks: %v", daemonId, len(deadBlocks), deadBlocks) - - memDaemon.Lock.Lock() - for _, blockId := range deadBlocks { - delete(memDaemon.Blocks, blockId) - } - remaining := len(memDaemon.Blocks) - memDaemon.Lock.Unlock() - - if remaining == 0 { - sd.startIdleCountdown(ctx, daemonId) - } -} - -func (sd *SessionDaemonManager) reapIdleDaemons(ctx context.Context) { - allDaemons, err := wstore.DBGetAllObjsByType[*waveobj.SessionDaemon](ctx, waveobj.OType_SessionDaemon) - if err != nil { - return - } - - for _, dbDaemon := range allDaemons { - sd.Lock.Lock() - memDaemon, hasMem := sd.Daemons[dbDaemon.OID] - sd.Lock.Unlock() - - switch dbDaemon.Status { - case Status_Running: - sd.reapRunning(ctx, dbDaemon, memDaemon, hasMem) - case Status_Done: - sd.reapDone(ctx, dbDaemon, memDaemon, hasMem) - } - } -} - -func (sd *SessionDaemonManager) reapRunning(ctx context.Context, dbDaemon *waveobj.SessionDaemon, memDaemon *SessionDaemon, hasMem bool) { - if hasMem && memDaemon.HasAttachedBlocks() { - sd.cleanupDeadBlocks(ctx, dbDaemon.OID, memDaemon) - if memDaemon.HasAttachedBlocks() { - return - } - } - - if dbDaemon.IdleTimeout <= 0 { - return - } - - remaining := sd.advanceIdleTimer(ctx, dbDaemon.OID) - if remaining > 0 { - return - } - - log.Printf("[sessiondaemon:%s] idle timeout reached, terminating", dbDaemon.OID) - if hasMem { - err := memDaemon.Stop(ctx) - if err != nil { - log.Printf("[sessiondaemon:%s] error stopping daemon, will retry next cycle: %v", dbDaemon.OID, err) - 
return - } - sd.Remove(dbDaemon.OID) - } - if err := wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, dbDaemon.OID); err != nil { - log.Printf("[sessiondaemon:%s] reapRunning: error deleting from DB: %v", dbDaemon.OID, err) - } -} - -func (sd *SessionDaemonManager) reapDone(ctx context.Context, dbDaemon *waveobj.SessionDaemon, memDaemon *SessionDaemon, hasMem bool) { - if hasMem && memDaemon.HasAttachedBlocks() { - return - } - - if dbDaemon.IdleTimeout <= 0 { - return - } - - if dbDaemon.IdleSince <= 0 { - if err := wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbD *waveobj.SessionDaemon) { - dbD.IdleSince = DoneReapTimeout - }); err != nil { - log.Printf("[sessiondaemon:%s] reapDone: error setting done reap timeout: %v", dbDaemon.OID, err) - } - return - } - - remaining := sd.advanceIdleTimer(ctx, dbDaemon.OID) - if remaining > 0 { - return - } - - log.Printf("[sessiondaemon:%s] done daemon reaped", dbDaemon.OID) - if hasMem { - sd.Remove(dbDaemon.OID) - } - if err := wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, dbDaemon.OID); err != nil { - log.Printf("[sessiondaemon:%s] reapDone: error deleting from DB: %v", dbDaemon.OID, err) - } -} - -func (sd *SessionDaemonManager) verifyConsistency(ctx context.Context) { - daemons, err := wstore.DBGetAllObjsByType[*waveobj.SessionDaemon](ctx, waveobj.OType_SessionDaemon) - if err != nil { - return - } - - dbIds := make(map[string]bool) - for _, dbDaemon := range daemons { - dbIds[dbDaemon.OID] = true - } - - sd.Lock.Lock() - defer sd.Lock.Unlock() - - for id := range sd.Daemons { - if !dbIds[id] { - log.Printf("[sessiondaemon] consistency: daemon %s in memory but not in DB, removing from memory", id) - delete(sd.Daemons, id) - } - } - - for _, dbDaemon := range daemons { - if _, exists := sd.Daemons[dbDaemon.OID]; !exists { - log.Printf("[sessiondaemon] consistency: daemon %s in DB but not in memory, loading", dbDaemon.OID) - sd.Daemons[dbDaemon.OID] = &SessionDaemon{ - DaemonId: dbDaemon.OID, - Name: dbDaemon.Name, - 
JobId: dbDaemon.JobId, - InputSessionId: uuid.New().String(), - Blocks: make(map[string]bool), - } - } - } -} diff --git a/pkg/wshrpc/wshserver/wshserver.go b/pkg/wshrpc/wshserver/wshserver.go index e9bc5af3ea..239cc2b8b8 100644 --- a/pkg/wshrpc/wshserver/wshserver.go +++ b/pkg/wshrpc/wshserver/wshserver.go @@ -20,7 +20,6 @@ import ( "strings" "time" - "github.com/google/uuid" "github.com/skratchdot/open-golang/open" "github.com/wavetermdev/waveterm/pkg/aiusechat" "github.com/wavetermdev/waveterm/pkg/aiusechat/chatstore" @@ -38,7 +37,6 @@ import ( "github.com/wavetermdev/waveterm/pkg/remote/conncontroller" "github.com/wavetermdev/waveterm/pkg/remote/fileshare/wshfs" "github.com/wavetermdev/waveterm/pkg/secretstore" - "github.com/wavetermdev/waveterm/pkg/sessiondaemon" "github.com/wavetermdev/waveterm/pkg/suggestion" "github.com/wavetermdev/waveterm/pkg/telemetry" "github.com/wavetermdev/waveterm/pkg/telemetry/telemetrydata" @@ -1580,274 +1578,3 @@ func (ws *WshServer) JobControllerDetachJobCommand(ctx context.Context, jobId st func (ws *WshServer) BlockJobStatusCommand(ctx context.Context, blockId string) (*wshrpc.BlockJobStatusData, error) { return jobcontroller.GetBlockJobStatus(ctx, blockId) } - -func (ws *WshServer) SessionCreateCommand(ctx context.Context, data wshrpc.CommandSessionCreateData) (*wshrpc.SessionInfoRtnData, error) { - dbDaemon := &waveobj.SessionDaemon{ - OID: uuid.New().String(), - Name: data.Name, - Connection: data.Connection, - IsAnonymous: data.Name == "", - Status: sessiondaemon.Status_Init, - CreatedAt: time.Now().UnixMilli(), - IdleTimeout: data.IdleTimeout, - } - if dbDaemon.IsAnonymous { - dbDaemon.IdleTimeout = sessiondaemon.DefaultAnonymousIdleTimeout - } else if dbDaemon.IdleTimeout <= 0 { - dbDaemon.IdleTimeout = sessiondaemon.DefaultNamedIdleTimeout - } - - err := wstore.DBInsert(ctx, dbDaemon) - if err != nil { - return nil, fmt.Errorf("insert session daemon: %w", err) - } - - _, err = sessiondaemon.Manager.GetOrCreate(ctx, 
dbDaemon) - if err != nil { - return nil, fmt.Errorf("create session daemon in manager: %w", err) - } - - return buildSessionInfoRtnData(ctx, dbDaemon) -} - -func (ws *WshServer) SessionDeleteCommand(ctx context.Context, data wshrpc.CommandSessionDeleteData) error { - dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) - if err != nil { - return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) - } - - memDaemon := sessiondaemon.Manager.Get(data.DaemonId) - forceDelete := false - if memDaemon != nil { - err = memDaemon.Stop(ctx) - if err != nil { - forceDelete = isRemoteProcessDead(ctx, dbDaemon) - if !forceDelete { - return fmt.Errorf("failed to stop session daemon: %w", err) - } - log.Printf("[sessiondaemon] SessionDelete: daemon=%s remote job dead, deleting despite stop failure", data.DaemonId) - } - sessiondaemon.Manager.Remove(data.DaemonId) - } - - err = wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, data.DaemonId) - if err != nil { - return fmt.Errorf("delete session daemon: %w", err) - } - return nil -} - -// isRemoteProcessDead checks whether the daemon's remote job manager -// process has exited. Returns true if confirmed dead. 
-func isRemoteProcessDead(ctx context.Context, dbDaemon *waveobj.SessionDaemon) bool { - if dbDaemon.JobId == "" { - return false - } - job, err := wstore.DBMustGet[*waveobj.Job](ctx, dbDaemon.JobId) - if err != nil || job.JobManagerPid == 0 { - return false - } - alive, err := conncontroller.CheckRemoteProcessAlive(ctx, dbDaemon.Connection, job.JobManagerPid) - return err == nil && !alive -} - -func (ws *WshServer) SessionListCommand(ctx context.Context, data wshrpc.CommandSessionListData) ([]wshrpc.SessionInfoRtnData, error) { - allDaemons, err := wstore.DBGetAllObjsByType[*waveobj.SessionDaemon](ctx, waveobj.OType_SessionDaemon) - if err != nil { - return nil, fmt.Errorf("list session daemons: %w", err) - } - - rtn := make([]wshrpc.SessionInfoRtnData, 0) - for _, dbDaemon := range allDaemons { - if dbDaemon.IsAnonymous && !data.ShowAll { - continue - } - info, err := buildSessionInfoRtnData(ctx, dbDaemon) - if err != nil { - return nil, err - } - rtn = append(rtn, *info) - } - sort.Slice(rtn, func(i, j int) bool { - ai := rtn[i].LastActiveAt - aj := rtn[j].LastActiveAt - if ai != aj { - return ai > aj - } - return rtn[i].CreatedAt > rtn[j].CreatedAt - }) - return rtn, nil -} - -func (ws *WshServer) SessionAttachCommand(ctx context.Context, data wshrpc.CommandSessionAttachData) error { - log.Printf("[sessiondaemon] SessionAttach: block=%s old_daemon=%s new_daemon=%s new_daemon_job=%s", - data.BlockId, data.CurrentDaemonId, data.DaemonId, func() string { - if data.DaemonId != "" { - if db, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId); err == nil && db != nil { - return db.JobId - } - } - return "" - }()) - - if data.CurrentDaemonId != "" && data.CurrentDaemonId == data.DaemonId { - log.Printf("[sessiondaemon] SessionAttach: block=%s already attached to daemon=%s, skipping", data.BlockId, data.DaemonId) - return nil - } - - if data.CurrentDaemonId != "" { - log.Printf("[sessiondaemon] SessionAttach: detaching block=%s from old_daemon=%s", 
data.BlockId, data.CurrentDaemonId) - sessiondaemon.Manager.DetachBlock(ctx, data.CurrentDaemonId, data.BlockId) - } - - dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) - if err != nil { - if data.CurrentDaemonId != "" { - sessiondaemon.Manager.AttachBlock(ctx, data.CurrentDaemonId, data.BlockId) - } - return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) - } - - // Refuse cross-connection attach: a block on connection A cannot - // share a daemon whose remote job runs on connection B. - blockData, err := wstore.DBMustGet[*waveobj.Block](ctx, data.BlockId) - if err == nil { - blockConn := blockData.Meta.GetString(waveobj.MetaKey_Connection, "") - if blockConn != "" && blockConn != dbDaemon.Connection { - log.Printf("[sessiondaemon] SessionAttach: block=%s conn=%q daemon conn=%q mismatch, refusing", - data.BlockId, blockConn, dbDaemon.Connection) - return fmt.Errorf("cannot attach to session on connection %q from connection %q", dbDaemon.Connection, blockConn) - } - } - - _, err = sessiondaemon.Manager.GetOrCreate(ctx, dbDaemon) - if err != nil { - return fmt.Errorf("create session daemon in manager: %w", err) - } - - sessiondaemon.Manager.AttachBlock(ctx, data.DaemonId, data.BlockId) - - err = wstore.DBUpdateFn(ctx, data.BlockId, func(block *waveobj.Block) { - block.Meta[waveobj.MetaKey_SessionDaemonId] = data.DaemonId - delete(block.Meta, blockcontroller.MetaKey_SessionNoAutoCreate) - block.JobId = dbDaemon.JobId - }) - log.Printf("[sessiondaemon] SessionAttach: block=%s daemon=%s meta_updated daemon_job=%s block_job=%s", - data.BlockId, data.DaemonId, dbDaemon.JobId, func() string { - if b, err := wstore.DBMustGet[*waveobj.Block](ctx, data.BlockId); err == nil && b != nil { - return b.JobId - } - return "?" 
- }()) - - if err != nil { - sessiondaemon.Manager.DetachBlock(ctx, data.DaemonId, data.BlockId) - if data.CurrentDaemonId != "" { - sessiondaemon.Manager.AttachBlock(ctx, data.CurrentDaemonId, data.BlockId) - } - return fmt.Errorf("update block meta: %w", err) - } - - log.Printf("[sessiondaemon] SessionAttach: triggering resync for block=%s", data.BlockId) - resyncBlockController(ctx, data.BlockId) - wcore.SendWaveObjUpdate(waveobj.MakeORef(waveobj.OType_Block, data.BlockId)) - return nil -} - -func (ws *WshServer) SessionDetachCommand(ctx context.Context, data wshrpc.CommandSessionDetachData) error { - _, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) - if err != nil { - return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) - } - - blockIds := []string{} - if data.BlockId != "" { - blockIds = append(blockIds, data.BlockId) - } else { - blockIds = sessiondaemon.Manager.GetBlocksForDaemon(data.DaemonId) - } - - for _, blockId := range blockIds { - sessiondaemon.Manager.DetachBlock(ctx, data.DaemonId, blockId) - err = wstore.DBUpdateFn(ctx, blockId, func(block *waveobj.Block) { - delete(block.Meta, waveobj.MetaKey_SessionDaemonId) - block.Meta[blockcontroller.MetaKey_SessionNoAutoCreate] = true - }) - if err != nil { - return fmt.Errorf("update block meta: %w", err) - } - resyncBlockController(ctx, blockId) - wcore.SendWaveObjUpdate(waveobj.MakeORef(waveobj.OType_Block, blockId)) - } - return nil -} - -func (ws *WshServer) SessionInfoCommand(ctx context.Context, data wshrpc.CommandSessionInfoData) (*wshrpc.SessionInfoRtnData, error) { - dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) - if err != nil { - return nil, fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) - } - // If the DB daemon has no JobId yet, check the in-memory daemon which - // may be more current (SetJobId updates memory before DB, so the block - // JobId sync can trigger a frontend SessionInfo call before the 
daemon - // DB write is visible). - if dbDaemon.JobId == "" { - if memJobId := sessiondaemon.Manager.GetMemJobId(dbDaemon.OID); memJobId != "" { - log.Printf("[sessiondaemon] SessionInfo: daemon=%s DB jobId empty, using in-memory jobId=%s", dbDaemon.OID, memJobId) - dbDaemon.JobId = memJobId - } - } - info, err := buildSessionInfoRtnData(ctx, dbDaemon) - log.Printf("[sessiondaemon] SessionInfo: daemon=%s job=%s status=%s blocks=%d err=%v", - data.DaemonId, dbDaemon.JobId, dbDaemon.Status, len(info.Blocks), err) - return info, err -} - -func (ws *WshServer) SessionTagCommand(ctx context.Context, data wshrpc.CommandSessionTagData) error { - _, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) - if err != nil { - return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) - } - return sessiondaemon.Manager.Rename(ctx, data.DaemonId, data.Name) -} - -func (ws *WshServer) RecordSessionActivityCommand(ctx context.Context, data wshrpc.CommandRecordSessionActivityData) error { - return sessiondaemon.Manager.RecordActivity(ctx, data.DaemonId) -} - -func buildSessionInfoRtnData(ctx context.Context, dbDaemon *waveobj.SessionDaemon) (*wshrpc.SessionInfoRtnData, error) { - if dbDaemon == nil { - return nil, fmt.Errorf("session daemon is nil") - } - blocks := sessiondaemon.Manager.GetBlocksForDaemon(dbDaemon.OID) - return &wshrpc.SessionInfoRtnData{ - DaemonId: dbDaemon.OID, - Name: dbDaemon.Name, - Connection: dbDaemon.Connection, - JobId: dbDaemon.JobId, - IsAnonymous: dbDaemon.IsAnonymous, - Status: dbDaemon.Status, - Cwd: dbDaemon.Cwd, - CreatedAt: dbDaemon.CreatedAt, - IdleTimeout: dbDaemon.IdleTimeout, - IdleSince: dbDaemon.IdleSince, - LastActiveAt: dbDaemon.LastActiveAt, - Blocks: blocks, - }, nil -} - -func resyncBlockController(ctx context.Context, blockId string) { - tabs, err := wstore.DBGetAllObjsByType[*waveobj.Tab](ctx, waveobj.OType_Tab) - if err != nil { - log.Printf("[sessiondaemon] warning: error getting tabs for resync: %v", 
err) - return - } - for _, tab := range tabs { - for _, bid := range tab.BlockIds { - if bid == blockId { - blockcontroller.ResyncController(ctx, tab.OID, blockId, nil, true) - return - } - } - } -} diff --git a/pkg/wshrpc/wshserver/wshserver_session.go b/pkg/wshrpc/wshserver/wshserver_session.go new file mode 100644 index 0000000000..527ab55a98 --- /dev/null +++ b/pkg/wshrpc/wshserver/wshserver_session.go @@ -0,0 +1,260 @@ +// Copyright 2026, Command Line Inc. +// SPDX-License-Identifier: Apache-2.0 + +package wshserver + +import ( + "context" + "fmt" + "log" + "sort" + "time" + + "github.com/google/uuid" + "github.com/wavetermdev/waveterm/pkg/blockcontroller" + "github.com/wavetermdev/waveterm/pkg/remote/conncontroller" + "github.com/wavetermdev/waveterm/pkg/sessiondaemon" + "github.com/wavetermdev/waveterm/pkg/waveobj" + "github.com/wavetermdev/waveterm/pkg/wcore" + "github.com/wavetermdev/waveterm/pkg/wshrpc" + "github.com/wavetermdev/waveterm/pkg/wstore" +) + +func (ws *WshServer) SessionCreateCommand(ctx context.Context, data wshrpc.CommandSessionCreateData) (*wshrpc.SessionInfoRtnData, error) { + dbDaemon := &waveobj.SessionDaemon{ + OID: uuid.New().String(), + Name: data.Name, + Connection: data.Connection, + IsAnonymous: data.Name == "", + Status: sessiondaemon.Status_Init, + CreatedAt: time.Now().UnixMilli(), + IdleTimeout: data.IdleTimeout, + } + if dbDaemon.IsAnonymous { + dbDaemon.IdleTimeout = sessiondaemon.DefaultAnonymousIdleTimeout + } else if dbDaemon.IdleTimeout <= 0 { + dbDaemon.IdleTimeout = sessiondaemon.DefaultNamedIdleTimeout + } + + err := wstore.DBInsert(ctx, dbDaemon) + if err != nil { + return nil, fmt.Errorf("insert session daemon: %w", err) + } + + _, err = sessiondaemon.Manager.GetOrCreate(ctx, dbDaemon) + if err != nil { + return nil, fmt.Errorf("create session daemon in manager: %w", err) + } + + return buildSessionInfoRtnData(ctx, dbDaemon) +} + +func (ws *WshServer) SessionDeleteCommand(ctx context.Context, data 
wshrpc.CommandSessionDeleteData) error { + dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) + if err != nil { + return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) + } + + memDaemon := sessiondaemon.Manager.Get(data.DaemonId) + forceDelete := false + if memDaemon != nil { + err = memDaemon.Stop(ctx) + if err != nil { + forceDelete = isRemoteProcessDead(ctx, dbDaemon) + if !forceDelete { + return fmt.Errorf("failed to stop session daemon: %w", err) + } + log.Printf("[sessiondaemon] SessionDelete: daemon=%s remote job dead, deleting despite stop failure", data.DaemonId) + } + sessiondaemon.Manager.Remove(data.DaemonId) + } + + err = wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, data.DaemonId) + if err != nil { + return fmt.Errorf("delete session daemon: %w", err) + } + return nil +} + +func isRemoteProcessDead(ctx context.Context, dbDaemon *waveobj.SessionDaemon) bool { + if dbDaemon.JobId == "" { + return false + } + job, err := wstore.DBMustGet[*waveobj.Job](ctx, dbDaemon.JobId) + if err != nil || job.JobManagerPid == 0 { + return false + } + alive, err := conncontroller.CheckRemoteProcessAlive(ctx, dbDaemon.Connection, job.JobManagerPid) + return err == nil && !alive +} + +func (ws *WshServer) SessionListCommand(ctx context.Context, data wshrpc.CommandSessionListData) ([]wshrpc.SessionInfoRtnData, error) { + allDaemons, err := wstore.DBGetAllObjsByType[*waveobj.SessionDaemon](ctx, waveobj.OType_SessionDaemon) + if err != nil { + return nil, fmt.Errorf("list session daemons: %w", err) + } + + rtn := make([]wshrpc.SessionInfoRtnData, 0) + for _, dbDaemon := range allDaemons { + if dbDaemon.IsAnonymous && !data.ShowAll { + continue + } + info, err := buildSessionInfoRtnData(ctx, dbDaemon) + if err != nil { + return nil, err + } + rtn = append(rtn, *info) + } + sort.Slice(rtn, func(i, j int) bool { + ai := rtn[i].LastActiveAt + aj := rtn[j].LastActiveAt + if ai != aj { + return ai > aj + } + return rtn[i].CreatedAt 
> rtn[j].CreatedAt + }) + return rtn, nil +} + +func (ws *WshServer) SessionAttachCommand(ctx context.Context, data wshrpc.CommandSessionAttachData) error { + if data.CurrentDaemonId != "" && data.CurrentDaemonId == data.DaemonId { + return nil + } + + if data.CurrentDaemonId != "" { + sessiondaemon.Manager.DetachBlock(ctx, data.CurrentDaemonId, data.BlockId) + } + + dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) + if err != nil { + if data.CurrentDaemonId != "" { + sessiondaemon.Manager.AttachBlock(ctx, data.CurrentDaemonId, data.BlockId) + } + return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) + } + + blockData, err := wstore.DBMustGet[*waveobj.Block](ctx, data.BlockId) + if err == nil { + blockConn := blockData.Meta.GetString(waveobj.MetaKey_Connection, "") + if blockConn != "" && blockConn != dbDaemon.Connection { + log.Printf("[sessiondaemon] SessionAttach: block=%s conn=%q daemon conn=%q mismatch, refusing", + data.BlockId, blockConn, dbDaemon.Connection) + return fmt.Errorf("cannot attach to session on connection %q from connection %q", dbDaemon.Connection, blockConn) + } + } + + _, err = sessiondaemon.Manager.GetOrCreate(ctx, dbDaemon) + if err != nil { + return fmt.Errorf("create session daemon in manager: %w", err) + } + + sessiondaemon.Manager.AttachBlock(ctx, data.DaemonId, data.BlockId) + + err = wstore.DBUpdateFn(ctx, data.BlockId, func(block *waveobj.Block) { + block.Meta[waveobj.MetaKey_SessionDaemonId] = data.DaemonId + delete(block.Meta, blockcontroller.MetaKey_SessionNoAutoCreate) + block.JobId = dbDaemon.JobId + }) + + if err != nil { + sessiondaemon.Manager.DetachBlock(ctx, data.DaemonId, data.BlockId) + if data.CurrentDaemonId != "" { + sessiondaemon.Manager.AttachBlock(ctx, data.CurrentDaemonId, data.BlockId) + } + return fmt.Errorf("update block meta: %w", err) + } + + resyncBlockController(ctx, data.BlockId) + wcore.SendWaveObjUpdate(waveobj.MakeORef(waveobj.OType_Block, data.BlockId)) + 
return nil +} + +func (ws *WshServer) SessionDetachCommand(ctx context.Context, data wshrpc.CommandSessionDetachData) error { + _, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) + if err != nil { + return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) + } + + blockIds := []string{} + if data.BlockId != "" { + blockIds = append(blockIds, data.BlockId) + } else { + blockIds = sessiondaemon.Manager.GetBlocksForDaemon(data.DaemonId) + } + + for _, blockId := range blockIds { + sessiondaemon.Manager.DetachBlock(ctx, data.DaemonId, blockId) + err = wstore.DBUpdateFn(ctx, blockId, func(block *waveobj.Block) { + delete(block.Meta, waveobj.MetaKey_SessionDaemonId) + block.Meta[blockcontroller.MetaKey_SessionNoAutoCreate] = true + }) + if err != nil { + return fmt.Errorf("update block meta: %w", err) + } + resyncBlockController(ctx, blockId) + wcore.SendWaveObjUpdate(waveobj.MakeORef(waveobj.OType_Block, blockId)) + } + return nil +} + +func (ws *WshServer) SessionInfoCommand(ctx context.Context, data wshrpc.CommandSessionInfoData) (*wshrpc.SessionInfoRtnData, error) { + dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) + if err != nil { + return nil, fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) + } + if dbDaemon.JobId == "" { + if memJobId := sessiondaemon.Manager.GetMemJobId(dbDaemon.OID); memJobId != "" { + dbDaemon.JobId = memJobId + } + } + return buildSessionInfoRtnData(ctx, dbDaemon) +} + +func (ws *WshServer) SessionTagCommand(ctx context.Context, data wshrpc.CommandSessionTagData) error { + _, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) + if err != nil { + return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) + } + return sessiondaemon.Manager.Rename(ctx, data.DaemonId, data.Name) +} + +func (ws *WshServer) RecordSessionActivityCommand(ctx context.Context, data wshrpc.CommandRecordSessionActivityData) error { + return 
sessiondaemon.Manager.RecordActivity(ctx, data.DaemonId) +} + +func buildSessionInfoRtnData(ctx context.Context, dbDaemon *waveobj.SessionDaemon) (*wshrpc.SessionInfoRtnData, error) { + if dbDaemon == nil { + return nil, fmt.Errorf("session daemon is nil") + } + blocks := sessiondaemon.Manager.GetBlocksForDaemon(dbDaemon.OID) + return &wshrpc.SessionInfoRtnData{ + DaemonId: dbDaemon.OID, + Name: dbDaemon.Name, + Connection: dbDaemon.Connection, + JobId: dbDaemon.JobId, + IsAnonymous: dbDaemon.IsAnonymous, + Status: dbDaemon.Status, + Cwd: dbDaemon.Cwd, + CreatedAt: dbDaemon.CreatedAt, + IdleTimeout: dbDaemon.IdleTimeout, + IdleSince: dbDaemon.IdleSince, + LastActiveAt: dbDaemon.LastActiveAt, + Blocks: blocks, + }, nil +} + +func resyncBlockController(ctx context.Context, blockId string) { + tabs, err := wstore.DBGetAllObjsByType[*waveobj.Tab](ctx, waveobj.OType_Tab) + if err != nil { + log.Printf("[sessiondaemon] warning: error getting tabs for resync: %v", err) + return + } + for _, tab := range tabs { + for _, bid := range tab.BlockIds { + if bid == blockId { + blockcontroller.ResyncController(ctx, tab.OID, blockId, nil, true) + return + } + } + } +} From e89da05661bb56ba706731fd6eea7d4ed305f81e Mon Sep 17 00:00:00 2001 From: lyx-tec Date: Tue, 23 Jun 2026 20:18:35 +0800 Subject: [PATCH 35/36] Fix session popup connection count --- frontend/app/block/session-daemon-popup.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/app/block/session-daemon-popup.tsx b/frontend/app/block/session-daemon-popup.tsx index 3daa92302a..80bb889cf9 100644 --- a/frontend/app/block/session-daemon-popup.tsx +++ b/frontend/app/block/session-daemon-popup.tsx @@ -61,7 +61,7 @@ export function SessionDaemonPopup({ state }: SessionDaemonPopupProps) { padding: "1px 7px", }} > - {state.sessions.length} + {state.sameConnSessions.length}
From cb65478b4eb8b927a6f3b1bc9e71e36fd7c210fa Mon Sep 17 00:00:00 2001 From: lyx-tec Date: Tue, 23 Jun 2026 21:49:49 +0800 Subject: [PATCH 36/36] fixed session switch --- frontend/app/view/term/termwrap.ts | 36 +++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/frontend/app/view/term/termwrap.ts b/frontend/app/view/term/termwrap.ts index 8136406fb9..54be322987 100644 --- a/frontend/app/view/term/termwrap.ts +++ b/frontend/app/view/term/termwrap.ts @@ -184,6 +184,7 @@ export class TermWrap { tabId: string; blockId: string; zoneId: string; + zoneLoadVersion: number; ptyOffset: number; dataBytesProcessed: number; terminal: Terminal; @@ -242,6 +243,7 @@ export class TermWrap { this.tabId = tabId; this.blockId = blockId; this.zoneId = blockId; + this.zoneLoadVersion = 0; this.sendDataHandler = waveOptions.sendDataHandler; this.nodeModel = waveOptions.nodeModel; this.ptyOffset = 0; @@ -438,12 +440,33 @@ export class TermWrap { return this.zoneId; } - async attachToDaemon(_jobId: string): Promise { - this.zoneId = this.blockId; + async switchZone(zoneId: string): Promise { + if (!zoneId || this.zoneId === zoneId) { + return; + } + this._mainFileSub?.unsubscribe(); + this._mainFileSub = null; + this.mainFileSubject?.release(); + + this.zoneId = zoneId; + this.zoneLoadVersion++; + this.ptyOffset = 0; + this.dataBytesProcessed = 0; + this.heldData = []; + this.syntheticAltScreenTracker.reset(); + this.terminal.clear(); + this.mainFileSubject = getFileSubject(this.getZoneId(), TermFileName); + this._mainFileSub = this.mainFileSubject.subscribe(this.handleNewFileSubjectData.bind(this)); + await this.loadInitialTerminalData(); + this.terminal.scrollToBottom(); + } + + async attachToDaemon(jobId: string): Promise { + await this.switchZone(jobId); } async detachFromDaemon(): Promise { - this.zoneId = this.blockId; + await this.switchZone(this.blockId); } setCursorStyle(cursorStyle: string) { @@ -653,8 +676,12 @@ export class TermWrap { 
async loadInitialTerminalData(): Promise { const startTs = Date.now(); const zoneId = this.getZoneId(); + const zoneLoadVersion = this.zoneLoadVersion; const { data: cacheData, fileInfo: cacheFile } = await fetchWaveFile(zoneId, TermCacheFileName); let ptyOffset = 0; + if (zoneId !== this.getZoneId() || zoneLoadVersion !== this.zoneLoadVersion) { + return; + } if (cacheFile != null) { ptyOffset = cacheFile.meta["ptyoffset"] ?? 0; if (cacheData.byteLength > 0) { @@ -676,6 +703,9 @@ export class TermWrap { } } const { data: mainData, fileInfo: mainFile } = await fetchWaveFile(zoneId, TermFileName, ptyOffset); + if (zoneId !== this.getZoneId() || zoneLoadVersion !== this.zoneLoadVersion) { + return; + } console.log( `terminal loaded cachefile:${cacheData?.byteLength ?? 0} main:${mainData?.byteLength ?? 0} bytes, ${Date.now() - startTs}ms` );