diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000000..ea0daa9425 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,18 @@ +@.kilocode/rules/rules.md + +--- + +## Skill Guides + +This project uses a set of "skill" guides — focused how-to documents for common implementation tasks. When your task matches one of the descriptions below, **read the linked SKILL.md file before proceeding** and follow its instructions precisely. + +| Skill | File | Description | +| ------------ | ---------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| add-config | `.kilocode/skills/add-config/SKILL.md` | Guide for adding new configuration settings to Wave Terminal. Use when adding a new setting to the configuration system, implementing a new config key, or adding user-customizable settings. | +| add-rpc | `.kilocode/skills/add-rpc/SKILL.md` | Guide for adding new RPC calls to Wave Terminal. Use when implementing new RPC commands, adding server-client communication methods, or extending the RPC interface with new functionality. | +| add-wshcmd | `.kilocode/skills/add-wshcmd/SKILL.md` | Guide for adding new wsh commands to Wave Terminal. Use when implementing new CLI commands, adding command-line functionality, or extending the wsh command interface. | +| context-menu | `.kilocode/skills/context-menu/SKILL.md` | Guide for creating and displaying context menus in Wave Terminal. Use when implementing right-click menus, adding context menu items, creating submenus, or handling menu interactions with checkboxes and separators. | +| create-view | `.kilocode/skills/create-view/SKILL.md` | Guide for implementing a new view type in Wave Terminal. 
Use when creating a new view component, implementing the ViewModel interface, registering a new view type in BlockRegistry, or adding a new content type to display within blocks. | +| electron-api | `.kilocode/skills/electron-api/SKILL.md` | Guide for adding new Electron APIs to Wave Terminal. Use when implementing new frontend-to-electron communications via preload/IPC. | +| waveenv | `.kilocode/skills/waveenv/SKILL.md` | Guide for creating WaveEnv narrowings in Wave Terminal. Use when writing a named subset type of WaveEnv for a component tree, documenting environmental dependencies, or enabling mock environments for preview/test server usage. | +| wps-events | `.kilocode/skills/wps-events/SKILL.md` | Guide for working with Wave Terminal's WPS (Wave PubSub) event system. Use when implementing new event types, publishing events, subscribing to events, or adding asynchronous communication between components. | diff --git a/Taskfile.yml b/Taskfile.yml index bf37a83e45..56937f1d58 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -124,12 +124,11 @@ tasks: package: desc: Package the application for the current platform. cmds: + - task: clean + - task: npm:install + - task: build:backend + - task: build:tsunamiscaffold - npm run build:prod && npm exec electron-builder -- -c electron-builder.config.cjs -p never {{.CLI_ARGS}} - deps: - - clean - - npm:install - - build:backend - - build:tsunamiscaffold build:frontend:dev: desc: Build the frontend in development mode. 
diff --git a/cmd/server/main-server.go b/cmd/server/main-server.go index b204643ee8..cd403a7620 100644 --- a/cmd/server/main-server.go +++ b/cmd/server/main-server.go @@ -25,6 +25,7 @@ import ( "github.com/wavetermdev/waveterm/pkg/remote/conncontroller" "github.com/wavetermdev/waveterm/pkg/remote/fileshare/wshfs" "github.com/wavetermdev/waveterm/pkg/secretstore" + "github.com/wavetermdev/waveterm/pkg/sessiondaemon" "github.com/wavetermdev/waveterm/pkg/service" "github.com/wavetermdev/waveterm/pkg/telemetry" "github.com/wavetermdev/waveterm/pkg/telemetry/telemetrydata" @@ -525,6 +526,10 @@ func main() { log.Printf("error initializing wstore: %v\n", err) return } + err = wstore.RunSessionDaemonMigration(context.Background()) + if err != nil { + log.Printf("error running session daemon migration: %v\n", err) + } panichandler.PanicTelemetryHandler = panicTelemetryHandler go func() { defer func() { @@ -554,6 +559,13 @@ func main() { return } + ctx := context.Background() + err = sessiondaemon.Manager.InitFromDB(ctx) + if err != nil { + log.Printf("error initializing session daemon manager: %v\n", err) + } + sessiondaemon.Manager.StartIdleReaper(ctx) + err = shellutil.FixupWaveZshHistory() if err != nil { log.Printf("error fixing up wave zsh history: %v\n", err) diff --git a/db/migrations-wstore/000012_sessiondaemon.down.sql b/db/migrations-wstore/000012_sessiondaemon.down.sql new file mode 100644 index 0000000000..83eff15470 --- /dev/null +++ b/db/migrations-wstore/000012_sessiondaemon.down.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS db_sessiondaemon; diff --git a/db/migrations-wstore/000012_sessiondaemon.up.sql b/db/migrations-wstore/000012_sessiondaemon.up.sql new file mode 100644 index 0000000000..6912568c6d --- /dev/null +++ b/db/migrations-wstore/000012_sessiondaemon.up.sql @@ -0,0 +1,5 @@ +CREATE TABLE IF NOT EXISTS db_sessiondaemon ( + oid varchar(36) PRIMARY KEY, + version int NOT NULL, + data json NOT NULL +); diff --git a/docs/design/session-daemon-design-v1.md 
b/docs/design/session-daemon-design-v1.md new file mode 100644 index 0000000000..160e0bf77b --- /dev/null +++ b/docs/design/session-daemon-design-v1.md @@ -0,0 +1,483 @@ +# Session Daemon — Design Document V1 + +## 1. Overview + +Session Daemon 是一个持久的远程终端 session 模型。与当前 "一个 block 对应一个远程 job" 的 1:1 架构不同,Session Daemon 将**远端连接**与**block 视图**解耦。Session Daemon 独立于任何 block 存在,多个 block 可以 attach/detach 到同一个 daemon,所有 block 共享同一份 raw 输出数据,各自独立渲染。 + +**核心目标**:持久化(跨重启保持)、多视图镜像、所有 block 均可输入。 + +## 2. Architecture + +``` +┌── Local WaveTerm ──────────────────────────────────────────────────┐ +│ │ +│ ┌── SessionDaemon ────────────────────────────────────────────┐ │ +│ │ id: "sd-abc" │ │ +│ │ name: "dev" │ │ +│ │ jobId: "job-xyz" │ │ +│ │ connName: "ssh:user@host" │ │ +│ │ status: "running" │ │ +│ │ │ │ +│ │ InputSessionId: "uuid-X" │ │ +│ │ seqNum: 42 │ │ +│ │ │ │ +│ │ StreamReader ──▶ runOutputLoop() ──▶ job:job-xyz/term │ │ +│ │ │ │ +│ │ AttachedBlocks: [block-A, block-B] │ │ +│ └──────────────────────────────┬───────────────────────────────┘ │ +│ │ │ +│ ┌────────────┴────────────┐ │ +│ │ │ │ +│ Block-A Block-B │ +│ ┌──────────────────┐ ┌──────────────────┐ │ +│ │ view: "term" │ │ view: "term" │ │ +│ │ meta: │ │ meta: │ │ +│ │ daemonid:sd-abc │ │ daemonid:sd-abc │ │ +│ │ │ │ │ │ +│ │ read job file │ │ read job file │ │ +│ │ sendInput ▶ D │ │ sendInput ▶ D │ │ +│ └──────────────────┘ └──────────────────┘ │ +│ │ │ │ +│ └────────┬────────────────┘ │ +│ ▼ │ +│ SessionDaemon.SendInput() │ +│ │ │ +│ ▼ │ +│ jobcontroller.SendInput() │ +│ │ │ +│ ▼ │ +│ Remote JobManager(job-xyz) │ +│ (single attached client, 不改动) │ +│ │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +## 3. 
Data Model + +### 3.1 SessionDaemon(DB 持久化) + +``` +SessionDaemon { + OID: string // "sd-abc",内部标识 + Name: string // "dev",用户别名,可选 + Connection: string // "ssh:user@host" + JobId: string // "job-xyz" + Status: string // "init" | "running" | "disconnected" | "done" + Cwd: string // 创建时的 CWD + CreatedAt: int64 + IdleTimeout: int64 // 超时回收(秒),默认 86400(24h) + Meta: MetaMapType +} +``` + +- **Name 唯一性**:全局唯一。创建时若冲突,自动追加时间后缀(`dev` → `dev-150623`),并提示用户实际名称。 +- **空闲回收**:无 block attach 超过 `IdleTimeout`(默认 24h)后自动回收(`TerminateAndDetachJob` + status=done)。 + +### 3.2 Status 状态机 + +``` + ┌─────────────────────────────────────────────────┐ + │ │ + ▼ │ + ┌──────┐ StartJob成功 ┌─────────┐ SSH断开 ┌───────────────┐ + │ init │ ───────────────▶ │ running │ ──────────▶ │ disconnected │ + └──┬───┘ └────┬─────┘ └───────┬───────┘ + │ │ │ + │ StartJob失败 │ 用户删除 (ssh在线) │ 用户删除、idle timeout + │ │ 或 shell退出 │ 或 shell退出 + ▼ ▼ │ + ┌──────┐ ┌──────┐ │ + │ done │ │ done │ │ + └──────┘ └──────┘ │ + ▲ │ + └────────────────────────────────────────────────────┘ +``` + +| 状态 | 含义 | 前端 block 显示 | +|------|------|----------------| +| `init` | 正在创建,job 尚未启动 | "Starting..." | +| `running` | SSH 在线,远端 shell 运行中 | 正常终端 | +| `disconnected` | SSH 断开但远端 shell 仍存活 | "Reconnecting..." | +| `done` | 已终止 | "Session Ended" | + +### 3.3 Block(扩展) + +``` +Block { + ... // 现有字段不变 + Meta: { + ... + "session:daemonid": string // 新增。为空 = 未 attach + } +} +``` + +### 3.4 Job(不变) + +现有结构完全保留。`AttachedBlockId` 仍为单值,指向 SessionDaemon(不直接指向 block)。 + +### 3.5 DurableShellController 移除 + +SessionDaemon 完全取代旧的 `DurableShellController`: + +- 移除 `pkg/blockcontroller/durableshellcontroller.go` +- 移除 `ResyncController` 中的 `DurableShellController` 分支 +- `IsBlockIdTermDurable` 不再需要 +- `handleAppendJobFile` 不再同时写 `block:blockId/term`,只写 `job:jobId/term` + +## 4. 
Backend Design + +### 4.1 Controller 调度(ResyncController) + +dispatch 只取决于 block 是否 attach 到 daemon,与 connection 无关: + +``` +if block.Meta["session:daemonid"] != "" { + → SessionDaemonController // 桥接到 daemon,无进程 +} else if controllerName == "shell" || controllerName == "cmd" { + → ShellController // 本地 shell +} else if controllerName == "tsunami" { + → TsunamiController +} +``` + +block 的 `connection` meta 在未 attach 时仅作为创建/attach daemon 时的提示信息,不影响 controller 类型。远端会话的概念完全由 SessionDaemon 承载。 + +block 三态: + +``` + create / detach + ShellController ◄─────────────────► SessionDaemonController + (本地 shell) (桥接到 daemon) + │ │ + │ block 创建时默认 │ attach + │ (或 detach 后恢复) │ + │ ▼ + └── 没有 attach 时跑本地 shell session 输出实时显示 + 行为与现有非 durable block 一致 所有 block 可输入 +``` + +### 4.2 SessionDaemonController + +``` +SessionDaemonController { + BlockId: string + ConnName: string + DaemonId: string +} + +Start(): + → SessionDaemonManager.AttachBlock(daemonId, blockId) + → 返回 daemon.JobId(前端据此读文件) + → 发 controllerstatus 事件 + +SendInput(input): + → SessionDaemonManager.SendInput(daemonId, input.InputData) + → 若 input.TermSize 非空,更新 daemon 的 PTY 尺寸 + (多个 block resize 时最后一个生效) + +Stop(): + → SessionDaemonManager.DetachBlock(daemonId, blockId) + +GetRuntimeStatus(): + → 返回 daemon 的连接状态 (running/disconnected/done) +``` + +### 4.3 职责分层 + +``` +SessionDaemon jobcontroller Remote +────────────── ───────────── ────── + 生命周期编排 原子操作 远端执行 + Start / Reconnect / StartJob() RemoteStartJob + Stop ReconnectJob() RemoteReconnect + runOutputLoop goroutine SendInput() RemoteTerminate + SendInput (入口) TerminateJob() + AttachBlock / DetachBlock RunOutputLoop() ← 函数保留,goroutine 由 daemon 启动 + handleAppendJobFile() +``` + +### 4.4 runOutputLoop 归属 + +当前 `RunOutputLoop` goroutine 由 `StartJob()` 和 `restartStreaming()` 内部启动。改为 **SessionDaemon 启动 goroutine,jobcontroller 提供函数**。 + +`StartJob()` 和 `restartStreaming()` 内部移除 `go runOutputLoop(...)`,改为返回 `(reader, streamMeta)`: + +```go +// SessionDaemon 组装生命周期 +func 
(sd *SessionDaemon) Start(ctx) error { + jobId, reader, streamMeta, err := jobcontroller.StartJob(ctx, params) + sd.jobId = jobId + go jobcontroller.RunOutputLoop(ctx, jobId, streamMeta.Id, reader) + return nil +} + +func (sd *SessionDaemon) Reconnect(ctx) error { + reader, streamMeta, err := jobcontroller.ReconnectJob(ctx, sd.jobId, rtOpts) + // jobStreamIds 已更新,旧 RunOutputLoop 因 currentStreamId != streamId 自动退出 + go jobcontroller.RunOutputLoop(ctx, sd.jobId, streamMeta.Id, reader) + return nil +} +``` + +`RunOutputLoop` 代码本身不动——自毁逻辑 `currentStreamId != streamId → break` 直接复用。 + +### 4.5 SessionDaemonManager(全局 in-memory) + +``` +SessionDaemonManager { + daemons: map[daemonId] *SessionDaemon + + // daemon 操作 + GetOrCreate(params) → (*SessionDaemon, error) + Get(daemonId) → (*SessionDaemon, error) + Remove(daemonId) + InitFromDB() // 启动时恢复所有 running daemon + + // block 操作 + AttachBlock(daemonId, blockId) + DetachBlock(daemonId, blockId) + GetBlocksForDaemon(daemonId) → []blockId + + // 输入 + SendInput(daemonId, data []byte) → error +} + +SessionDaemon (每个 daemon 一个实例) { + daemonId string + jobId string + InputSessionId string + seqNum int + reader *streamclient.Reader + cancel context.CancelFunc // 终止 runOutputLoop + blocks map[blockId] bool // attached blocks + + Start() // StartJob + runOutputLoop + Reconnect() // ReconnectJob + runOutputLoop + Stop(reason) // cancel loop, TerminateJob, notify blocks + Shutdown() // 进程退出时优雅断开 + + GetJobId() → string + GetStatus() → running | disconnected | done +} +``` + +## 5. 
Data Flow + +### 5.1 Output(只写 job 文件) + +当前 `handleAppendJobFile` 同时写 `job:jobId/term` 和 `block:blockId/term`。 +改为只写 `job:jobId/term`,所有 block 读同一份 raw 数据: + +``` +runOutputLoop(job-xyz) + │ + ▼ +handleAppendJobFile(jobId, "term", data) + │ + ├── doWFSAppend(job:jobId, "term", data) + └── WPS Publish "blockfile" scope=job:{jobId} +``` + +前端 TermWrap 根据 block 的 daemonId 找到 JobId,以 `jobId` 作为 zoneId 读取。 + +### 5.2 Input(单路复用) + +所有 attached block 的输入汇聚到同一个 SessionDaemon,使用同一 `InputSessionId`: + +``` +Block-A.sendInput("ls\n") Block-B.sendInput("grep\n") + │ │ + └────────────┬────────────────┘ + ▼ + SessionDaemon.SendInput() + │ + InputSessionId: uuid-X, seqNum: ++ + │ + ▼ + jobcontroller.SendInput() + │ + ▼ + Remote JobManager.InputQueue + (QuickReorderQueue, 按 sessionId 排序) +``` + +远程 JobManager 不改动——它仍只看到一个 attachedClient,一条输入流。 + +## 6. Lifecycle + +### 6.1 创建 + +``` +wsh session create --name "dev" --connection ssh:user@host + │ + ├── DB: Insert SessionDaemon{status:"init"} + ├── StartRemoteShellJob() → job-xyz + ├── DB: Update SessionDaemon{status:"running", jobId:"job-xyz"} + ├── 注册到 SessionDaemonManager,启动 runOutputLoop + └── ✅ Daemon 存活,AttachedBlocks:[](无 block 连接) +``` + +### 6.2 Attach + +``` +wsh session attach dev --block block-A + │ + ├── Block.Meta["session:daemonid"] = "sd-abc" + ├── SessionDaemonManager.AttachBlock("sd-abc", "block-A") + ├── 前端 TermWrap.attachToDaemon(jobId) + │ ├── unsubscribe WPS blockfile scope=block:{blockId} + │ ├── subscribe WPS blockfile scope=job:{jobId} + │ └── loadInitialTerminalData(jobId) // raw data,全量历史 + └── ✅ Block 显示 session 输出,可以输入 +``` + +### 6.3 Detach + +``` +wsh session detach --block block-A + │ + ├── 清除 Block.Meta["session:daemonid"] + ├── SessionDaemonManager.DetachBlock("sd-abc", "block-A") + ├── ControllerResync → 重建 ShellController + │ └── ShellController.Start() → 启动本地 shell + ├── 前端 TermWrap.detachFromDaemon() + │ ├── unsubscribe WPS blockfile scope=job:{jobId} + │ ├── subscribe WPS blockfile 
scope=block:{blockId} + │ └── loadInitialTerminalData(blockId) + └── ✅ Block 恢复为本地终端,daemon 继续运行 +``` + +### 6.4 删除 + +``` +wsh session delete dev + │ + ├── TerminateAndDetachJob(job-xyz) + ├── 遍历 AttachedBlocks: + │ Block.Meta["session:daemonid"] = "" + │ 通知前端 → 显示 "Session Ended" + ├── DB: SessionDaemon{status:"done"} + └── ✅ 从 SessionDaemonManager 移除 +``` + +### 6.5 WaveTerm 重启恢复 + +``` +WaveTerm 重启 + │ + ├── SessionDaemonManager.InitFromDB() + │ + ├── for each daemon (status = running | disconnected): + │ 1. 创建内存 daemon 对象 + │ 2. ReconnectJob(jobId) → 重连远端 JobManager + │ ├── PrepareConnect(seq = job/term 当前大小) + │ ├── 新 StreamReader + 新 runOutputLoop + │ └── 新 InputSessionId + │ + └── 有 daemonid 的 block 在渲染时自动读取 job 文件 + 显示 "reconnecting..." → 重连完成后正常显示 +``` + +### 6.6 远端意外终止 + +``` +远端 shell 退出 / 机器重启 + │ + ├── 本地 StreamReader 读到 EOF/error + ├── runOutputLoop 退出 + ├── DB: SessionDaemon{status:"done"} + └── 通知所有 attached block → 显示 "Session Ended" +``` + +## 7. Migration(一次性,启动时执行) + +### 7.1 旧模型 + +``` +Block { JobId: "job-xyz", Meta: { "term:durable": true } } +Job { OID: "job-xyz", AttachedBlockId: "block-A" } +``` + +输出同时写 `job:job-xyz/term` 和 `block:block-A/term`。 + +### 7.2 迁移目标 + +``` +Block { Meta: { "session:daemonid": "sd-abc" }, JobId: "" } +SessionDaemon { OID: "sd-abc", JobId: "job-xyz" } +Job { OID: "job-xyz", AttachedBlockId: "" } +``` + +### 7.3 流程 + +``` +WaveTerm 启动,SchemaVersion 检测到需要迁移 + │ + └── 扫描 DB 中所有 Block.JobId != "" 的记录 + │ + for each block: + ├── 创建 SessionDaemon 记录 + │ OID: uuid.new("sd-*") + │ Name: 自动生成("ssh:user@host:timestamp") + │ JobId: block.JobId(复用) + │ Status: 根据 Job.JobManagerStatus 映射 + │ Connection: block.Meta["connection"] + │ + ├── Block: Meta["session:daemonid"] = daemonId, JobId = "" + ├── Job: AttachedBlockId = "" + │ + └── 输出连续性:将 block:blockId/term 内容追加到 job:jobId/term + 完成后删除 block:blockId/term + │ + └── 迁移完成,更新 SchemaVersion +``` + +### 7.4 不兼容警告 + +- 迁移**不可逆**。回退后旧版本无法识别这些 block。 +- 迁移前建议备份 DB。 + +## 8. 
WSH Commands + +``` +wsh session create --name <name> --connection <connection> # 创建 daemon +wsh session delete <name> # 删除 daemon +wsh session list # 列出所有 daemon +wsh session attach <name> --block <blockid> # block 加入 daemon +wsh session detach --block <blockid> # block 离开 daemon +wsh session info <name> # daemon 详情 +``` + +## 9. Frontend + +### 9.1 Block 状态显示 + +| 状态 | Header 显示 | 内容区 | +|------|------------|--------| +| No Session | 无 daemon 标识 | 本地 shell | +| Attached (running) | `dev ●` (绿) | session 输出 | +| Attached (disconnected) | `dev ◌` (黄) | "Reconnecting..." | +| Session Ended | `dev ✗` (灰) | "Session Ended" | + +### 9.2 Attach/Detach 入口 + +- Block header 下拉菜单 +- 右键菜单 +- 命令面板 + +### 9.3 TermWrap 切换 zoneId + +当前 TermWrap 构造时绑定 `blockId` 作为 zoneId,从 `block:{blockId}/term` 读取。attach/detach 时动态切换数据源: + +``` +TermWrap.attachToDaemon(jobId): + 1. unsubscribe WPS blockfile scope=block:{blockId} + 2. subscribe WPS blockfile scope=job:{jobId} + 3. loadInitialTerminalData(jobId) // raw data,全量历史 + +TermWrap.detachFromDaemon(): + 1. unsubscribe WPS blockfile scope=job:{jobId} + 2. subscribe WPS blockfile scope=block:{blockId} + 3. loadInitialTerminalData(blockId) // 本地 shell +``` diff --git a/docs/design/session-daemon-design-v2.md b/docs/design/session-daemon-design-v2.md new file mode 100644 index 0000000000..4beaa5cfe6 --- /dev/null +++ b/docs/design/session-daemon-design-v2.md @@ -0,0 +1,502 @@ +# Session Daemon — Design Document V2 + +## 1. Overview + +Session Daemon 是一个持久的远程终端 session 模型。与当前 "一个 block 对应一个远程 job" 的 1:1 架构不同,Session Daemon 将**远端连接**与**block 视图**解耦。Session Daemon 独立于任何 block 存在,多个 block 可以 attach/detach 到同一个 daemon,所有 block 共享同一份 raw 输出数据,各自独立渲染。 + +**核心目标**:持久化(跨重启保持)、多视图镜像、所有 block 均可输入。 + +### 与 V1 的关键差异 + +V2 在 V1 的基础上做了简化:**runOutputLoop 不动,留在 JobController 内部**。SessionDaemon 只做命名、多 block 追踪、空闲超时,不管理 PTY、不管理输出流。详见 4.4 节。 + +## 2. 
Architecture + +``` +┌── Local WaveTerm ──────────────────────────────────────────────────┐ +│ │ +│ ┌── SessionDaemon ────────────────────────────────────────────┐ │ +│ │ id: "sd-abc" name: "dev" │ │ +│ │ jobId: "job-xyz" │ │ +│ │ connName: "ssh:user@host" │ │ +│ │ status: "running" │ │ +│ │ │ │ +│ │ AttachedBlocks: [block-A, block-B] │ │ +│ │ │ │ +│ │ ─── 不管理 PTY、不管理输出流 ─── │ │ +│ │ runOutputLoop → JobController 内部管理 │ │ +│ │ InputSessionId → SessionDaemon 持有,用于输入排序 │ │ +│ └──────────────────────────────┬───────────────────────────────┘ │ +│ │ │ +│ ┌────────────┴────────────┐ │ +│ │ │ │ +│ Block-A Block-B │ +│ ┌──────────────────┐ ┌──────────────────┐ │ +│ │ view: "term" │ │ view: "term" │ │ +│ │ meta: │ │ meta: │ │ +│ │ daemonid:sd-abc │ │ daemonid:sd-abc │ │ +│ │ │ │ │ │ +│ │ read job file │ │ read job file │ │ +│ │ sendInput ▶ D │ │ sendInput ▶ D │ │ +│ └──────────────────┘ └──────────────────┘ │ +│ │ │ │ +│ └────────┬────────────────┘ │ +│ ▼ │ +│ SessionDaemon.SendInput() │ +│ │ │ +│ ▼ │ +│ jobcontroller.SendInput() │ +│ │ │ +│ ▼ │ +│ Remote JobManager(job-xyz) │ +│ (single attached client, 不改动) │ +│ │ +│ ┌────────── JobController ───────────────────────────────┐ │ +│ │ runOutputLoop (goroutine, 内部管理) │ │ +│ │ StartJob() → 启动 runOutputLoop │ │ +│ │ ReconnectJob() → 新流取代旧流 (自毁机制) │ │ +│ │ SendInput() / TerminateJob() / connReconcileWorker │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +## 3. 
Data Model + +### 3.1 SessionDaemon(DB 持久化) + +``` +SessionDaemon { + OID: string // "sd-abc",内部标识 + Name: string // "dev",用户别名。空 = 匿名 daemon + Connection: string // "ssh:user@host" + JobId: string // "job-xyz" + IsAnonymous: bool // true = 自动创建,无 name + Status: string // "init" | "running" | "disconnected" | "done" + Cwd: string // 创建时的 CWD + CreatedAt: int64 + IdleTimeout: int64 // 超时回收(秒) + Meta: MetaMapType +} +``` + +- **命名 daemon**:通过 `wsh session create --name dev` 创建,`Name` 全局唯一。冲突时自动追加时间后缀(`dev` → `dev-150623`)。 +- **匿名 daemon**:SSH block 启动时自动创建,`Name=""`,`IsAnonymous=true`。 +- **空闲回收**:无 block attach 超过 `IdleTimeout` 后自动回收。默认值按类型区分: + - 匿名 daemon:**1h**(`3600` 秒) + - 命名 daemon:**24h**(`86400` 秒) + +### 3.2 Status 状态机 + +``` + ┌─────────────────────────────────────────────────┐ + │ │ + ▼ │ + ┌──────┐ StartJob成功 ┌─────────┐ SSH断开 ┌───────────────┐ + │ init │ ───────────────▶ │ running │ ──────────▶ │ disconnected │ + └──┬───┘ └────┬─────┘ └───────┬───────┘ + │ │ │ + │ StartJob失败 │ 用户删除 (ssh在线) │ 用户删除、idle timeout + │ │ 或 shell退出 │ 或 shell退出 + ▼ ▼ │ + ┌──────┐ ┌──────┐ │ + │ done │ │ done │ │ + └──────┘ └──────┘ │ + ▲ │ + └────────────────────────────────────────────────────┘ +``` + +| 状态 | 含义 | 前端 block 显示 | +|------|------|----------------| +| `init` | 正在创建,job 尚未启动 | "Starting..." | +| `running` | SSH 在线,远端 shell 运行中 | 正常终端 | +| `disconnected` | SSH 断开但远端 shell 仍存活 | "Reconnecting..." | +| `done` | 已终止 | "Session Ended" | + +### 3.3 Block(扩展) + +``` +Block { + ... // 现有字段不变 + Meta: { + ... 
+ "session:daemonid": string // 新增。为空 = 未 attach + } +} +``` + +### 3.4 Job(不变) + +现有结构完全保留。`Job.AttachedBlockId` 仍为单值,指向 daemon(不直接指向 block)。 + +### 3.5 DurableShellController 被 SessionDaemon 取代 + +SessionDaemon 覆盖了 DurableShellController 的全部职责,且支持多 block attach: + +- 移除 `pkg/blockcontroller/durableshellcontroller.go` +- 移除 `ResyncController` 中的 `DurableShellController` 分支 +- `IsBlockIdTermDurable` 不再需要 +- SSH block 启动时自动创建匿名 daemon,行为与之前一致(持久化、自动重连),同时获得多 block 共享能力 +- `handleAppendJobFile` 不再同时写 `block:blockId/term`,只写 `job:jobId/term` + +## 4. Backend Design + +### 4.1 Controller 调度(ResyncController) + +``` +if block.Meta["session:daemonid"] != "" { + → SessionDaemonController // 桥接到 daemon +} else if connType == SSH { + → 创建匿名 SessionDaemon + Block.Meta["session:daemonid"] = newDaemonId + ControllerResync(下一轮进入 SessionDaemonController) +} else { + → ShellController // 本地 / WSL +} +``` + +SSH block 启动时自动创建匿名 daemon(`IsAnonymous=true`,`IdleTimeout=1h`),后续交互全通过 `SessionDaemonController`。daemon 的创建对用户透明——用户打开 SSH block 的体验与之前一致。 + +只有当用户主动 `wsh session create --name` 时,才会产生命名 daemon。命名 daemon 可被多个 block attach,空闲超时 24h。 + +block 三态: + +``` + create / detach + ShellController ◄─────────────────► SessionDaemonController + (本地 shell) (桥接到 daemon) + │ │ + │ block 创建时默认 │ attach + │ (或 detach 后恢复) │ + │ ▼ + └── 没有 attach 时跑本地 shell session 输出实时显示 + 行为与现有非 durable block 一致 所有 block 可输入 +``` + +### 4.2 SessionDaemonController + +``` +SessionDaemonController { + BlockId: string + ConnName: string + DaemonId: string +} + +Start(): + → SessionDaemonManager.AttachBlock(daemonId, blockId) + → 返回 daemon.JobId(前端据此读文件) + → 发 controllerstatus 事件 + +SendInput(input): + → SessionDaemonManager.SendInput(daemonId, input.InputData) + → 若 input.TermSize 非空,更新 daemon 的 PTY 尺寸 + (多个 block resize 时最后一个生效) + +Stop(): + → SessionDaemonManager.DetachBlock(daemonId, blockId) + +GetRuntimeStatus(): + → 返回 daemon 的连接状态 (running/disconnected/done) +``` + +### 4.3 职责分层 + +``` +SessionDaemon 
jobcontroller Remote +────────────── ───────────── ────── + session 管理 原子操作 远端执行 + Name → jobId 映射 StartJob() RemoteStartJob + 多 block attach/detach ReconnectJob() RemoteReconnect + 空闲超时回收 SendInput() RemoteTerminate + wsh CLI 入口 TerminateJob() + runOutputLoop (goroutine) + connReconcileWorker + jobPruningWorker + handleAppendJobFile() +``` + +### 4.4 runOutputLoop 保持原位(与 V1 的关键差异) + +V1 提议将 runOutputLoop 从 `StartJob()` 内部迁入 SessionDaemon,使 daemon 获得输出流的生命周期控制权。V2 决定不迁移,理由如下: + +**1. 现有自毁机制足够** + +JobController 的 `RestartStreaming()` 在重连时创建新 StreamReader,新的 `runOutputLoop` 通过 `currentStreamId != streamId` 自毁检查自动取代旧 loop。SessionDaemon 无需感知或干预这个过程。 + +重连流程完全在 JobController 内部闭环: + +``` +ReconnectJob(jobId) + → PrepareConnect() → 新 StreamReader + 新 streamId + → go runOutputLoop(ctx, jobId, newStreamId, newReader) + → 每次循环检查 currentStreamId == streamId? + → 旧 runOutputLoop 检测到 streamId 不匹配 → break +``` + +**2. 避免破坏已有契约** + +`StartJob()` 当前返回 `(string, error)`,内部启动 goroutine。将 runOutputLoop 迁出需要改为返回 `(string, *streamclient.Reader, *StreamMeta, error)`,影响所有现有调用方。V2 认为不值得为这个改动破坏已有 API。 + +**3. 
职责分离** + +SessionDaemon 关注"有哪些 block 在看我",不关注"字节流怎么读怎么写"。输出流的生命周期是 JobController 的内部实现细节,SessionDaemon 不需要知道 StreamReader 的存在。 + +### 4.5 SessionDaemonManager(全局 in-memory) + +``` +SessionDaemonManager { + daemons: map[daemonId] *SessionDaemon + + // daemon 操作 + GetOrCreate(params) → (*SessionDaemon, error) + Get(daemonId) → (*SessionDaemon, error) + Remove(daemonId) + InitFromDB() // 启动时恢复所有 running daemon + + // block 操作 + AttachBlock(daemonId, blockId) + DetachBlock(daemonId, blockId) + GetBlocksForDaemon(daemonId) → []blockId + + // 输入 + SendInput(daemonId, data []byte) → error +} + +SessionDaemon (每个 daemon 一个实例) { + daemonId string + name string + jobId string + InputSessionId string // 输入排序用,所有 attached block 共用 + seqNum int // 单调递增 + blocks map[blockId] bool + + // 不管理 PTY / reader / runOutputLoop + // 这些全部由 JobController 内部管理 + + Start() // → jobcontroller.StartJob(),委托 + Reconnect() // → jobcontroller.ReconnectJob() + Stop() // → jobcontroller.TerminateJob() + SendInput() // → jobcontroller.SendInput() + Status() // → jobcontroller.GetJobManagerStatus() +} +``` + +## 5. Data Flow + +### 5.1 Output(只写 job 文件) + +当前 `handleAppendJobFile` 同时写 `job:jobId/term` 和 `block:blockId/term`。 +改为只写 `job:jobId/term`,所有 block 读同一份 raw 数据: + +``` +runOutputLoop(job-xyz) ← JobController 内部管理 + │ + ▼ +handleAppendJobFile(jobId, "term", data) + │ + ├── doWFSAppend(job:jobId, "term", data) + └── WPS Publish "blockfile" scope=job:{jobId} +``` + +前端 TermWrap 根据 block 的 daemonId 找到 JobId,以 `jobId` 作为 zoneId 读取。 + +### 5.2 Input(单路复用) + +所有 attached block 的输入汇聚到同一个 SessionDaemon,使用同一 `InputSessionId`: + +``` +Block-A.sendInput("ls\n") Block-B.sendInput("grep\n") + │ │ + └────────────┬────────────────┘ + ▼ + SessionDaemon.SendInput() + │ + InputSessionId: uuid-X, seqNum: ++ + │ + ▼ + jobcontroller.SendInput() + │ + ▼ + Remote JobManager.InputQueue + (QuickReorderQueue, 按 sessionId 排序) +``` + +远程 JobManager 不改动——它仍只看到一个 attachedClient,一条输入流。 + +## 6. 
Lifecycle + +### 6.1 创建 + +``` +wsh session create --name "dev" --connection ssh:user@host + │ + ├── DB: Insert SessionDaemon{status:"init"} + ├── StartRemoteShellJob() → job-xyz + │ (JobController 内部启动 runOutputLoop) + ├── DB: Update SessionDaemon{status:"running", jobId:"job-xyz"} + ├── 注册到 SessionDaemonManager + └── ✅ Daemon 存活,AttachedBlocks:[](无 block 连接) +``` + +### 6.2 Attach + +``` +wsh session attach dev --block block-A + │ + ├── Block.Meta["session:daemonid"] = "sd-abc" + ├── SessionDaemonManager.AttachBlock("sd-abc", "block-A") + ├── 前端 TermWrap.attachToDaemon(jobId) + │ ├── unsubscribe WPS blockfile scope=block:{blockId} + │ ├── subscribe WPS blockfile scope=job:{jobId} + │ └── loadInitialTerminalData(jobId) // raw data,全量历史 + └── ✅ Block 显示 session 输出,可以输入 +``` + +### 6.3 Detach + +``` +wsh session detach --block block-A + │ + ├── 清除 Block.Meta["session:daemonid"] + ├── SessionDaemonManager.DetachBlock("sd-abc", "block-A") + ├── ControllerResync → 重建 ShellController + │ └── ShellController.Start() → 启动本地 shell + ├── 前端 TermWrap.detachFromDaemon() + │ ├── unsubscribe WPS blockfile scope=job:{jobId} + │ ├── subscribe WPS blockfile scope=block:{blockId} + │ └── loadInitialTerminalData(blockId) + └── ✅ Block 恢复为本地终端,daemon 继续运行 +``` + +### 6.4 删除 + +``` +wsh session delete dev + │ + ├── TerminateAndDetachJob(job-xyz) + │ (JobController 终止进程 + 停止 runOutputLoop) + ├── 遍历 AttachedBlocks: + │ Block.Meta["session:daemonid"] = "" + │ 通知前端 → 显示 "Session Ended" + ├── DB: SessionDaemon{status:"done"} + └── ✅ 从 SessionDaemonManager 移除 +``` + +### 6.5 WaveTerm 重启恢复 + +``` +WaveTerm 重启 + │ + ├── SessionDaemonManager.InitFromDB() + │ + ├── for each daemon (status = running | disconnected): + │ 1. 创建内存 daemon 对象 + │ 2. JobController.ReconnectJob(jobId) → 内部管理新 runOutputLoop + │ 3. 新 InputSessionId + │ + └── 有 daemonid 的 block 在渲染时自动读取 job 文件 + 显示 "reconnecting..." 
→ 重连完成后正常显示 +``` + +### 6.6 远端意外终止 + +``` +远端 shell 退出 / 机器重启 + │ + ├── 本地 StreamReader 读到 EOF/error (JobController 内部) + ├── runOutputLoop 退出 (JobController 内部) + ├── DB: SessionDaemon{status:"done"} + └── 通知所有 attached block → 显示 "Session Ended" +``` + +## 7. Migration(一次性,启动时执行) + +### 7.1 旧模型 + +``` +Block { JobId: "job-xyz", Meta: { "term:durable": true } } +Job { OID: "job-xyz", AttachedBlockId: "block-A" } +``` + +输出同时写 `job:job-xyz/term` 和 `block:block-A/term`。 + +### 7.2 迁移目标 + +``` +Block { Meta: { "session:daemonid": "sd-abc" }, JobId: "" } +SessionDaemon { OID: "sd-abc", JobId: "job-xyz" } +Job { OID: "job-xyz", AttachedBlockId: "" } +``` + +### 7.3 流程 + +``` +WaveTerm 启动,SchemaVersion 检测到需要迁移 + │ + └── 扫描 DB 中所有 Block.JobId != "" 的记录 + │ + for each block: + ├── 创建 SessionDaemon 记录 + │ OID: uuid.new("sd-*") + │ Name: 自动生成("ssh:user@host:timestamp") + │ JobId: block.JobId(复用) + │ Status: 根据 Job.JobManagerStatus 映射 + │ Connection: block.Meta["connection"] + │ + ├── Block: Meta["session:daemonid"] = daemonId, JobId = "" + ├── Job: AttachedBlockId = "" + │ + └── 输出连续性:将 block:blockId/term 内容追加到 job:jobId/term + 完成后删除 block:blockId/term + │ + └── 迁移完成,更新 SchemaVersion +``` + +### 7.4 不兼容警告 + +- 迁移**不可逆**。回退后旧版本无法识别这些 block。 +- 迁移前建议备份 DB。 + +## 8. WSH Commands + +``` +wsh session create --name <name> --connection <connection> # 创建 daemon +wsh session delete <name> # 删除 daemon +wsh session list # 列出所有 daemon +wsh session attach <name> --block <blockid> # block 加入 daemon +wsh session detach --block <blockid> # block 离开 daemon +wsh session info <name> # daemon 详情 +``` + +## 9. Frontend + +### 9.1 Block 状态显示 + +| 状态 | Header 显示 | 内容区 | +|------|------------|--------| +| No Session | 无 daemon 标识 | 本地 shell | +| Attached (running) | `dev ●` (绿) | session 输出 | +| Attached (disconnected) | `dev ◌` (黄) | "Reconnecting..." 
| +| Session Ended | `dev ✗` (灰) | "Session Ended" | + +### 9.2 Attach/Detach 入口 + +- Block header 下拉菜单 +- 右键菜单 +- 命令面板 + +### 9.3 TermWrap 切换 zoneId + +当前 TermWrap 构造时绑定 `blockId` 作为 zoneId,从 `block:{blockId}/term` 读取。attach/detach 时动态切换数据源: + +``` +TermWrap.attachToDaemon(jobId): + 1. unsubscribe WPS blockfile scope=block:{blockId} + 2. subscribe WPS blockfile scope=job:{jobId} + 3. loadInitialTerminalData(jobId) // raw data,全量历史 + +TermWrap.detachFromDaemon(): + 1. unsubscribe WPS blockfile scope=job:{jobId} + 2. subscribe WPS blockfile scope=block:{blockId} + 3. loadInitialTerminalData(blockId) // 本地 shell +``` diff --git a/emain/emain-wavesrv.ts b/emain/emain-wavesrv.ts index f58d214a7e..dbd14bf4df 100644 --- a/emain/emain-wavesrv.ts +++ b/emain/emain-wavesrv.ts @@ -3,6 +3,8 @@ import * as electron from "electron"; import * as child_process from "node:child_process"; +import * as fs from "node:fs"; +import * as path from "node:path"; import * as readline from "readline"; import { WebServerEndpointVarName, WSServerEndpointVarName } from "../frontend/util/endpoints"; import { AuthKey, WaveAuthKeyEnv } from "./authkey"; diff --git a/frontend/app/block/blockenv.ts b/frontend/app/block/blockenv.ts index 8a529be11b..fa7cd65b58 100644 --- a/frontend/app/block/blockenv.ts +++ b/frontend/app/block/blockenv.ts @@ -46,6 +46,7 @@ export type BlockEnv = WaveEnvSubset<{ | "icon:color" | "frame:title" | "frame:icon" + | "session:daemonid" >; getTabMetaKeyAtom: MetaKeyAtomFnType<"bg:activebordercolor" | "bg:bordercolor" | "tab:background">; getConfigBackgroundAtom: WaveEnv["getConfigBackgroundAtom"]; diff --git a/frontend/app/block/blockframe-header.tsx b/frontend/app/block/blockframe-header.tsx index a70f323e71..290e6cc8a3 100644 --- a/frontend/app/block/blockframe-header.tsx +++ b/frontend/app/block/blockframe-header.tsx @@ -9,7 +9,7 @@ import { renderHeaderElements, } from "@/app/block/blockutil"; import { ConnectionButton } from "@/app/block/connectionbutton"; -import { 
DurableSessionFlyover } from "@/app/block/durable-session-flyover"; +import { SessionDaemonIndicator } from "@/app/block/session-daemon-indicator"; import { getBlockBadgeAtom } from "@/app/store/badge"; import { createBlockSplitHorizontally, @@ -224,7 +224,6 @@ const BlockFrame_Header = ({ let viewIconUnion = util.useAtomValueSafe(viewModel?.viewIcon) ?? blockViewToIcon(metaView); const preIconButton = util.useAtomValueSafe(viewModel?.preIconButton); const useTermHeader = util.useAtomValueSafe(viewModel?.useTermHeader); - const termConfigedDurable = util.useAtomValueSafe(viewModel?.termConfigedDurable); const hideViewName = util.useAtomValueSafe(viewModel?.hideViewName); const badge = jotai.useAtomValue(getBlockBadgeAtom(useTermHeader ? nodeModel.blockId : null)); const magnified = jotai.useAtomValue(nodeModel.isMagnified); @@ -271,15 +270,7 @@ const BlockFrame_Header = ({ isTerminalBlock={isTerminalBlock} /> )} - {useTermHeader && termConfigedDurable != null && ( - - )} + {useTermHeader && badge && (
diff --git a/frontend/app/block/session-daemon-hooks.ts b/frontend/app/block/session-daemon-hooks.ts new file mode 100644 index 0000000000..2d2b82c5e3 --- /dev/null +++ b/frontend/app/block/session-daemon-hooks.ts @@ -0,0 +1,234 @@ +// Copyright 2026, Command Line Inc. +// SPDX-License-Identifier: Apache-2.0 + +import { globalStore } from "@/app/store/jotaiStore"; +import { RpcApi } from "@/app/store/wshclientapi"; +import { TabRpcClient } from "@/app/store/wshrpcutil"; +import { useWaveEnv } from "@/app/waveenv/waveenv"; +import { fireAndForget } from "@/util/util"; +import { autoUpdate, flip, offset, shift, useFloating } from "@floating-ui/react"; +import * as jotai from "jotai"; +import type * as React from "react"; +import { Dispatch, SetStateAction, useCallback, useEffect, useMemo, useRef, useState } from "react"; +import { BlockEnv } from "./blockenv"; +import { SessionDisplayData, SessionInfo } from "./session-daemon-types"; + +const EmptySessionDisplayAtom = jotai.atom({ name: null, isanonymous: true }); +const sessionDisplayAtomMap = new Map>(); + +function getSessionDisplayAtom(daemonId: string): jotai.PrimitiveAtom { + let a = sessionDisplayAtomMap.get(daemonId); + if (!a) { + a = jotai.atom({ name: null, isanonymous: true }); + sessionDisplayAtomMap.set(daemonId, a); + } + return a; +} + +export interface SessionDaemonIndicatorState { + daemonId: string; + visible: boolean; + showPopup: boolean; + setShowPopup: Dispatch>; + sessions: SessionInfo[]; + sameConnSessions: SessionInfo[]; + sessionDisplay: SessionDisplayData; + editingId: string; + editName: string; + setEditName: Dispatch>; + creating: boolean; + showCreateInput: boolean; + setShowCreateInput: Dispatch>; + newSessionName: string; + setNewSessionName: Dispatch>; + popupRef: React.RefObject; + iconRef: React.RefObject; + editInputRef: React.RefObject; + createInputRef: React.RefObject; + floatingStyles: React.CSSProperties; + handleAttach: (targetDaemonId: string) => void; + handleStartEdit: 
(daemonId: string, currentName: string) => void; + handleSaveEdit: () => void; + handleCancelEdit: () => void; + handleCreateAndAttach: (name?: string) => Promise; + handleDelete: (daemonId: string) => void; +} + +export function useSessionDaemonIndicator(blockId: string): SessionDaemonIndicatorState { + const waveEnv = useWaveEnv(); + const daemonId = jotai.useAtomValue(waveEnv.getBlockMetaKeyAtom(blockId, "session:daemonid")); + const connName = jotai.useAtomValue(waveEnv.getBlockMetaKeyAtom(blockId, "connection")); + const [showPopup, setShowPopup] = useState(false); + const [sessions, setSessions] = useState([]); + const [editingId, setEditingId] = useState(null); + const [editName, setEditName] = useState(""); + const [creating, setCreating] = useState(false); + const creatingRef = useRef(false); + const [showCreateInput, setShowCreateInput] = useState(false); + const [newSessionName, setNewSessionName] = useState(""); + const createInputRef = useRef(null); + const editInputRef = useRef(null); + const popupRef = useRef(null); + const iconRef = useRef(null); + const sessionDisplayAtom = daemonId ? getSessionDisplayAtom(daemonId) : EmptySessionDisplayAtom; + const sessionDisplay = jotai.useAtomValue(sessionDisplayAtom); + const isSshConn = connName && !connName.startsWith("local") && !connName.startsWith("wsl://"); + const visible = !!daemonId || isSshConn; + const sameConnSessions = useMemo(() => sessions.filter((s) => s.connection === connName), [sessions, connName]); + const { floatingStyles } = useFloating({ + elements: { + reference: iconRef.current, + floating: popupRef.current, + }, + open: showPopup, + onOpenChange: setShowPopup, + placement: "bottom-end", + middleware: [offset(6), flip(), shift({ padding: 12 })], + whileElementsMounted: autoUpdate, + }); + + useEffect(() => { + if (!showPopup) return; + fireAndForget(async () => { + try { + const list = await RpcApi.SessionListCommand(TabRpcClient, { showall: true }); + setSessions((list ?? 
[]) as SessionInfo[]); + } catch (e) { + console.log("error loading session list:", e); + } + }); + }, [showPopup]); + + useEffect(() => { + if (!daemonId) return; + fireAndForget(async () => { + try { + const info = await RpcApi.SessionInfoCommand(TabRpcClient, { daemonid: daemonId }); + if (info) { + const atom = getSessionDisplayAtom(daemonId); + globalStore.set(atom, { name: info.name || null, isanonymous: info.isanonymous }); + } + } catch (_) {} + }); + }, [daemonId]); + + useEffect(() => { + function handleClick(e: MouseEvent) { + if ( + popupRef.current && + !popupRef.current.contains(e.target as Node) && + iconRef.current && + !iconRef.current.contains(e.target as Node) + ) { + setShowPopup(false); + } + } + if (showPopup) { + document.addEventListener("mousedown", handleClick); + return () => document.removeEventListener("mousedown", handleClick); + } + }, [showPopup]); + + const handleAttach = useCallback((targetDaemonId: string) => { + if (targetDaemonId === daemonId) return; + if (editingId) return; + fireAndForget(async () => { + try { + await RpcApi.SessionAttachCommand(TabRpcClient, { daemonid: targetDaemonId, blockid: blockId, currentdaemonid: daemonId ?? 
undefined }); + setShowPopup(false); + } catch (e) { + console.log("error switching session:", e); + } + }); + }, [daemonId, editingId, blockId]); + + const handleStartEdit = useCallback((daemonId: string, currentName: string) => { + setEditingId(daemonId); + setEditName(currentName || ""); + setTimeout(() => editInputRef.current?.focus(), 0); + }, []); + + const handleSaveEdit = useCallback(() => { + const id = editingId; + const name = editName.trim(); + if (!id) return; + setEditingId(null); + const atom = getSessionDisplayAtom(id); + globalStore.set(atom, { name: name || null, isanonymous: !name }); + fireAndForget(async () => { + try { + await RpcApi.SessionTagCommand(TabRpcClient, { daemonid: id, name: name || "Unnamed session" }); + const list = await RpcApi.SessionListCommand(TabRpcClient, { showall: true }); + setSessions((list ?? []) as SessionInfo[]); + } catch (e) { + console.log("error renaming session:", e); + } + }); + }, [editingId, editName]); + + const handleCancelEdit = useCallback(() => { + setEditingId(null); + }, []); + + const handleCreateAndAttach = useCallback(async (name?: string) => { + if (!connName || creatingRef.current) return; + creatingRef.current = true; + setCreating(true); + try { + const info = await RpcApi.SessionCreateCommand(TabRpcClient, { connection: connName, name }); + if (info?.daemonid) { + await RpcApi.SessionAttachCommand(TabRpcClient, { + daemonid: info.daemonid, + blockid: blockId, + currentdaemonid: daemonId ?? 
undefined, + }); + setShowPopup(false); + } + } catch (e) { + console.log("error creating session:", e); + } finally { + creatingRef.current = false; + setCreating(false); + } + }, [connName, blockId, daemonId]); + + const handleDelete = useCallback((daemonId: string) => { + fireAndForget(async () => { + try { + await RpcApi.SessionDeleteCommand(TabRpcClient, { daemonid: daemonId }); + setSessions((prev) => prev.filter((x) => x.daemonid !== daemonId)); + } catch (e) { + console.log("error closing session:", e); + } + }); + }, []); + + return { + daemonId, + visible, + showPopup, + setShowPopup, + sessions, + sameConnSessions, + sessionDisplay, + editingId, + editName, + setEditName, + creating, + showCreateInput, + setShowCreateInput, + newSessionName, + setNewSessionName, + popupRef, + iconRef, + editInputRef, + createInputRef, + floatingStyles, + handleAttach, + handleStartEdit, + handleSaveEdit, + handleCancelEdit, + handleCreateAndAttach, + handleDelete, + }; +} diff --git a/frontend/app/block/session-daemon-indicator.tsx b/frontend/app/block/session-daemon-indicator.tsx new file mode 100644 index 0000000000..d6c868f79c --- /dev/null +++ b/frontend/app/block/session-daemon-indicator.tsx @@ -0,0 +1,42 @@ +// Copyright 2026, Command Line Inc. +// SPDX-License-Identifier: Apache-2.0 + +import { useSessionDaemonIndicator } from "./session-daemon-hooks"; +import { SessionDaemonPopup } from "./session-daemon-popup"; + +interface SessionDaemonIndicatorProps { + blockId: string; + useTermHeader: boolean; +} + +export function SessionDaemonIndicator({ blockId, useTermHeader }: SessionDaemonIndicatorProps) { + const state = useSessionDaemonIndicator(blockId); + + if (!useTermHeader) { + return null; + } + + return ( + <> +
state.setShowPopup((v) => !v)} + style={{ display: state.visible ? "inline-flex" : "none", alignItems: "center", gap: 4 }} + > + + {state.daemonId ? ( + + {state.sessionDisplay.isanonymous ? state.daemonId.slice(0, 8) : (state.sessionDisplay.name || state.daemonId.slice(0, 8))} + + ) : ( + + non session + + )} +
+ + + ); +} diff --git a/frontend/app/block/session-daemon-popup.tsx b/frontend/app/block/session-daemon-popup.tsx new file mode 100644 index 0000000000..80bb889cf9 --- /dev/null +++ b/frontend/app/block/session-daemon-popup.tsx @@ -0,0 +1,79 @@ +// Copyright 2026, Command Line Inc. +// SPDX-License-Identifier: Apache-2.0 + +import { FloatingPortal } from "@floating-ui/react"; +import { SessionDaemonIndicatorState } from "./session-daemon-hooks"; +import { SessionCreateRow, SessionRow } from "./session-daemon-rows"; + +const popupStyle = { + zIndex: 100, + width: "min(420px, calc(100vw - 24px))", + maxHeight: 360, + overflowY: "auto", + background: "color-mix(in srgb, var(--bg-secondary, #1e1e2e) 96%, black)", + border: "1px solid color-mix(in srgb, var(--border-primary, #45475a) 78%, transparent)", + borderRadius: 10, + padding: 8, + boxShadow: "0 18px 42px rgba(0,0,0,0.42), 0 2px 8px rgba(0,0,0,0.28)", +} as const; + +interface SessionDaemonPopupProps { + state: SessionDaemonIndicatorState; +} + +export function SessionDaemonPopup({ state }: SessionDaemonPopupProps) { + if (!state.showPopup) { + return null; + } + return ( + +
e.stopPropagation()} + onFocusCapture={(e) => e.stopPropagation()} + onClick={(e) => e.stopPropagation()} + > +
+
+ + + Sessions + +
+ + {state.sameConnSessions.length} + +
+ + {state.sameConnSessions.length === 0 && ( +
+ No sessions on this connection +
+ )} + {state.sameConnSessions.map((session) => ( + + ))} +
+
+ ); +} diff --git a/frontend/app/block/session-daemon-rows.tsx b/frontend/app/block/session-daemon-rows.tsx new file mode 100644 index 0000000000..8ba52e86ca --- /dev/null +++ b/frontend/app/block/session-daemon-rows.tsx @@ -0,0 +1,286 @@ +// Copyright 2026, Command Line Inc. +// SPDX-License-Identifier: Apache-2.0 + +import { SessionDaemonIndicatorState } from "./session-daemon-hooks"; +import { SessionInfo } from "./session-daemon-types"; + +const truncateStyle = { + minWidth: 0, + overflow: "hidden", + textOverflow: "ellipsis", + whiteSpace: "nowrap", +} as const; + +function formatCreatedTime(ms: number | undefined): string { + if (ms == null) return ""; + const d = new Date(ms); + const now = new Date(); + const diffMs = now.getTime() - d.getTime(); + const diffMin = Math.floor(diffMs / 60000); + if (diffMin < 1) return "just now"; + if (diffMin < 60) return `${diffMin}m ago`; + const diffHr = Math.floor(diffMin / 60); + if (diffHr < 24) return `${diffHr}h ago`; + const diffDay = Math.floor(diffHr / 24); + if (diffDay < 7) return `${diffDay}d ago`; + return d.toLocaleDateString(undefined, { month: "short", day: "numeric", year: "numeric" }); +} + +function SessionStatusPill({ status }: { status: string }) { + const isRunning = status === "running"; + return ( + + + {status || "unknown"} + + ); +} + +interface SessionCreateRowProps { + state: SessionDaemonIndicatorState; +} + +export function SessionCreateRow({ state }: SessionCreateRowProps) { + if (state.showCreateInput) { + return ( +
+ + state.setNewSessionName(e.target.value)} + onKeyDown={(e) => { + if (e.key === "Enter") { + const name = state.newSessionName.trim(); + state.handleCreateAndAttach(name || undefined); + state.setShowCreateInput(false); + state.setNewSessionName(""); + } + if (e.key === "Escape") { + state.setShowCreateInput(false); + state.setNewSessionName(""); + } + }} + placeholder="Session name (optional)" + style={{ + flex: 1, + background: "transparent", + border: "none", + outline: "none", + color: "#7dd3fc", + fontSize: 13, + fontWeight: 600, + }} + /> +
+ ); + } + return ( +
{ + state.setShowCreateInput(true); + setTimeout(() => state.createInputRef.current?.focus(), 0); + }} + style={{ + display: "flex", + alignItems: "center", + gap: 8, + padding: "8px 10px", + marginBottom: 4, + cursor: state.creating ? "default" : "pointer", + borderRadius: 8, + background: "rgba(56, 189, 248, 0.08)", + border: "1px solid rgba(56, 189, 248, 0.18)", + opacity: state.creating ? 0.5 : 1, + }} + > + + + {state.creating ? "Creating..." : "Create new session"} + +
+ ); +} + +interface SessionRowProps { + session: SessionInfo; + state: SessionDaemonIndicatorState; +} + +export function SessionRow({ session, state }: SessionRowProps) { + const isActive = session.daemonid === state.daemonId; + const blockCount = session.blocks?.length ?? 0; + const canClose = blockCount === 0 || session.status === "done"; + const displayStatus = session.status === "done" ? "done" : blockCount === 0 ? "idle" : session.status; + return ( +
state.handleAttach(session.daemonid)} + title={`${session.name || session.connection} · ${session.status}`} + style={{ + display: "grid", + gridTemplateColumns: "minmax(0, 1fr) auto", + gap: 10, + padding: "9px 10px", + marginTop: 4, + cursor: isActive ? "default" : "pointer", + borderRadius: 8, + fontSize: 13, + background: isActive ? "rgba(56, 189, 248, 0.12)" : "transparent", + border: isActive ? "1px solid rgba(56, 189, 248, 0.24)" : "1px solid transparent", + }} + onMouseEnter={(e) => { + if (!isActive) { + e.currentTarget.style.background = "rgba(148, 163, 184, 0.08)"; + } + }} + onMouseLeave={(e) => { + if (!isActive) { + e.currentTarget.style.background = "transparent"; + } + }} + > +
+ + + +
+ {state.editingId === session.daemonid ? ( + state.setEditName(e.target.value)} + onKeyDown={(e) => { + if (e.key === "Enter") state.handleSaveEdit(); + if (e.key === "Escape") state.handleCancelEdit(); + }} + onBlur={state.handleSaveEdit} + onClick={(e) => e.stopPropagation()} + style={{ + width: "100%", + fontWeight: 650, + color: "var(--text-primary)", + fontSize: 14, + lineHeight: "20px", + background: "rgba(148, 163, 184, 0.12)", + border: "1px solid rgba(56, 189, 248, 0.3)", + borderRadius: 4, + padding: "1px 6px", + outline: "none", + }} + /> + ) : ( +
{ + e.stopPropagation(); + state.handleStartEdit(session.daemonid, session.name); + }} + style={{ display: "flex", alignItems: "center", gap: 6, cursor: "text" }} + title="Click to rename" + > + + {session.name || "Unnamed session"} + + +
+ )} + {session.connection && ( +
+ {session.connection} +
+ )} +
+ Sess: {session.daemonid.slice(0, 8)} +
+ {session.jobid && ( +
+ Job: {session.jobid.slice(0, 8)} +
+ )} +
+
+
+ + + {formatCreatedTime(session.createdat)} + + {canClose ? ( + { + e.stopPropagation(); + state.handleDelete(session.daemonid); + }} + style={{ + fontSize: 11, + color: "var(--text-muted)", + cursor: "pointer", + opacity: 0.6, + display: "inline-flex", + alignItems: "center", + gap: 3, + }} + title="Close session" + > + + Close + + ) : ( + + {isActive ? "active" : `${blockCount} block${blockCount === 1 ? "" : "s"}`} + + )} +
+
+ ); +} diff --git a/frontend/app/block/session-daemon-types.ts b/frontend/app/block/session-daemon-types.ts new file mode 100644 index 0000000000..df40712215 --- /dev/null +++ b/frontend/app/block/session-daemon-types.ts @@ -0,0 +1,19 @@ +// Copyright 2026, Command Line Inc. +// SPDX-License-Identifier: Apache-2.0 + +export interface SessionDisplayData { + name: string | null; + isanonymous: boolean; +} + +export interface SessionInfo { + daemonid: string; + name: string; + connection: string; + status: string; + isanonymous: boolean; + createdat?: number; + blocks?: string[]; + jobid?: string; + lastactiveat?: number; +} diff --git a/frontend/app/modals/confirmmodal.tsx b/frontend/app/modals/confirmmodal.tsx new file mode 100644 index 0000000000..158beffdb3 --- /dev/null +++ b/frontend/app/modals/confirmmodal.tsx @@ -0,0 +1,25 @@ +// Copyright 2025, Command Line Inc. +// SPDX-License-Identifier: Apache-2.0 + +import { Modal } from "@/app/modals/modal"; +import { modalsModel } from "@/app/store/modalmodel"; + +const ConfirmModal = ({ message, onOk }: { message: string; onOk?: () => void }) => { + const handleOk = () => { + modalsModel.popModal(); + if (onOk) onOk(); + }; + const handleClose = () => { + modalsModel.popModal(); + }; + + return ( + +
{message}
+
+ ); +}; + +ConfirmModal.displayName = "ConfirmModal"; + +export { ConfirmModal }; diff --git a/frontend/app/modals/modalregistry.tsx b/frontend/app/modals/modalregistry.tsx index 88d19e732c..ec583b4cbe 100644 --- a/frontend/app/modals/modalregistry.tsx +++ b/frontend/app/modals/modalregistry.tsx @@ -8,6 +8,7 @@ import { UpgradeOnboardingPatch } from "@/app/onboarding/onboarding-upgrade-patc import { DeleteFileModal, PublishAppModal, RenameFileModal } from "@/builder/builder-apppanel"; import { SetSecretDialog } from "@/builder/tabs/builder-secrettab"; import { AboutModal } from "./about"; +import { ConfirmModal } from "./confirmmodal"; import { UserInputModal } from "./userinputmodal"; const modalRegistry: { [key: string]: React.ComponentType } = { @@ -17,6 +18,7 @@ const modalRegistry: { [key: string]: React.ComponentType } = { [UserInputModal.displayName || "UserInputModal"]: UserInputModal, [AboutModal.displayName || "AboutModal"]: AboutModal, [MessageModal.displayName || "MessageModal"]: MessageModal, + [ConfirmModal.displayName || "ConfirmModal"]: ConfirmModal, [PublishAppModal.displayName || "PublishAppModal"]: PublishAppModal, [RenameFileModal.displayName || "RenameFileModal"]: RenameFileModal, [DeleteFileModal.displayName || "DeleteFileModal"]: DeleteFileModal, diff --git a/frontend/app/store/focusManager.ts b/frontend/app/store/focusManager.ts index 58f78951ee..cc00c0de4d 100644 --- a/frontend/app/store/focusManager.ts +++ b/frontend/app/store/focusManager.ts @@ -3,8 +3,11 @@ import { waveAIHasFocusWithin } from "@/app/aipanel/waveai-focus-utils"; import { WaveAIModel } from "@/app/aipanel/waveai-model"; -import { getBlockComponentModel } from "@/app/store/global"; +import { getBlockComponentModel, getBlockMetaKeyAtom } from "@/app/store/global"; import { globalStore } from "@/app/store/jotaiStore"; +import { RpcApi } from "@/app/store/wshclientapi"; +import { TabRpcClient } from "@/app/store/wshrpcutil"; +import { fireAndForget } from "@/util/util"; 
import { getLayoutModelForStaticTab } from "@/layout/index"; import { focusedBlockId } from "@/util/focusutil"; import { Atom, atom, type PrimitiveAtom } from "jotai"; @@ -26,6 +29,22 @@ export class FocusManager { const lnode = get(layoutModel.focusedNode); return lnode?.data?.blockId; }); + + let prevBlockId: string | null = null; + globalStore.sub(this.blockFocusAtom, () => { + const blockId = globalStore.get(this.blockFocusAtom); + if (blockId && blockId !== prevBlockId) { + prevBlockId = blockId; + try { + const daemonId = globalStore.get(getBlockMetaKeyAtom(blockId, "session:daemonid")); + if (daemonId) { + fireAndForget(() => RpcApi.RecordSessionActivityCommand(TabRpcClient, { daemonid: daemonId })); + } + } catch (_) {} + } else if (!blockId) { + prevBlockId = null; + } + }); } static getInstance(): FocusManager { diff --git a/frontend/app/store/services.ts b/frontend/app/store/services.ts index 9e6e156bc3..d4898c285e 100644 --- a/frontend/app/store/services.ts +++ b/frontend/app/store/services.ts @@ -30,8 +30,8 @@ export class BlockServiceType { return callBackendService(this?.waveEnv, "block", "GetControllerStatus", Array.from(arguments)) } - // save the terminal state to a blockfile - SaveTerminalState(blockId: string, state: string, stateType: string, ptyOffset: number, termSize: TermSize): Promise { + // save the terminal state to a zone file + SaveTerminalState(zoneId: string, state: string, stateType: string, ptyOffset: number, termSize: TermSize): Promise { return callBackendService(this?.waveEnv, "block", "SaveTerminalState", Array.from(arguments)) } } diff --git a/frontend/app/store/wshclientapi.ts b/frontend/app/store/wshclientapi.ts index 8482be260d..5155a22652 100644 --- a/frontend/app/store/wshclientapi.ts +++ b/frontend/app/store/wshclientapi.ts @@ -672,6 +672,12 @@ export class RpcApiType { return client.wshRpcCall("readappfile", data, opts); } + // command "recordsessionactivity" [call] + RecordSessionActivityCommand(client: WshClient, 
data: CommandRecordSessionActivityData, opts?: RpcOpts): Promise { + if (this.mockClient) return this.mockClient.mockWshRpcCall(client, "recordsessionactivity", data, opts); + return client.wshRpcCall("recordsessionactivity", data, opts); + } + // command "recordtevent" [call] RecordTEventCommand(client: WshClient, data: TEvent, opts?: RpcOpts): Promise { if (this.mockClient) return this.mockClient.mockWshRpcCall(client, "recordtevent", data, opts); @@ -834,6 +840,48 @@ export class RpcApiType { return client.wshRpcCall("sendtelemetry", null, opts); } + // command "sessionattach" [call] + SessionAttachCommand(client: WshClient, data: CommandSessionAttachData, opts?: RpcOpts): Promise { + if (this.mockClient) return this.mockClient.mockWshRpcCall(client, "sessionattach", data, opts); + return client.wshRpcCall("sessionattach", data, opts); + } + + // command "sessioncreate" [call] + SessionCreateCommand(client: WshClient, data: CommandSessionCreateData, opts?: RpcOpts): Promise { + if (this.mockClient) return this.mockClient.mockWshRpcCall(client, "sessioncreate", data, opts); + return client.wshRpcCall("sessioncreate", data, opts); + } + + // command "sessiondelete" [call] + SessionDeleteCommand(client: WshClient, data: CommandSessionDeleteData, opts?: RpcOpts): Promise { + if (this.mockClient) return this.mockClient.mockWshRpcCall(client, "sessiondelete", data, opts); + return client.wshRpcCall("sessiondelete", data, opts); + } + + // command "sessiondetach" [call] + SessionDetachCommand(client: WshClient, data: CommandSessionDetachData, opts?: RpcOpts): Promise { + if (this.mockClient) return this.mockClient.mockWshRpcCall(client, "sessiondetach", data, opts); + return client.wshRpcCall("sessiondetach", data, opts); + } + + // command "sessioninfo" [call] + SessionInfoCommand(client: WshClient, data: CommandSessionInfoData, opts?: RpcOpts): Promise { + if (this.mockClient) return this.mockClient.mockWshRpcCall(client, "sessioninfo", data, opts); + return 
client.wshRpcCall("sessioninfo", data, opts); + } + + // command "sessionlist" [call] + SessionListCommand(client: WshClient, data: CommandSessionListData, opts?: RpcOpts): Promise { + if (this.mockClient) return this.mockClient.mockWshRpcCall(client, "sessionlist", data, opts); + return client.wshRpcCall("sessionlist", data, opts); + } + + // command "sessiontag" [call] + SessionTagCommand(client: WshClient, data: CommandSessionTagData, opts?: RpcOpts): Promise { + if (this.mockClient) return this.mockClient.mockWshRpcCall(client, "sessiontag", data, opts); + return client.wshRpcCall("sessiontag", data, opts); + } + // command "setblockfocus" [call] SetBlockFocusCommand(client: WshClient, data: string, opts?: RpcOpts): Promise { if (this.mockClient) return this.mockClient.mockWshRpcCall(client, "setblockfocus", data, opts); diff --git a/frontend/app/view/term/term-model.ts b/frontend/app/view/term/term-model.ts index a256929e7d..4999628f54 100644 --- a/frontend/app/view/term/term-model.ts +++ b/frontend/app/view/term/term-model.ts @@ -766,7 +766,7 @@ export class TermViewModel implements ViewModel { return false; } const shellProcStatus = globalStore.get(this.shellProcStatus); - if ((shellProcStatus == "done" || shellProcStatus == "init") && keyutil.checkKeyPressed(waveEvent, "Enter")) { + if (shellProcStatus == "done" && keyutil.checkKeyPressed(waveEvent, "Enter")) { fireAndForget(() => this.forceRestartController()); return false; } @@ -1353,6 +1353,37 @@ export class TermViewModel implements ViewModel { }); } + const sessionDaemonId = blockData?.meta?.["session:daemonid"]; + if (sessionDaemonId) { + advancedSubmenu.push({ type: "separator" }); + advancedSubmenu.push({ + label: "Session Info", + click: () => { + fireAndForget(async () => { + try { + const info = await RpcApi.SessionInfoCommand(TabRpcClient, { daemonid: sessionDaemonId }); + const msg = `Session Daemon: ${info.name || "(unnamed)"}\nID: ${info.daemonid}\nStatus: ${info.status}\nConnection: 
${info.connection || "N/A"}\nBlocks: ${(info.blocks || []).length}`; + modalsModel.pushModal("MessageModal", { children: msg }); + } catch (e) { + modalsModel.pushModal("MessageModal", { children: `Error: ${e?.message || e}` }); + } + }); + }, + }); + advancedSubmenu.push({ + label: "Detach from Session", + click: () => { + fireAndForget(async () => { + try { + await RpcApi.SessionDetachCommand(TabRpcClient, { daemonid: sessionDaemonId, blockid: this.blockId }); + } catch (e) { + modalsModel.pushModal("MessageModal", { children: `Error: ${e?.message || e}` }); + } + }); + }, + }); + } + fullMenu.push({ label: "Advanced", submenu: advancedSubmenu, diff --git a/frontend/app/view/term/term.tsx b/frontend/app/view/term/term.tsx index a9dcd17bf0..fcaf8f5021 100644 --- a/frontend/app/view/term/term.tsx +++ b/frontend/app/view/term/term.tsx @@ -51,15 +51,19 @@ const TermResyncHandler = React.memo(({ blockId, model }: TerminalViewProps) => React.useEffect(() => { if (!model.termRef.current?.hasResized) { + console.log("[TermResyncHandler] hasResized=false, skipping resync", blockId); + setLastConnStatus(connStatus); return; } const isConnected = connStatus?.status == "connected"; const wasConnected = lastConnStatus?.status == "connected"; const curConnName = connStatus?.connection; const lastConnName = lastConnStatus?.connection; + console.log("[TermResyncHandler] check", blockId, "cur:", connStatus?.status, "last:", lastConnStatus?.status, "conn:", curConnName); if (isConnected == wasConnected && curConnName == lastConnName) { return; } + console.log("[TermResyncHandler] triggering resync", blockId); model.termRef.current?.resyncController("resync handler"); setLastConnStatus(connStatus); }, [connStatus]); @@ -354,6 +358,52 @@ const TerminalView = ({ blockId, model }: ViewComponentProps) => termModeRef.current = termMode; }, [termMode]); + React.useEffect(() => { + const termWrap = model.termRef.current; + const daemonId = blockData?.meta?.["session:daemonid"]; + if 
(termWrap == null) { + return; + } + if (!daemonId) { + fireAndForget(termWrap.detachFromDaemon.bind(termWrap)); + return undefined; + } + let cancelled = false; + let retryTimer: ReturnType | null = null; + const tryAttach = async (retry: number) => { + if (cancelled) return; + try { + const info = await RpcApi.SessionInfoCommand(TabRpcClient, { daemonid: daemonId }); + if (cancelled) return; + if (!info.jobid) { + // If the daemon is still initializing (job not started yet), retry. + // This handles the race where SessionAttach sends the WaveObj update + // before the job is started by the resync controller (~20ms window). + if (info.status === "init" && retry < 15) { + retryTimer = setTimeout(() => tryAttach(retry + 1), 200); + return; + } + return; + } + if (termWrap.zoneId === info.jobid) { + return; + } + await termWrap.attachToDaemon(info.jobid); + } catch (e) { + if (!cancelled) { + console.log("error attaching terminal to session daemon", daemonId, e); + } + } + }; + fireAndForget(() => tryAttach(0)); + return () => { + cancelled = true; + if (retryTimer != null) { + clearTimeout(retryTimer); + } + }; + }, [blockData?.meta?.["session:daemonid"], blockData?.jobid, termWrapInst]); + React.useEffect(() => { if (isMI && isBasicTerm && isFocused && model.termRef.current != null) { model.termRef.current.multiInputCallback = (data: string) => { diff --git a/frontend/app/view/term/termwrap.ts b/frontend/app/view/term/termwrap.ts index 4840b5d914..54be322987 100644 --- a/frontend/app/view/term/termwrap.ts +++ b/frontend/app/view/term/termwrap.ts @@ -24,6 +24,7 @@ import { SearchAddon } from "@xterm/addon-search"; import { SerializeAddon } from "@xterm/addon-serialize"; import { WebLinksAddon } from "@xterm/addon-web-links"; import { WebglAddon } from "@xterm/addon-webgl"; +import { Subscription } from "rxjs"; import * as TermTypes from "@xterm/xterm"; import { Terminal } from "@xterm/xterm"; import debug from "debug"; @@ -52,6 +53,13 @@ const TermCacheFileName = 
"cache:term:full"; const MinDataProcessedForCache = 100 * 1024; export const SupportsImageInput = true; const MaxRepaintTransactionMs = 2000; +const AltScreenEnterSeq = "\x1b[?1049h"; +const AltScreenExitSeq = "\x1b[?1049l"; +const AppCursorKeysEnterSeq = "\x1b[?1h"; +const AppCursorKeysExitSeq = "\x1b[?1l"; +const CursorShowSeq = "\x1b[?25h"; +const ClearScreenSeq = "\x1b[2J"; +const HomeAndClearScreenSeq = "\x1b[H\x1b[2J"; // detect webgl support function detectWebGLSupport(): boolean { @@ -74,9 +82,109 @@ type TermWrapOptions = { nodeModel?: BlockNodeModel; }; +// Some remote full-screen programs clear the main screen without emitting +// smcup/rmcup. Keep that temporary drawing in xterm's alternate buffer. +class SyntheticAltScreenTracker { + pendingEnter: boolean = false; + active: boolean = false; + pendingExit: boolean = false; + + process(data: string): string | null { + let changed = false; + let rtn = ""; + + for (let idx = 0; idx < data.length; ) { + const handledSeq = this.matchAndAppendSeq(data, idx); + if (handledSeq != null) { + rtn += handledSeq.data; + idx += handledSeq.seqLen; + changed ||= handledSeq.changed; + continue; + } + rtn += data[idx]; + idx++; + } + + const exitSeq = this.flushPendingExit(); + if (exitSeq != null) { + rtn += exitSeq; + changed = true; + } + return changed ? 
rtn : null; + } + + matchAndAppendSeq(data: string, idx: number): { data: string; seqLen: number; changed: boolean } | null { + if (data.startsWith(AltScreenEnterSeq, idx)) { + this.reset(); + return { data: AltScreenEnterSeq, seqLen: AltScreenEnterSeq.length, changed: false }; + } + if (data.startsWith(AltScreenExitSeq, idx)) { + this.reset(); + return { data: AltScreenExitSeq, seqLen: AltScreenExitSeq.length, changed: false }; + } + if (data.startsWith(AppCursorKeysEnterSeq, idx)) { + if (!this.active) { + this.pendingEnter = true; + } + return { data: AppCursorKeysEnterSeq, seqLen: AppCursorKeysEnterSeq.length, changed: false }; + } + if (data.startsWith(AppCursorKeysExitSeq, idx)) { + let changed = false; + if (this.active) { + this.pendingExit = true; + changed = true; + } + this.pendingEnter = false; + return { data: AppCursorKeysExitSeq, seqLen: AppCursorKeysExitSeq.length, changed }; + } + if (data.startsWith(CursorShowSeq, idx)) { + const exitSeq = this.flushPendingExit(); + return { + data: exitSeq == null ? 
CursorShowSeq : CursorShowSeq + exitSeq, + seqLen: CursorShowSeq.length, + changed: exitSeq != null, + }; + } + if (data.startsWith(HomeAndClearScreenSeq, idx)) { + return this.maybeEnterAltScreen(HomeAndClearScreenSeq); + } + if (data.startsWith(ClearScreenSeq, idx)) { + return this.maybeEnterAltScreen(ClearScreenSeq); + } + return null; + } + + maybeEnterAltScreen(seq: string): { data: string; seqLen: number; changed: boolean } { + if (!this.pendingEnter || this.active) { + return { data: seq, seqLen: seq.length, changed: false }; + } + this.active = true; + this.pendingEnter = false; + this.pendingExit = false; + return { data: AltScreenEnterSeq + seq, seqLen: seq.length, changed: true }; + } + + flushPendingExit(): string | null { + if (!this.pendingExit) { + return null; + } + this.active = false; + this.pendingExit = false; + return AltScreenExitSeq; + } + + reset() { + this.pendingEnter = false; + this.active = false; + this.pendingExit = false; + } +} + export class TermWrap { tabId: string; blockId: string; + zoneId: string; + zoneLoadVersion: number; ptyOffset: number; dataBytesProcessed: number; terminal: Terminal; @@ -85,6 +193,7 @@ export class TermWrap { searchAddon: SearchAddon; serializeAddon: SerializeAddon; mainFileSubject: SubjectWithRef; + _mainFileSub: Subscription | null = null; loaded: boolean; heldData: Uint8Array[]; handleResize_debounced: () => void; @@ -121,6 +230,7 @@ export class TermWrap { lastMode2026ResetTs: number = 0; inSyncTransaction: boolean = false; inRepaintTransaction: boolean = false; + syntheticAltScreenTracker: SyntheticAltScreenTracker = new SyntheticAltScreenTracker(); constructor( tabId: string, @@ -132,6 +242,8 @@ export class TermWrap { this.loaded = false; this.tabId = tabId; this.blockId = blockId; + this.zoneId = blockId; + this.zoneLoadVersion = 0; this.sendDataHandler = waveOptions.sendDataHandler; this.nodeModel = waveOptions.nodeModel; this.ptyOffset = 0; @@ -325,7 +437,36 @@ export class TermWrap { } 
getZoneId(): string { - return this.blockId; + return this.zoneId; + } + + async switchZone(zoneId: string): Promise { + if (!zoneId || this.zoneId === zoneId) { + return; + } + this._mainFileSub?.unsubscribe(); + this._mainFileSub = null; + this.mainFileSubject?.release(); + + this.zoneId = zoneId; + this.zoneLoadVersion++; + this.ptyOffset = 0; + this.dataBytesProcessed = 0; + this.heldData = []; + this.syntheticAltScreenTracker.reset(); + this.terminal.clear(); + this.mainFileSubject = getFileSubject(this.getZoneId(), TermFileName); + this._mainFileSub = this.mainFileSubject.subscribe(this.handleNewFileSubjectData.bind(this)); + await this.loadInitialTerminalData(); + this.terminal.scrollToBottom(); + } + + async attachToDaemon(jobId: string): Promise { + await this.switchZone(jobId); + } + + async detachFromDaemon(): Promise { + await this.switchZone(this.blockId); } setCursorStyle(cursorStyle: string) { @@ -409,7 +550,8 @@ export class TermWrap { } this.mainFileSubject = getFileSubject(this.getZoneId(), TermFileName); - this.mainFileSubject.subscribe(this.handleNewFileSubjectData.bind(this)); + this._mainFileSub = this.mainFileSubject.subscribe(this.handleNewFileSubjectData.bind(this)); + console.log("[termwrap] initTerminal: zoneId=", this.getZoneId(), "blockId=", this.blockId); try { const rtInfo = await RpcApi.GetRTInfoCommand(TabRpcClient, { @@ -493,6 +635,8 @@ export class TermWrap { } doTerminalWrite(data: string | Uint8Array, setPtyOffset?: number): Promise { + const rawDataLen = data.length; + const writeData = setPtyOffset == null ? this.maybeAddSyntheticAltScreen(data) : data; if (isDev() && this.loaded) { const dataStr = data instanceof Uint8Array ? 
new TextDecoder().decode(data) : data; this.recentWrites.push({ idx: this.recentWritesCounter++, ts: Date.now(), data: dataStr }); @@ -504,12 +648,12 @@ export class TermWrap { const prtn = new Promise((presolve, _) => { resolve = presolve; }); - this.terminal.write(data, () => { + this.terminal.write(writeData, () => { if (setPtyOffset != null) { this.ptyOffset = setPtyOffset; } else { - this.ptyOffset += data.length; - this.dataBytesProcessed += data.length; + this.ptyOffset += rawDataLen; + this.dataBytesProcessed += rawDataLen; } this.lastUpdated = Date.now(); resolve(); @@ -517,11 +661,27 @@ export class TermWrap { return prtn; } + maybeAddSyntheticAltScreen(data: string | Uint8Array): string | Uint8Array { + const dataStr = data instanceof Uint8Array ? new TextDecoder().decode(data) : data; + const syntheticData = this.syntheticAltScreenTracker.process(dataStr); + if (syntheticData == null) { + return data; + } + if (data instanceof Uint8Array) { + return new TextEncoder().encode(syntheticData); + } + return syntheticData; + } + async loadInitialTerminalData(): Promise { const startTs = Date.now(); const zoneId = this.getZoneId(); + const zoneLoadVersion = this.zoneLoadVersion; const { data: cacheData, fileInfo: cacheFile } = await fetchWaveFile(zoneId, TermCacheFileName); let ptyOffset = 0; + if (zoneId !== this.getZoneId() || zoneLoadVersion !== this.zoneLoadVersion) { + return; + } if (cacheFile != null) { ptyOffset = cacheFile.meta["ptyoffset"] ?? 0; if (cacheData.byteLength > 0) { @@ -543,6 +703,9 @@ export class TermWrap { } } const { data: mainData, fileInfo: mainFile } = await fetchWaveFile(zoneId, TermFileName, ptyOffset); + if (zoneId !== this.getZoneId() || zoneLoadVersion !== this.zoneLoadVersion) { + return; + } console.log( `terminal loaded cachefile:${cacheData?.byteLength ?? 0} main:${mainData?.byteLength ?? 
0} bytes, ${Date.now() - startTs}ms` ); @@ -552,7 +715,7 @@ export class TermWrap { } async resyncController(reason: string) { - dlog("resync controller", this.blockId, reason); + console.log("[termwrap] resync controller", this.blockId, reason); const rtOpts: RuntimeOpts = { termsize: { rows: this.terminal.rows, cols: this.terminal.cols } }; try { await RpcApi.ControllerResyncCommand(TabRpcClient, { @@ -594,7 +757,7 @@ export class TermWrap { const termSize: TermSize = { rows: this.terminal.rows, cols: this.terminal.cols }; console.log("idle timeout term", this.dataBytesProcessed, serializedOutput.length, termSize); fireAndForget(() => - services.BlockService.SaveTerminalState(this.blockId, serializedOutput, "full", this.ptyOffset, termSize) + services.BlockService.SaveTerminalState(this.getZoneId(), serializedOutput, "full", this.ptyOffset, termSize) ); this.dataBytesProcessed = 0; } diff --git a/frontend/types/gotypes.d.ts b/frontend/types/gotypes.d.ts index bb07f466b0..dd7b12b9ce 100644 --- a/frontend/types/gotypes.d.ts +++ b/frontend/types/gotypes.d.ts @@ -528,6 +528,11 @@ declare global { modts?: number; }; + // wshrpc.CommandRecordSessionActivityData + type CommandRecordSessionActivityData = { + daemonid: string; + }; + // wshrpc.CommandRemoteDisconnectFromJobManagerData type CommandRemoteDisconnectFromJobManagerData = { jobid: string; @@ -582,6 +587,7 @@ declare global { mainserverjwttoken: string; jobmanagerpid: number; jobmanagerstartts: number; + remoteidletimeoutseconds?: number; }; // wshrpc.CommandRemoteReconnectToJobManagerRtnData @@ -603,6 +609,7 @@ declare global { mainserverjwttoken: string; clientid: string; publickeybase64: string; + remoteidletimeoutseconds?: number; }; // wshrpc.CommandRemoteTerminateJobManagerData @@ -635,6 +642,47 @@ declare global { builderid: string; }; + // wshrpc.CommandSessionAttachData + type CommandSessionAttachData = { + daemonid: string; + blockid: string; + currentdaemonid?: string; + }; + + // 
wshrpc.CommandSessionCreateData + type CommandSessionCreateData = { + name?: string; + connection?: string; + idletimeout?: number; + }; + + // wshrpc.CommandSessionDeleteData + type CommandSessionDeleteData = { + daemonid: string; + }; + + // wshrpc.CommandSessionDetachData + type CommandSessionDetachData = { + daemonid: string; + blockid?: string; + }; + + // wshrpc.CommandSessionInfoData + type CommandSessionInfoData = { + daemonid: string; + }; + + // wshrpc.CommandSessionListData + type CommandSessionListData = { + showall?: boolean; + }; + + // wshrpc.CommandSessionTagData + type CommandSessionTagData = { + daemonid: string; + name: string; + }; + // wshrpc.CommandSetMetaData type CommandSetMetaData = { oref: ORef; @@ -1133,6 +1181,7 @@ declare global { "cmd:initscript.zsh"?: string; "cmd:initscript.pwsh"?: string; "cmd:initscript.fish"?: string; + "session:daemonid"?: string; "ai:*"?: boolean; "ai:preset"?: string; "ai:apitype"?: string; @@ -1374,6 +1423,36 @@ declare global { optional: boolean; }; + // waveobj.SessionDaemon + type SessionDaemon = WaveObj & { + name?: string; + connection?: string; + jobid?: string; + isanonymous?: boolean; + status?: string; + cwd?: string; + createdat?: number; + idletimeout?: number; + idlesince?: number; + lastactiveat?: number; + }; + + // wshrpc.SessionInfoRtnData + type SessionInfoRtnData = { + daemonid: string; + name: string; + connection: string; + jobid?: string; + isanonymous: boolean; + status: string; + cwd?: string; + createdat: number; + idletimeout: number; + idlesince?: number; + lastactiveat?: number; + blocks?: string[]; + }; + // wconfig.SettingsType type SettingsType = { "app:*"?: boolean; diff --git a/openspec/changes/session-daemon/.openspec.yaml b/openspec/changes/session-daemon/.openspec.yaml new file mode 100644 index 0000000000..e8d4ccfe90 --- /dev/null +++ b/openspec/changes/session-daemon/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-06-08 diff --git 
a/openspec/changes/session-daemon/design.md b/openspec/changes/session-daemon/design.md new file mode 100644 index 0000000000..c8fa40df41 --- /dev/null +++ b/openspec/changes/session-daemon/design.md @@ -0,0 +1,83 @@ +## Context + +当前 Wave 使用 `DurableShellController` + `JobController` 管理远端 SSH session,采用 1 block ↔ 1 job 的 1:1 模型。不支持多 block 共享会话、会话命名、空闲超时回收等能力。 + +已有设计文档 `docs/design/session-daemon-design-v2.md` 包含完整架构图和数据流。 + +### 现有架构 + +``` +Block → DurableShellController → JobController → Remote JobManager + 1:1 函数库 每个 job 一个 +``` + +### 目标架构 + +``` +Block → SessionDaemonController → SessionDaemon → JobController → Remote + N:1 桥接 命名/超时/attach 函数库 (不动) +``` + +## Goals / Non-Goals + +**Goals:** +- SSH block 启动时自动创建匿名 daemon(`IdleTimeout=1h`),行为与当前一致 +- 用户可通过 `wsh session create` 创建命名 daemon(`IdleTimeout=24h`) +- 多个 block 可 attach 到同一 daemon,共享输出、各自可输入 +- daemon 空闲超时自动回收 +- 网络重连后的 TerminateOnReconnect 机制保持不变 +- 前端显示 daemon 名称和状态 + +**Non-Goals:** +- 本地/WSL block 不受影响(继续用 ShellController) +- runOutputLoop 不搬(留在 JobController 内部) +- SessionDaemon 不做进程管理,只做 session 管理 + +## Decisions + +### 1. runOutputLoop 保持原位(vs 迁入 SessionDaemon) + +详见 `docs/design/session-daemon-design-v2.md#44-runoutputloop-保持原位与-v1-的关键差异`。 + +理由:JobController 的 `currentStreamId != streamId` 自毁机制已能处理重连流切换,无需 SessionDaemon 介入。不破坏 `StartJob()` 的现有返回值契约。 + +### 2. 匿名 daemon vs 命名 daemon 区分 + +匿名 daemon: +- SSH block 启动时自动创建,`Name=""`, `IsAnonymous=true` +- `IdleTimeout=1h` +- 用户无感知,不能 attach 其他 block(除非先命名) +- 可通过 `wsh session tag sd-xxx --name dev` 转为命名 + +命名 daemon: +- 通过 `wsh session create --name dev` 创建 +- `IsAnonymous=false`, `IdleTimeout=24h` +- 可被多个 block attach + +### 3. DurableShellController 完全移除 + +SessionDaemon + 匿名 daemon 覆盖了 DurableShellController 的全部能力(持久化、自动重连),同时新增多 block attach 和空闲超时。 + +### 4. 
ControllerResync 调度 + +``` +if block.Meta["session:daemonid"] != "": + → SessionDaemonController +else if connType == SSH: + → 创建匿名 daemon → Block.Meta 写入 daemonId → 下一轮进入 SessionDaemonController +else: + → ShellController +``` + +### 5. 输出流共享 + +所有 attached block 读同一份 `job:jobId/term`。现有 WPS `scope=job:{jobId}` 发布机制已支持多订阅者。前端 TermWrap 在 attach/detach 时切换 zoneId。 + +### 6. 输入汇聚 + +所有 attached block 的输入使用同一个 `InputSessionId`,远端 QuickReorderQueue 按 sessionId 排序去重。 + +## Risks / Trade-offs + +- **远端 jobmanager 无心跳超时**:如果网络永远不恢复且 shell 进程不退出,jobmanager 会一直存在。可接受——1h 内无 block attach 则本地 daemon 标记为 done,但远端进程不受影响。远端侧可后续加 `wsh session prune` 命令手动清理。 +- **迁移不可逆**:从 DurableShellController 迁移后,回退到旧版本无法识别 `session:daemonid`。建议迁移前备份 DB。 diff --git a/openspec/changes/session-daemon/proposal.md b/openspec/changes/session-daemon/proposal.md new file mode 100644 index 0000000000..7ef3c66c56 --- /dev/null +++ b/openspec/changes/session-daemon/proposal.md @@ -0,0 +1,37 @@ +## Why + +当前 Wave 的远端 SSH session 模型是 "一个 block 对应一个远程 job" 的 1:1 架构。无法实现多个 block 共享同一个远端会话、会话跨重启持久保持、以及会话有名称可管理。Session Daemon 将**远端连接**与**block 视图**解耦,允许用户创建命名的持久 session,多个 block 可以 attach/detach 到同一个 session。 + +## What Changes + +- **SessionDaemon** — 新增持久化实体(DB 记录),每个 SSH block 启动时自动创建匿名 daemon,用户也可通过 `wsh session create` 创建命名 daemon +- **SessionDaemonController** — 新增 Controller 类型,桥接到 daemon,不管理进程。取代现有 `DurableShellController` +- **DurableShellController** — 移除,功能由 SessionDaemon 覆盖 +- **空闲超时** — 匿名 daemon 默认 1h 超时回收,命名 daemon 默认 24h +- **wsh 命令** — 新增 `wsh session create/delete/list/attach/detach/info` 一组 CLI 命令 +- **前端** — block header 显示 daemon 名称和状态(`dev ●`),支持 attach/detach 操作 +- TermWrap 支持动态切换数据源 zoneId(block ↔ job) + +## Capabilities + +### New Capabilities +- `session-create-delete`: 创建和删除 SessionDaemon(命名 daemon 和匿名 daemon) +- `session-attach-detach`: Block attach/detach 到 daemon,前端切换 zoneId +- `session-idle-timeout`: 空闲超时回收,区分匿名 daemon(1h)和命名 daemon(24h) +- `session-auto-create`: SSH block 
启动时自动创建匿名 daemon,IdleTimeout=1h +- `session-reconnect`: 网络重连后恢复,TerminateOnReconnect 机制确保关闭的 block 的远端 job 被清理 +- `session-wsh-cli`: `wsh session` 命令组(create/delete/list/attach/detach/info) + +### Modified Capabilities +- (无现有 spec 变更) + +## Impact + +- **新增** `pkg/sessiondaemon/` 包(SessionDaemon + SessionDaemonManager) +- **新增** `pkg/blockcontroller/sessiondaemoncontroller.go`(Controller 实现) +- **移除** `pkg/blockcontroller/durableshellcontroller.go` +- **修改** `pkg/blockcontroller/blockcontroller.go`(ResyncController 调度分支) +- **修改** `pkg/jobcontroller/jobcontroller.go`(runOutputLoop 不变,IsBlockTermDurable 不再需要) +- **新增** `cmd/wsh/cmd/wshcmd-session.go`(wsh CLI 命令) +- **新增** DB migration(创建 session_daemon 表,迁移旧 Job 记录) +- 前端新增 attach/detach 逻辑 diff --git a/openspec/changes/session-daemon/specs/session-attach-detach/spec.md b/openspec/changes/session-daemon/specs/session-attach-detach/spec.md new file mode 100644 index 0000000000..8208386587 --- /dev/null +++ b/openspec/changes/session-daemon/specs/session-attach-detach/spec.md @@ -0,0 +1,42 @@ +## ADDED Requirements + +### Requirement: Attach block to daemon + +The system SHALL allow attaching a term block to an existing SessionDaemon via `wsh session attach --block `. 
+ +On attach: +- `Block.Meta["session:daemonid"]` is set to the daemon OID +- `SessionDaemonManager.AttachBlock()` is called +- The block's ControllerResync creates a `SessionDaemonController` +- Frontend `TermWrap.attachToDaemon(jobId)` switches zoneId from `block:{blockId}` to `job:{jobId}` +- The block displays the daemon's terminal output in real time +- The block can send input, which goes through the daemon's InputSessionId + +#### Scenario: Attach block to named daemon +- **WHEN** user runs `wsh session attach dev --block block-A` +- **THEN** block-A's `session:daemonid` is set to the daemon's OID +- **AND** the block's controller becomes `SessionDaemonController` +- **AND** the frontend shows the daemon's terminal output + +#### Scenario: Attach same block to multiple daemons +- **WHEN** user runs `wsh session attach dev --block block-A` +- **AND** block-A is already attached to a different daemon (e.g. `prod`) +- **THEN** the system returns an error (block can only attach to one daemon at a time) + +### Requirement: Detach block from daemon + +The system SHALL allow detaching a block from its SessionDaemon via `wsh session detach --block `. 
+ +On detach: +- `Block.Meta["session:daemonid"]` is cleared +- `SessionDaemonManager.DetachBlock()` is called +- ControllerResync creates a `ShellController` for local/WSL or a new anonymous daemon for SSH +- Frontend `TermWrap.detachFromDaemon()` switches zoneId back to `block:{blockId}` +- The daemon continues running (unless idle timeout triggers) + +#### Scenario: Detach block from daemon +- **WHEN** user runs `wsh session detach --block block-A` +- **AND** block-A is attached to daemon `dev` +- **THEN** block-A's `session:daemonid` is cleared +- **AND** the block reverts to its default controller +- **AND** daemon `dev` continues running (no attached blocks) diff --git a/openspec/changes/session-daemon/specs/session-auto-create/spec.md b/openspec/changes/session-daemon/specs/session-auto-create/spec.md new file mode 100644 index 0000000000..6da6a51ce6 --- /dev/null +++ b/openspec/changes/session-daemon/specs/session-auto-create/spec.md @@ -0,0 +1,31 @@ +## ADDED Requirements + +### Requirement: Auto-create daemon on SSH block start + +When a term block is created with an SSH connection and has no `session:daemonid` meta, the system SHALL automatically create an anonymous SessionDaemon. + +The flow: +1. `ControllerResync` detects SSH connection, no `session:daemonid` +2. Creates anonymous `SessionDaemon` (IsAnonymous=true, IdleTimeout=1h) +3. Writes `session:daemonid` to block meta +4. Triggers `ControllerResync` again +5. Second round detects `session:daemonid` → creates `SessionDaemonController` +6. 
`SessionDaemonController.Start()` → `daemon.EnsureStarted()` → `jobcontroller.StartJob()` + +#### Scenario: New SSH block creates anonymous daemon +- **WHEN** a user opens a new term block with an SSH connection +- **AND** the block has no `session:daemonid` +- **THEN** an anonymous SessionDaemon is created +- **AND** the block's controller becomes SessionDaemonController +- **AND** a remote job is started +- **AND** the user sees the terminal normally + +#### Scenario: Existing daemonid skips creation +- **WHEN** a user opens a term block +- **AND** `block.Meta["session:daemonid"]` is already set +- **THEN** the system uses the existing daemon directly (no auto-creation) + +#### Scenario: Local/WSL block does not create daemon +- **WHEN** a user opens a term block with a local or WSL connection +- **THEN** the system uses ShellController directly +- **AND** no SessionDaemon is created diff --git a/openspec/changes/session-daemon/specs/session-create-delete/spec.md b/openspec/changes/session-daemon/specs/session-create-delete/spec.md new file mode 100644 index 0000000000..fdb2ba29e4 --- /dev/null +++ b/openspec/changes/session-daemon/specs/session-create-delete/spec.md @@ -0,0 +1,57 @@ +## ADDED Requirements + +### Requirement: Create named SessionDaemon + +The system SHALL allow a user to create a named SessionDaemon via `wsh session create --name --connection `. 
+ +A named SessionDaemon SHALL: +- Have a globally unique `Name` (conflict appends timestamp suffix like `dev-150623`) +- Have `IsAnonymous=false` +- Have `IdleTimeout=86400` (24h) by default +- Be persisted to DB with status `init` +- Start a remote job immediately via `jobcontroller.StartJob()` +- Transition to `running` when the remote JobManager confirms startup + +#### Scenario: Create named daemon successfully +- **WHEN** user runs `wsh session create --name dev --connection ssh:user@host` +- **THEN** a SessionDaemon record is created in DB with status `init` +- **AND** a remote job is started +- **AND** status transitions to `running` +- **AND** the daemon is registered in SessionDaemonManager + +#### Scenario: Create daemon with duplicate name +- **WHEN** user runs `wsh session create --name dev --connection ssh:host1` +- **AND** a daemon named `dev` already exists +- **THEN** the system creates with name `dev-` and notifies the user + +### Requirement: Create anonymous SessionDaemon + +The system SHALL automatically create an anonymous SessionDaemon when a new SSH block is started without a `session:daemonid`. + +An anonymous SessionDaemon SHALL: +- Have `Name=""` and `IsAnonymous=true` +- Have `IdleTimeout=3600` (1h) by default +- Be invisible to `wsh session list` by default (unless `--all` flag) +- Be upgradable to named via `wsh session tag --name ` + +#### Scenario: Auto-create anonymous daemon +- **WHEN** a user opens a new SSH term block +- **AND** the block has no `session:daemonid` meta +- **THEN** an anonymous SessionDaemon is created and attached to the block +- **AND** the process is transparent to the user (no UI indication) + +### Requirement: Delete SessionDaemon + +The system SHALL allow deleting a SessionDaemon via `wsh session delete `. 
+ +Deletion SHALL: +- Call `TerminateAndDetachJob` on the associated job +- Detach all currently attached blocks (clear their `session:daemonid`) +- Set daemon status to `done` +- Remove daemon from SessionDaemonManager + +#### Scenario: Delete daemon +- **WHEN** user runs `wsh session delete dev` +- **THEN** the remote job is terminated +- **AND** all attached blocks have their `session:daemonid` cleared +- **AND** daemon status is set to `done` diff --git a/openspec/changes/session-daemon/specs/session-idle-timeout/spec.md b/openspec/changes/session-daemon/specs/session-idle-timeout/spec.md new file mode 100644 index 0000000000..5661701e0d --- /dev/null +++ b/openspec/changes/session-daemon/specs/session-idle-timeout/spec.md @@ -0,0 +1,42 @@ +## ADDED Requirements + +### Requirement: Idle timeout for anonymous daemons + +An anonymous SessionDaemon (`IsAnonymous=true`) SHALL have a default `IdleTimeout` of 3600 seconds (1 hour). + +When the last block is detached: +- A countdown timer starts for `IdleTimeout` +- If a block re-attaches before timeout, the timer is cancelled +- If the timer expires, `TerminateAndDetachJob` is called on the associated job +- Daemon status is set to `done` +- The daemon is removed from SessionDaemonManager + +#### Scenario: Anonymous daemon auto-reclaim +- **WHEN** the last block is detached from an anonymous daemon +- **AND** no block re-attaches within 1 hour +- **THEN** the daemon is terminated and status set to `done` + +#### Scenario: Re-attach cancels timer +- **WHEN** the last block is detached from an anonymous daemon +- **AND** a block re-attaches within 1 hour +- **THEN** the idle timer is cancelled +- **AND** the daemon continues running + +### Requirement: Idle timeout for named daemons + +A named SessionDaemon (`IsAnonymous=false`) SHALL have a default `IdleTimeout` of 86400 seconds (24 hours). + +The same timer mechanism applies. Named daemons have a longer timeout because they are intentionally created by the user. 
+ +#### Scenario: Named daemon idle timeout +- **WHEN** all blocks are detached from a named daemon +- **AND** no block re-attaches within 24 hours +- **THEN** the daemon is terminated and status set to `done` + +### Requirement: Configurable idle timeout + +The system SHALL allow overriding `IdleTimeout` on daemon creation via `--idle-timeout ` flag. + +#### Scenario: Custom idle timeout +- **WHEN** user runs `wsh session create --name dev --connection ssh:host --idle-timeout 7200` +- **THEN** the daemon's IdleTimeout is set to 7200 seconds (2 hours) diff --git a/openspec/changes/session-daemon/specs/session-reconnect/spec.md b/openspec/changes/session-daemon/specs/session-reconnect/spec.md new file mode 100644 index 0000000000..9042b65b99 --- /dev/null +++ b/openspec/changes/session-daemon/specs/session-reconnect/spec.md @@ -0,0 +1,51 @@ +## ADDED Requirements + +### Requirement: Reconnect daemon after network recovery + +When the network reconnects after a disconnection, the system SHALL automatically attempt to reconnect all SessionDaemons whose status is `running` or `disconnected`. + +On reconnection: +- `onConnectionUp` finds all daemons with this connection name +- Calls `daemon.Reconnect()` → `jobcontroller.ReconnectJob()` +- If the remote jobmanager process is still alive, streaming resumes +- If the remote jobmanager is gone (`JobManagerGone: true`), daemon status set to `done` +- All attached blocks resume displaying output from the stream + +#### Scenario: Reconnect daemon after transient SSH drop +- **WHEN** the SSH connection drops and re-establishes +- **AND** the daemon status is `disconnected` +- **AND** the remote jobmanager is still alive +- **THEN** the daemon reconnects and streaming resumes +- **AND** attached blocks display the continued output + +### Requirement: TerminateOnReconnect for closed blocks + +When a block is closed while the network is down, the system SHALL set `TerminateOnReconnect=true` on the associated Job. 
On reconnection, the remote jobmanager SHALL be terminated instead of reconnected. + +This ensures that a user closing a block while offline does not leave a stale remote process. + +#### Scenario: Block closed offline, remote cleaned on reconnect +- **WHEN** a user closes a block while the SSH connection is down +- **THEN** `TerminateOnReconnect=true` is persisted in the DB +- **WHEN** the network reconnects +- **THEN** the jobmanager is terminated via SIGTERM +- **AND** no orphaned processes remain on the remote side + +### Requirement: Restart recovery + +When WaveTerm restarts, `SessionDaemonManager.InitFromDB()` SHALL: +1. Load all daemons with status `running` or `disconnected` from DB +2. For each, call `jobcontroller.ReconnectJob()` to reconnect +3. Blocks with `session:daemonid` pointing to a daemon that no longer exists SHALL have their `session:daemonid` cleared and trigger a new ControllerResync + +#### Scenario: Restart with active daemon +- **WHEN** WaveTerm restarts +- **AND** a daemon has status `running` in DB +- **THEN** InitFromDB() loads the daemon and reconnects +- **AND** attached blocks display the resumed output + +#### Scenario: Restart with stale daemonid +- **WHEN** WaveTerm restarts +- **AND** a block has `session:daemonid` pointing to a non-existent daemon +- **THEN** the daemonid is cleared +- **AND** the block falls back to its default controller diff --git a/openspec/changes/session-daemon/specs/session-wsh-cli/spec.md b/openspec/changes/session-daemon/specs/session-wsh-cli/spec.md new file mode 100644 index 0000000000..47c5159244 --- /dev/null +++ b/openspec/changes/session-daemon/specs/session-wsh-cli/spec.md @@ -0,0 +1,88 @@ +## ADDED Requirements + +### Requirement: wsh session create + +The system SHALL provide a `wsh session create` command. 
+ +``` +wsh session create --name --connection [--idle-timeout ] +``` + +This command SHALL: +- Create a new named SessionDaemon and persist to DB +- Start a remote job immediately +- Return the daemon OID + +### Requirement: wsh session delete + +The system SHALL provide a `wsh session delete` command. + +``` +wsh session delete +``` + +This command SHALL: +- Terminate the associated remote job +- Detach all attached blocks +- Set daemon status to `done` + +### Requirement: wsh session list + +The system SHALL provide a `wsh session list` command. + +``` +wsh session list [--all] +``` + +Without `--all`, only named daemons (IsAnonymous=false) are shown. +With `--all`, anonymous daemons are also shown. + +### Requirement: wsh session attach + +The system SHALL provide a `wsh session attach` command. + +``` +wsh session attach --block +``` + +### Requirement: wsh session detach + +The system SHALL provide a `wsh session detach` command. + +``` +wsh session detach --block +``` + +### Requirement: wsh session info + +The system SHALL provide a `wsh session info` command. + +``` +wsh session info +``` + +This command SHALL display: +- Name, Status, Connection, CreatedAt +- JobId +- List of currently attached block IDs +- Time remaining before idle timeout (if no blocks attached) + +### Requirement: wsh session tag + +The system SHALL provide a `wsh session tag` command to convert an anonymous daemon to a named one. 
+ +``` +wsh session tag --name +``` + +After tagging, the daemon SHALL: +- Have `Name` set to the provided name +- Have `IsAnonymous=false` +- Have `IdleTimeout` updated to 24h +- Appear in `wsh session list` output + +#### Scenario: Tag anonymous daemon +- **WHEN** user runs `wsh session tag sd-abc --name dev` +- **THEN** the daemon's Name is set to `dev` +- **AND** IsAnonymous is set to `false` +- **AND** IdleTimeout is updated to 24h diff --git a/openspec/changes/session-daemon/tasks.md b/openspec/changes/session-daemon/tasks.md new file mode 100644 index 0000000000..0876e78579 --- /dev/null +++ b/openspec/changes/session-daemon/tasks.md @@ -0,0 +1,65 @@ +## 1. Data Model — SessionDaemon DB 记录 + +- [x] 1.1 在 `pkg/waveobj/` 新增 `SessionDaemon` struct(OID, Name, Connection, JobId, IsAnonymous, Status, Cwd, CreatedAt, IdleTimeout, Meta) +- [x] 1.2 在 DB 创建 `sessiondaemon` 表(或扩展现有 schema) +- [x] 1.3 新增 `MetaKey_SessionDaemonId = "session:daemonid"` 常量 +- [x] 1.4 在 `MetaTSType` 新增 `SessionDaemonId string` 字段 + +## 2. SessionDaemon + SessionDaemonManager + +- [x] 2.1 新建 `pkg/sessiondaemon/` 包 +- [x] 2.2 实现 `SessionDaemon` struct(daemonId, name, jobId, InputSessionId, seqNum, blocks, status) +- [x] 2.3 实现 `SessionDaemon.Start()` → `jobcontroller.StartJob()` +- [x] 2.4 实现 `SessionDaemon.Reconnect()` → `jobcontroller.ReconnectJob()` +- [x] 2.5 实现 `SessionDaemon.Stop()` → `jobcontroller.TerminateJob()` +- [x] 2.6 实现 `SessionDaemon.SendInput()` → `jobcontroller.SendInput()` +- [x] 2.7 实现 `SessionDaemonManager`(map, GetOrCreate, Get, Remove, InitFromDB) +- [x] 2.8 实现 AttachBlock / DetachBlock / GetBlocksForDaemon +- [x] 2.9 实现空闲超时回收 goroutine(检查 IdleTimeout,定时扫描) + +## 3. 
SessionDaemonController + +- [x] 3.1 新建 `pkg/blockcontroller/sessiondaemoncontroller.go` +- [x] 3.2 实现 `SessionDaemonController` struct + Controller 接口方法(Start, SendInput, Stop, GetRuntimeStatus, Resync) +- [x] 3.3 修改 `ResyncController` 调度:检测 `session:daemonid` 走 SessionDaemonController +- [x] 3.4 修改 ResyncController:SSH block + 无 daemonid 时自动创建匿名 daemon + +## 4. DurableShellController 移除 + +- [x] 4.1 删除 `pkg/blockcontroller/durableshellcontroller.go` +- [x] 4.2 移除 `ResyncController` 中的 DurableShellController 分支 +- [x] 4.3 移除 `IsBlockIdTermDurable` 调用(不再需要) + +## 5. 输出流修改 + +- [x] 5.1 `runOutputLoop` 中的 `handleAppendJobFile` 不再写 `block:blockId/term`(只写 `job:jobId/term`) +- [x] 5.2 前端 TermWrap 支持动态切换 zoneId(block ↔ job) + +## 6. wsh CLI 命令 + +- [x] 6.1 新建 `cmd/wsh/cmd/wshcmd-session.go` +- [x] 6.2 实现 `wsh session create`(支持 --name, --connection, --idle-timeout) +- [x] 6.3 实现 `wsh session delete` +- [x] 6.4 实现 `wsh session list`(支持 --all 显示匿名 daemon) +- [x] 6.5 实现 `wsh session attach` +- [x] 6.6 实现 `wsh session detach` +- [x] 6.7 实现 `wsh session info` +- [x] 6.8 实现 `wsh session tag`(匿名转命名) + +## 7. 前端 + +- [ ] 7.1 Block header 显示 daemon 名称和状态(dev ● / dev ◌ / dev ✗) +- [ ] 7.2 右键菜单 / header 下拉添加 attach/detach 入口 +- [ ] 7.3 TermWrap 实现 `attachToDaemon(jobId)` 和 `detachFromDaemon()` + +## 8. Migration + +- [x] 8.1 编写 DB migration:创建 sessiondaemon 表 +- [x] 8.2 扫描所有 Block.JobId != "" 的记录,迁移到 SessionDaemon +- [x] 8.3 迁移完成后清理旧 block:blockId/term 文件(数据已合并到 job:jobId/term) + +## 9. 
Build & Verify + +- [x] 9.1 编译通过(`go build ./...`) +- [x] 9.2 前端 build 通过(`npm run build:prod`) +- [x] 9.3 `task package` 构建成功 diff --git a/pkg/blockcontroller/blockcontroller.go b/pkg/blockcontroller/blockcontroller.go index 75f1938e12..d7e82a02f9 100644 --- a/pkg/blockcontroller/blockcontroller.go +++ b/pkg/blockcontroller/blockcontroller.go @@ -6,6 +6,7 @@ package blockcontroller import ( "context" "encoding/base64" + "errors" "fmt" "io/fs" "log" @@ -16,15 +17,18 @@ import ( "github.com/google/uuid" "github.com/wavetermdev/waveterm/pkg/blocklogger" "github.com/wavetermdev/waveterm/pkg/filestore" - "github.com/wavetermdev/waveterm/pkg/jobcontroller" "github.com/wavetermdev/waveterm/pkg/remote" "github.com/wavetermdev/waveterm/pkg/remote/conncontroller" + "github.com/wavetermdev/waveterm/pkg/sessiondaemon" "github.com/wavetermdev/waveterm/pkg/util/ds" "github.com/wavetermdev/waveterm/pkg/util/shellutil" "github.com/wavetermdev/waveterm/pkg/wavebase" "github.com/wavetermdev/waveterm/pkg/waveobj" + "github.com/wavetermdev/waveterm/pkg/wcore" "github.com/wavetermdev/waveterm/pkg/wps" + "github.com/wavetermdev/waveterm/pkg/wshrpc" "github.com/wavetermdev/waveterm/pkg/wshrpc/wshclient" + "github.com/wavetermdev/waveterm/pkg/wshutil" "github.com/wavetermdev/waveterm/pkg/wslconn" "github.com/wavetermdev/waveterm/pkg/wstore" ) @@ -35,12 +39,16 @@ const ( BlockController_Tsunami = "tsunami" ) +const MetaKey_SessionNoAutoCreate = "session:noautocreate" + const ( Status_Running = "running" Status_Done = "done" Status_Init = "init" ) +var ErrSessionDaemonJobUnknown = errors.New("session daemon job state unknown") + const ( DefaultTermMaxFileSize = 2 * 1024 * 1024 DefaultHtmlMaxFileSize = 256 * 1024 @@ -135,6 +143,12 @@ func InitBlockController() { Event: wps.Event_BlockClose, AllScopes: true, }, nil) + sessiondaemon.OnDaemonJobDoneFn = func(ctx context.Context, daemonId string) { + err := fallbackSessionDaemonToShell(ctx, daemonId, "") + if err != nil { + 
log.Printf("[sessiondaemon] error falling back daemon=%s to shell: %v", daemonId, err) + } + } } func handleBlockCloseEvent(event *wps.WaveEvent) { @@ -172,8 +186,16 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts if existing != nil { existingConnName := existing.GetConnName() if existingConnName != connName { + // For non-local connections, check readiness before switching + if !conncontroller.IsLocalConnName(connName) && !conncontroller.IsWslConnName(connName) && existingConnName == "" { + err = CheckConnStatus(blockId) + if err != nil { + log.Printf("not stopping blockcontroller %s due to conn change (from %q to %q): new connection not ready\n", blockId, existingConnName, connName) + return fmt.Errorf("cannot start shellproc: %w", err) + } + } log.Printf("stopping blockcontroller %s due to conn change (from %q to %q)\n", blockId, existingConnName, connName) - DestroyBlockController(blockId) + stopBlockController(blockId) time.Sleep(100 * time.Millisecond) existing = nil } @@ -187,8 +209,50 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts return nil } - // Determine if we should use DurableShellController vs ShellController - shouldUseDurableShellController := controllerName == BlockController_Shell && jobcontroller.IsBlockIdTermDurable(blockId) + // Check for SessionDaemon controller + daemonId := blockData.Meta.GetString(waveobj.MetaKey_SessionDaemonId, "") + + // For local/WSL connections, session daemon is not applicable — clear and fall through to ShellController + if daemonId != "" && controllerName == BlockController_Shell && (conncontroller.IsLocalConnName(connName) || conncontroller.IsWslConnName(connName)) { + if existing != nil { + DestroyBlockController(blockId) + time.Sleep(100 * time.Millisecond) + existing = nil + } + _ = wstore.DBUpdateFn(ctx, blockId, func(block *waveobj.Block) { + delete(block.Meta, waveobj.MetaKey_SessionDaemonId) + block.JobId = "" + }) + daemonId = "" + } + 
+ // Validate existing daemon: confirmed-done daemons fall back to ShellController; disconnected daemons wait for connection recovery. + if daemonId != "" && controllerName == BlockController_Shell { + dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, daemonId) + staleStatus := false + if err != nil { + log.Printf("[sessiondaemon] staledaemon: daemon=%s block=%s not found in DB err=%v, clearing", daemonId, blockId, err) + staleStatus = true + } else if dbDaemon.Status == sessiondaemon.Status_Done { + log.Printf("[sessiondaemon] staledaemon: daemon=%s block=%s status=%s, clearing and falling back to ShellController", daemonId, blockId, dbDaemon.Status) + staleStatus = true + } else { + log.Printf("[sessiondaemon] staledaemon: daemon=%s block=%s status=%s, keeping", daemonId, blockId, dbDaemon.Status) + } + if staleStatus { + if existing != nil { + DestroyBlockController(blockId) + time.Sleep(100 * time.Millisecond) + existing = nil + } + _ = wstore.DBUpdateFn(ctx, blockId, func(block *waveobj.Block) { + delete(block.Meta, waveobj.MetaKey_SessionDaemonId) + block.Meta[MetaKey_SessionNoAutoCreate] = true + block.JobId = "" + }) + daemonId = "" + } + } // Check if we need to morph controller type if existing != nil { @@ -196,13 +260,14 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts switch existing.(type) { case *ShellController: - if controllerName != BlockController_Shell && controllerName != BlockController_Cmd { - needsReplace = true - } else if shouldUseDurableShellController { + if daemonId != "" || (controllerName != BlockController_Shell && controllerName != BlockController_Cmd) { needsReplace = true } - case *DurableShellController: - if !shouldUseDurableShellController { + case *SessionDaemonController: + sdc := existing.(*SessionDaemonController) + if daemonId == "" || conncontroller.IsLocalConnName(connName) || conncontroller.IsWslConnName(connName) { + needsReplace = true + } else if daemonId != sdc.DaemonId 
{ needsReplace = true } case *TsunamiController: @@ -213,7 +278,7 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts if needsReplace { log.Printf("stopping blockcontroller %s due to controller type change\n", blockId) - DestroyBlockController(blockId) + stopBlockController(blockId) time.Sleep(100 * time.Millisecond) existing = nil } @@ -221,9 +286,12 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts // Force restart if requested if force && existing != nil { - DestroyBlockController(blockId) - time.Sleep(100 * time.Millisecond) - existing = nil + status := existing.GetRuntimeStatus() + if status.ShellProcStatus != Status_Running { + stopBlockController(blockId) + time.Sleep(100 * time.Millisecond) + existing = nil + } } // Destroy done controllers before restarting @@ -231,7 +299,7 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts status := existing.GetRuntimeStatus() if status.ShellProcStatus == Status_Done { log.Printf("destroying blockcontroller %s with done status before restart\n", blockId) - DestroyBlockController(blockId) + stopBlockController(blockId) time.Sleep(100 * time.Millisecond) existing = nil } @@ -242,17 +310,26 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts if existing != nil { controller = existing } else { - // Create new controller based on type - switch controllerName { - case BlockController_Shell, BlockController_Cmd: - if shouldUseDurableShellController { - controller = MakeDurableShellController(tabId, blockId, controllerName, connName) - } else { - controller = MakeShellController(tabId, blockId, controllerName, connName) + switch { + case daemonId != "": + sdc := MakeSessionDaemonController(tabId, blockId, connName) + sdc.DaemonId = daemonId + controller = sdc + registerController(blockId, controller) + // Ensure the daemon is in memory before attaching the block. 
+ // On restart, the daemon exists in DB but not in the in-memory + // manager – AttachBlock silently no-ops if not found. + dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, daemonId) + if err == nil { + sessiondaemon.Manager.GetOrCreate(ctx, dbDaemon) } + sessiondaemon.Manager.AttachBlock(ctx, daemonId, blockId) + + case controllerName == BlockController_Shell || controllerName == BlockController_Cmd: + controller = MakeShellController(tabId, blockId, controllerName, connName) registerController(blockId, controller) - case BlockController_Tsunami: + case controllerName == BlockController_Tsunami: controller = MakeTsunamiController(tabId, blockId, connName) registerController(blockId, controller) @@ -263,7 +340,37 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts // Check if we need to start/restart status := controller.GetRuntimeStatus() - if status.ShellProcStatus == Status_Init { + if status.ShellProcStatus == Status_Running { + if sdc, ok := controller.(*SessionDaemonController); ok { + if daemon := sessiondaemon.Manager.Get(sdc.DaemonId); daemon != nil && daemon.JobId != "" { + ensureResult, err := sessiondaemon.Manager.EnsureJobState(ctx, sdc.DaemonId, rtOpts, false) + if err != nil { + log.Printf("[sessiondaemon] resync: daemon=%s block=%s ensure failed, falling back to shell: %v", sdc.DaemonId, blockId, err) + err = fallbackSessionDaemonToShell(ctx, sdc.DaemonId, blockId) + if err != nil { + return err + } + existing = nil + daemonId = "" + controller = replaceBlockControllerWithShell(tabId, blockId, controllerName, connName) + status = controller.GetRuntimeStatus() + } else if ensureResult.Action == sessiondaemon.DaemonEnsure_Fallback { + log.Printf("[sessiondaemon] resync: daemon=%s block=%s job manager gone, falling back to shell", sdc.DaemonId, blockId) + err = fallbackSessionDaemonToShell(ctx, sdc.DaemonId, blockId) + if err != nil { + return err + } + daemonId = "" + existing = nil + controller = 
replaceBlockControllerWithShell(tabId, blockId, controllerName, connName) + status = controller.GetRuntimeStatus() + } else if ensureResult.Action == sessiondaemon.DaemonEnsure_Wait { + log.Printf("[sessiondaemon] resync: daemon=%s block=%s job state unknown, waiting", sdc.DaemonId, blockId) + } + } + } + } + if status.ShellProcStatus == Status_Init || existing == nil { // For shell/cmd, check connection status first (for non-local connections) if controllerName == BlockController_Shell || controllerName == BlockController_Cmd { if !conncontroller.IsLocalConnName(connName) { @@ -277,6 +384,9 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts // Start controller err = controller.Start(ctx, blockData.Meta, rtOpts, force) if err != nil { + if errors.Is(err, ErrSessionDaemonJobUnknown) { + return nil + } return fmt.Errorf("error starting controller: %w", err) } } @@ -292,16 +402,86 @@ func GetBlockControllerRuntimeStatus(blockId string) *BlockControllerRuntimeStat return controller.GetRuntimeStatus() } -func DestroyBlockController(blockId string) { +func stopBlockController(blockId string) { controller := getController(blockId) if controller == nil { return } controller.Stop(true, Status_Done, true) wstore.DeleteRTInfo(waveobj.MakeORef(waveobj.OType_Block, blockId)) +} + +func DestroyBlockController(blockId string) { + stopBlockController(blockId) deleteController(blockId) } +func fallbackSessionDaemonToShell(ctx context.Context, daemonId string, currentBlockId string) error { + log.Printf("[sessiondaemon] fallback: daemon=%s currentBlock=%s", daemonId, currentBlockId) + blockIds := sessiondaemon.Manager.GetBlocksForDaemon(daemonId) + if len(blockIds) == 0 && currentBlockId != "" { + blockIds = append(blockIds, currentBlockId) + } + + if err := sessiondaemon.Manager.MarkDone(ctx, daemonId); err != nil { + return fmt.Errorf("mark daemon done: %w", err) + } + + seen := make(map[string]bool) + for _, blockId := range blockIds { + if blockId 
== "" || seen[blockId] { + continue + } + seen[blockId] = true + sessiondaemon.Manager.DetachBlock(ctx, daemonId, blockId) + err := wstore.DBUpdateFn(ctx, blockId, func(block *waveobj.Block) { + if block.Meta == nil { + block.Meta = make(waveobj.MetaMapType) + } + delete(block.Meta, waveobj.MetaKey_SessionDaemonId) + block.Meta[MetaKey_SessionNoAutoCreate] = true + block.JobId = "" + }) + if err != nil { + return fmt.Errorf("fallback block %s to shell: %w", blockId, err) + } + wcore.SendWaveObjUpdate(waveobj.MakeORef(waveobj.OType_Block, blockId)) + if blockId != currentBlockId { + DestroyBlockController(blockId) + resyncBlockController(ctx, blockId) + } + } + return nil +} + +func replaceBlockControllerWithShell(tabId string, blockId string, controllerName string, connName string) Controller { + stopBlockController(blockId) + deleteController(blockId) + time.Sleep(100 * time.Millisecond) + controller := MakeShellController(tabId, blockId, controllerName, connName) + registerController(blockId, controller) + return controller +} + +func resyncBlockController(ctx context.Context, blockId string) { + tabs, err := wstore.DBGetAllObjsByType[*waveobj.Tab](ctx, waveobj.OType_Tab) + if err != nil { + log.Printf("[sessiondaemon] warning: error getting tabs for resync: %v", err) + return + } + for _, tab := range tabs { + for _, bid := range tab.BlockIds { + if bid == blockId { + err = ResyncController(ctx, tab.OID, blockId, nil, true) + if err != nil { + log.Printf("[sessiondaemon] warning: fallback resync failed block=%s: %v", blockId, err) + } + return + } + } + } +} + func sendConnMonitorInputNotification(controller Controller) { connName := controller.GetConnName() if connName == "" || conncontroller.IsLocalConnName(connName) || conncontroller.IsWslConnName(connName) { @@ -492,3 +672,19 @@ func makeSwapToken(ctx context.Context, logCtx context.Context, blockId string, token.ScriptText = getCustomInitScript(logCtx, blockMeta, remoteName, shellType) return token } + 
+func attachRpcContextToSwapToken(swapToken *shellutil.TokenSwapEntry, blockId string, connName string, sockName string) error { + rpcContext := wshrpc.RpcContext{ + ProcRoute: true, + SockName: sockName, + BlockId: blockId, + Conn: connName, + } + jwtStr, err := wshutil.MakeClientJWTToken(rpcContext) + if err != nil { + return fmt.Errorf("error making jwt token: %w", err) + } + swapToken.RpcContext = &rpcContext + swapToken.Env[wshutil.WaveJwtTokenVarName] = jwtStr + return nil +} diff --git a/pkg/blockcontroller/durableshellcontroller.go b/pkg/blockcontroller/durableshellcontroller.go deleted file mode 100644 index a21dac153b..0000000000 --- a/pkg/blockcontroller/durableshellcontroller.go +++ /dev/null @@ -1,277 +0,0 @@ -// Copyright 2025, Command Line Inc. -// SPDX-License-Identifier: Apache-2.0 - -package blockcontroller - -import ( - "context" - "encoding/base64" - "fmt" - "io/fs" - "log" - "sync" - "time" - - "github.com/google/uuid" - "github.com/wavetermdev/waveterm/pkg/filestore" - "github.com/wavetermdev/waveterm/pkg/jobcontroller" - "github.com/wavetermdev/waveterm/pkg/remote" - "github.com/wavetermdev/waveterm/pkg/remote/conncontroller" - "github.com/wavetermdev/waveterm/pkg/shellexec" - "github.com/wavetermdev/waveterm/pkg/util/shellutil" - "github.com/wavetermdev/waveterm/pkg/utilds" - "github.com/wavetermdev/waveterm/pkg/wavebase" - "github.com/wavetermdev/waveterm/pkg/waveobj" - "github.com/wavetermdev/waveterm/pkg/wps" - "github.com/wavetermdev/waveterm/pkg/wshrpc" - "github.com/wavetermdev/waveterm/pkg/wshrpc/wshclient" - "github.com/wavetermdev/waveterm/pkg/wshutil" - "github.com/wavetermdev/waveterm/pkg/wstore" -) - -type DurableShellController struct { - Lock *sync.Mutex - - ControllerType string - TabId string - BlockId string - ConnName string - BlockDef *waveobj.BlockDef - VersionTs utilds.VersionTs - - InputSessionId string // random uuid - inputSeqNum int // monotonic sequence number for inputs, starts at 1 - - JobId string - 
LastKnownStatus string -} - -func MakeDurableShellController(tabId string, blockId string, controllerType string, connName string) Controller { - return &DurableShellController{ - Lock: &sync.Mutex{}, - ControllerType: controllerType, - TabId: tabId, - BlockId: blockId, - ConnName: connName, - LastKnownStatus: Status_Init, - InputSessionId: uuid.New().String(), - } -} - -func (dsc *DurableShellController) WithLock(f func()) { - dsc.Lock.Lock() - defer dsc.Lock.Unlock() - f() -} - -func (dsc *DurableShellController) getJobId() string { - dsc.Lock.Lock() - defer dsc.Lock.Unlock() - return dsc.JobId -} - -func (dsc *DurableShellController) getNextInputSeq() (string, int) { - dsc.Lock.Lock() - defer dsc.Lock.Unlock() - dsc.inputSeqNum++ - return dsc.InputSessionId, dsc.inputSeqNum -} - -func (dsc *DurableShellController) getJobStatus_withlock() string { - if dsc.JobId == "" { - dsc.LastKnownStatus = Status_Init - return Status_Init - } - status, err := jobcontroller.GetJobManagerStatus(context.Background(), dsc.JobId) - if err != nil { - log.Printf("error getting job status for %s: %v, using last known status: %s", dsc.JobId, err, dsc.LastKnownStatus) - return dsc.LastKnownStatus - } - dsc.LastKnownStatus = status - return status -} - -func (dsc *DurableShellController) getRuntimeStatus_withlock() BlockControllerRuntimeStatus { - var rtn BlockControllerRuntimeStatus - rtn.Version = dsc.VersionTs.GetVersionTs() - rtn.BlockId = dsc.BlockId - rtn.ShellProcStatus = dsc.getJobStatus_withlock() - rtn.ShellProcConnName = dsc.ConnName - return rtn -} - -func (dsc *DurableShellController) GetRuntimeStatus() *BlockControllerRuntimeStatus { - var rtn BlockControllerRuntimeStatus - dsc.WithLock(func() { - rtn = dsc.getRuntimeStatus_withlock() - }) - return &rtn -} - -func (dsc *DurableShellController) GetConnName() string { - dsc.Lock.Lock() - defer dsc.Lock.Unlock() - return dsc.ConnName -} - -func (dsc *DurableShellController) sendUpdate_withlock() { - rtStatus := 
dsc.getRuntimeStatus_withlock() - log.Printf("sending blockcontroller update %#v\n", rtStatus) - wps.Broker.Publish(wps.WaveEvent{ - Event: wps.Event_ControllerStatus, - Scopes: []string{ - waveobj.MakeORef(waveobj.OType_Tab, dsc.TabId).String(), - waveobj.MakeORef(waveobj.OType_Block, dsc.BlockId).String(), - }, - Data: rtStatus, - }) -} - -// Start initializes or reconnects to a durable shell for the block. -// Logic: -// - If block has no existing jobId: starts a new job and attaches it -// - If block has existing jobId with running job manager: reconnects to existing job -// - If block has existing jobId with non-running job manager: -// - force=true: detaches old job and starts new one -// - force=false: returns without starting (leaves block unstarted) -// -// After establishing jobId, ensures job connection is active (reconnects if needed) -func (dsc *DurableShellController) Start(ctx context.Context, blockMeta waveobj.MetaMapType, rtOpts *waveobj.RuntimeOpts, force bool) error { - blockData, err := wstore.DBMustGet[*waveobj.Block](ctx, dsc.BlockId) - if err != nil { - return fmt.Errorf("error getting block: %w", err) - } - - if conncontroller.IsLocalConnName(dsc.ConnName) { - return fmt.Errorf("durable shell controller requires a remote connection") - } - - var jobId string - if blockData.JobId != "" { - status, err := jobcontroller.GetJobManagerStatus(ctx, blockData.JobId) - if err != nil { - return fmt.Errorf("error getting job manager status: %w", err) - } - if status == jobcontroller.JobManagerStatus_Running { - jobId = blockData.JobId - } else if !force { - log.Printf("block %q has jobId %s but manager is not running (status: %s), not starting (force=false)\n", dsc.BlockId, blockData.JobId, status) - return nil - } else { - log.Printf("block %q has jobId %s but manager is not running (status: %s), starting new job (force=true)\n", dsc.BlockId, blockData.JobId, status) - // intentionally leave jobId empty to trigger starting a new job below - } - } - - 
if jobId == "" { - log.Printf("block %q starting new durable shell\n", dsc.BlockId) - fsErr := filestore.WFS.MakeFile(ctx, dsc.BlockId, wavebase.BlockFile_Term, nil, wshrpc.FileOpts{MaxSize: DefaultTermMaxFileSize, Circular: true}) - if fsErr != nil && fsErr != fs.ErrExist { - return fmt.Errorf("error creating block term file: %w", fsErr) - } - newJobId, err := dsc.startNewJob(ctx, blockMeta, dsc.ConnName, rtOpts) - if err != nil { - return fmt.Errorf("failed to start new job: %w", err) - } - jobId = newJobId - } - - dsc.WithLock(func() { - dsc.JobId = jobId - dsc.sendUpdate_withlock() - }) - - err = jobcontroller.ReconnectJob(ctx, jobId, rtOpts) - if err != nil { - return fmt.Errorf("failed to reconnect to job: %w", err) - } - - return nil -} - -func (dsc *DurableShellController) Stop(graceful bool, newStatus string, destroy bool) { - if !destroy { - return - } - jobId := dsc.getJobId() - if jobId == "" { - return - } - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - jobcontroller.TerminateAndDetachJob(ctx, jobId) -} - -func (dsc *DurableShellController) SendInput(inputUnion *BlockInputUnion) error { - if inputUnion == nil { - return nil - } - jobId := dsc.getJobId() - if jobId == "" { - return fmt.Errorf("no job attached to controller") - } - inputSessionId, seqNum := dsc.getNextInputSeq() - data := wshrpc.CommandJobInputData{ - JobId: jobId, - InputSessionId: inputSessionId, - SeqNum: seqNum, - TermSize: inputUnion.TermSize, - SigName: inputUnion.SigName, - } - if len(inputUnion.InputData) > 0 { - data.InputData64 = base64.StdEncoding.EncodeToString(inputUnion.InputData) - } - return jobcontroller.SendInput(context.Background(), data) -} - -func (dsc *DurableShellController) startNewJob(ctx context.Context, blockMeta waveobj.MetaMapType, connName string, rtOpts *waveobj.RuntimeOpts) (string, error) { - termSize := waveobj.TermSize{ - Rows: shellutil.DefaultTermRows, - Cols: shellutil.DefaultTermCols, - } - if rtOpts != 
nil && rtOpts.TermSize.Rows > 0 && rtOpts.TermSize.Cols > 0 { - termSize = rtOpts.TermSize - } - cmdStr := blockMeta.GetString(waveobj.MetaKey_Cmd, "") - cwd := blockMeta.GetString(waveobj.MetaKey_CmdCwd, "") - opts, err := remote.ParseOpts(connName) - if err != nil { - return "", fmt.Errorf("invalid ssh remote name (%s): %w", connName, err) - } - conn := conncontroller.MaybeGetConn(opts) - if conn == nil { - return "", fmt.Errorf("connection %q not found", connName) - } - connRoute := wshutil.MakeConnectionRouteId(connName) - remoteInfo, err := wshclient.RemoteGetInfoCommand(wshclient.GetBareRpcClient(), &wshrpc.RpcOpts{Route: connRoute, Timeout: 2000}) - if err != nil { - return "", fmt.Errorf("unable to obtain remote info from connserver: %w", err) - } - shellType := shellutil.GetShellTypeFromShellPath(remoteInfo.Shell) - swapToken := makeSwapToken(ctx, ctx, dsc.BlockId, blockMeta, connName, shellType) - sockName := wavebase.GetPersistentRemoteSockName(wstore.GetClientId()) - rpcContext := wshrpc.RpcContext{ - ProcRoute: true, - SockName: sockName, - BlockId: dsc.BlockId, - Conn: connName, - } - jwtStr, err := wshutil.MakeClientJWTToken(rpcContext) - if err != nil { - return "", fmt.Errorf("error making jwt token: %w", err) - } - swapToken.RpcContext = &rpcContext - swapToken.Env[wshutil.WaveJwtTokenVarName] = jwtStr - cmdOpts := shellexec.CommandOptsType{ - Interactive: true, - Login: true, - Cwd: cwd, - SwapToken: swapToken, - ForceJwt: blockMeta.GetBool(waveobj.MetaKey_CmdJwt, false), - } - jobId, err := shellexec.StartRemoteShellJob(ctx, ctx, termSize, cmdStr, cmdOpts, conn, dsc.BlockId) - if err != nil { - return "", fmt.Errorf("failed to start durable shell: %w", err) - } - return jobId, nil -} diff --git a/pkg/blockcontroller/sessiondaemoncontroller.go b/pkg/blockcontroller/sessiondaemoncontroller.go new file mode 100644 index 0000000000..2bff0e1dc4 --- /dev/null +++ b/pkg/blockcontroller/sessiondaemoncontroller.go @@ -0,0 +1,273 @@ +package 
blockcontroller + +import ( + "context" + "fmt" + "io/fs" + "log" + "sync" + "time" + + "github.com/google/uuid" + "github.com/wavetermdev/waveterm/pkg/filestore" + "github.com/wavetermdev/waveterm/pkg/remote" + "github.com/wavetermdev/waveterm/pkg/remote/conncontroller" + "github.com/wavetermdev/waveterm/pkg/sessiondaemon" + "github.com/wavetermdev/waveterm/pkg/shellexec" + "github.com/wavetermdev/waveterm/pkg/util/shellutil" + "github.com/wavetermdev/waveterm/pkg/utilds" + "github.com/wavetermdev/waveterm/pkg/wavebase" + "github.com/wavetermdev/waveterm/pkg/waveobj" + "github.com/wavetermdev/waveterm/pkg/wcore" + "github.com/wavetermdev/waveterm/pkg/wps" + "github.com/wavetermdev/waveterm/pkg/wshrpc" + "github.com/wavetermdev/waveterm/pkg/wshrpc/wshclient" + "github.com/wavetermdev/waveterm/pkg/wshutil" + "github.com/wavetermdev/waveterm/pkg/wstore" +) + +type SessionDaemonController struct { + Lock *sync.Mutex + + BlockId string + ConnName string + DaemonId string + TabId string + InputSessionId string + inputSeqNum int + versionTs utilds.VersionTs +} + +func MakeSessionDaemonController(tabId string, blockId string, connName string) *SessionDaemonController { + return &SessionDaemonController{ + Lock: &sync.Mutex{}, + BlockId: blockId, + ConnName: connName, + TabId: tabId, + InputSessionId: uuid.New().String(), + } +} + +func (sdc *SessionDaemonController) WithLock(f func()) { + sdc.Lock.Lock() + defer sdc.Lock.Unlock() + f() +} + +func (sdc *SessionDaemonController) getNextInputSeq() (string, int) { + sdc.Lock.Lock() + defer sdc.Lock.Unlock() + sdc.inputSeqNum++ + return sdc.InputSessionId, sdc.inputSeqNum +} + +func (sdc *SessionDaemonController) Start(ctx context.Context, blockMeta waveobj.MetaMapType, rtOpts *waveobj.RuntimeOpts, force bool) error { + daemon := sessiondaemon.Manager.Get(sdc.DaemonId) + if daemon == nil { + log.Printf("[sessiondaemon] start: daemon %s not found in manager", sdc.DaemonId) + return fmt.Errorf("session daemon %s not found in 
manager", sdc.DaemonId) + } + + sessiondaemon.Manager.AttachBlock(ctx, sdc.DaemonId, sdc.BlockId) + + ensureResult, err := sessiondaemon.Manager.EnsureJobState(ctx, sdc.DaemonId, rtOpts, true) + if err != nil { + return err + } + switch ensureResult.Action { + case sessiondaemon.DaemonEnsure_Ready: + sdc.incrementVersion() + sdc.sendControllerStatus() + return nil + case sessiondaemon.DaemonEnsure_Wait: + return ErrSessionDaemonJobUnknown + case sessiondaemon.DaemonEnsure_Fallback: + log.Printf("[sessiondaemon] start: daemon=%s is done, falling back block=%s to shell", sdc.DaemonId, sdc.BlockId) + return fallbackSessionDaemonToShell(ctx, sdc.DaemonId, sdc.BlockId) + case sessiondaemon.DaemonEnsure_Start: + return sdc.createJobAndSync(ctx, blockMeta, rtOpts) + } + + return fmt.Errorf("unknown session daemon ensure action %q", ensureResult.Action) +} + +// createJobAndSync starts a new remote job for the daemon and syncs +// the resulting JobId to all attached blocks so the frontend can +// switch its zoneId. 
+func (sdc *SessionDaemonController) createJobAndSync(ctx context.Context, blockMeta waveobj.MetaMapType, rtOpts *waveobj.RuntimeOpts) error { + fsErr := filestore.WFS.MakeFile(ctx, sdc.BlockId, wavebase.BlockFile_Term, nil, wshrpc.FileOpts{MaxSize: DefaultTermMaxFileSize, Circular: true}) + if fsErr != nil && fsErr != fs.ErrExist { + return fmt.Errorf("error creating block term file: %w", fsErr) + } + jobId, err := sdc.startNewJob(ctx, blockMeta, rtOpts) + if err != nil { + log.Printf("[sessiondaemon] start: new job failed block=%s err=%v", sdc.BlockId, err) + return fmt.Errorf("failed to start job: %w", err) + } + + err = sessiondaemon.Manager.SetJobRunning(ctx, sdc.DaemonId, jobId) + if err != nil { + log.Printf("[sessiondaemon] start: set job id failed daemon=%s job=%s err=%v", sdc.DaemonId, jobId, err) + return fmt.Errorf("failed to set job id on daemon: %w", err) + } + + sdc.syncJobIdToBlocks(ctx, jobId) + + sdc.incrementVersion() + sdc.sendControllerStatus() + return nil +} + +// syncJobIdToBlocks writes the daemon's JobId to every attached block's +// DB record so the frontend useEffect picks up the change and calls +// attachToDaemon, switching the terminal zoneId to the new job's output stream. 
+func (sdc *SessionDaemonController) syncJobIdToBlocks(ctx context.Context, jobId string) { + attachedBlocks := sessiondaemon.Manager.GetBlocksForDaemon(sdc.DaemonId) + for _, blockId := range attachedBlocks { + wstore.DBUpdateFn(ctx, blockId, func(block *waveobj.Block) { + block.JobId = jobId + }) + wcore.SendWaveObjUpdate(waveobj.MakeORef(waveobj.OType_Block, blockId)) + } +} + +func (sdc *SessionDaemonController) startNewJob(ctx context.Context, blockMeta waveobj.MetaMapType, rtOpts *waveobj.RuntimeOpts) (string, error) { + termSize := waveobj.TermSize{ + Rows: shellutil.DefaultTermRows, + Cols: shellutil.DefaultTermCols, + } + if rtOpts != nil && rtOpts.TermSize.Rows > 0 && rtOpts.TermSize.Cols > 0 { + termSize = rtOpts.TermSize + } + cmdStr := blockMeta.GetString(waveobj.MetaKey_Cmd, "") + cwd := blockMeta.GetString(waveobj.MetaKey_CmdCwd, "") + opts, err := remote.ParseOpts(sdc.ConnName) + if err != nil { + return "", fmt.Errorf("invalid ssh remote name (%s): %w", sdc.ConnName, err) + } + conn := conncontroller.MaybeGetConn(opts) + if conn == nil { + return "", fmt.Errorf("connection %q not found", sdc.ConnName) + } + connRoute := wshutil.MakeConnectionRouteId(sdc.ConnName) + remoteInfo, err := wshclient.RemoteGetInfoCommand(wshclient.GetBareRpcClient(), &wshrpc.RpcOpts{Route: connRoute, Timeout: 2000}) + if err != nil { + return "", fmt.Errorf("unable to obtain remote info from connserver: %w", err) + } + shellType := shellutil.GetShellTypeFromShellPath(remoteInfo.Shell) + swapToken := makeSwapToken(ctx, ctx, sdc.BlockId, blockMeta, sdc.ConnName, shellType) + sockName := wavebase.GetPersistentRemoteSockName(wstore.GetClientId()) + err = attachRpcContextToSwapToken(swapToken, sdc.BlockId, sdc.ConnName, sockName) + if err != nil { + return "", err + } + cmdOpts := shellexec.CommandOptsType{ + Interactive: true, + Login: true, + Cwd: cwd, + SwapToken: swapToken, + ForceJwt: blockMeta.GetBool(waveobj.MetaKey_CmdJwt, false), + } + jobId, err := 
shellexec.StartRemoteShellJob(ctx, ctx, termSize, cmdStr, cmdOpts, conn, sdc.BlockId) + if err != nil { + return "", fmt.Errorf("failed to start remote shell job: %w", err) + } + + wstore.DBUpdateFn(ctx, jobId, func(job *waveobj.Job) { + job.AttachedBlockId = "daemon:" + sdc.DaemonId + }) + + return jobId, nil +} + +func (sdc *SessionDaemonController) Stop(graceful bool, newStatus string, destroy bool) { + if !destroy { + return + } + ctx := context.Background() + sessiondaemon.Manager.DetachBlock(ctx, sdc.DaemonId, sdc.BlockId) +} + +func (sdc *SessionDaemonController) SendInput(inputUnion *BlockInputUnion) error { + if inputUnion == nil { + return nil + } + daemon := sessiondaemon.Manager.Get(sdc.DaemonId) + if daemon == nil { + return fmt.Errorf("session daemon %s not found", sdc.DaemonId) + } + return daemon.SendInput(context.Background(), inputUnion.InputData, inputUnion.SigName, inputUnion.TermSize) +} + +func (sdc *SessionDaemonController) GetRuntimeStatus() *BlockControllerRuntimeStatus { + var rtn BlockControllerRuntimeStatus + sdc.WithLock(func() { + rtn.BlockId = sdc.BlockId + rtn.ShellProcConnName = sdc.ConnName + rtn.Version = sdc.versionTs.GetVersionTs() + daemon := sessiondaemon.Manager.Get(sdc.DaemonId) + if daemon != nil { + if daemon.JobId == "" { + rtn.ShellProcStatus = "init" + } else { + rtn.ShellProcStatus = "running" + } + } else { + rtn.ShellProcStatus = "done" + } + }) + return &rtn +} + +func (sdc *SessionDaemonController) incrementVersion() { + sdc.versionTs.GetVersionTs() +} + +func (sdc *SessionDaemonController) GetConnName() string { + return sdc.ConnName +} + +func (sdc *SessionDaemonController) sendControllerStatus() { + rtStatus := sdc.GetRuntimeStatus() + log.Printf("sending blockcontroller update %#v\n", rtStatus) + wps.Broker.Publish(wps.WaveEvent{ + Event: wps.Event_ControllerStatus, + Scopes: []string{ + waveobj.MakeORef(waveobj.OType_Tab, sdc.TabId).String(), + waveobj.MakeORef(waveobj.OType_Block, sdc.BlockId).String(), + }, 
+ Data: rtStatus, + }) +} + +func autoCreateSessionDaemon(ctx context.Context, blockId string, blockMeta waveobj.MetaMapType, connName string, rtOpts *waveobj.RuntimeOpts) (string, error) { + dbDaemon := &waveobj.SessionDaemon{ + OID: uuid.New().String(), + Name: "", + Connection: connName, + IsAnonymous: true, + Status: sessiondaemon.Status_Init, + CreatedAt: time.Now().UnixMilli(), + IdleTimeout: sessiondaemon.DefaultAnonymousIdleTimeout, + } + + err := wstore.DBInsert(ctx, dbDaemon) + if err != nil { + return "", fmt.Errorf("insert session daemon: %w", err) + } + + err = wstore.DBUpdateFn(ctx, blockId, func(block *waveobj.Block) { + block.Meta[waveobj.MetaKey_SessionDaemonId] = dbDaemon.OID + delete(block.Meta, MetaKey_SessionNoAutoCreate) + }) + if err != nil { + return "", fmt.Errorf("update block meta: %w", err) + } + + _, err = sessiondaemon.Manager.GetOrCreate(ctx, dbDaemon) + if err != nil { + return "", fmt.Errorf("create session daemon in manager: %w", err) + } + + return dbDaemon.OID, nil +} diff --git a/pkg/blockcontroller/shellcontroller.go b/pkg/blockcontroller/shellcontroller.go index a410225394..7d17245aec 100644 --- a/pkg/blockcontroller/shellcontroller.go +++ b/pkg/blockcontroller/shellcontroller.go @@ -466,18 +466,10 @@ func (bc *ShellController) setupAndStartShellProcess(logCtx context.Context, rc } } else { sockName := conn.GetDomainSocketName() - rpcContext := wshrpc.RpcContext{ - ProcRoute: true, - SockName: sockName, - BlockId: bc.BlockId, - Conn: conn.Opts.String(), - } - jwtStr, err := wshutil.MakeClientJWTToken(rpcContext) + err = attachRpcContextToSwapToken(swapToken, bc.BlockId, conn.Opts.String(), sockName) if err != nil { - return nil, fmt.Errorf("error making jwt token: %w", err) + return nil, err } - swapToken.RpcContext = &rpcContext - swapToken.Env[wshutil.WaveJwtTokenVarName] = jwtStr shellProc, err = shellexec.StartRemoteShellProc(ctx, logCtx, rc.TermSize, cmdStr, cmdOpts, conn) if err != nil { conn.SetWshError(err) diff --git 
a/pkg/jobcontroller/jobcontroller.go b/pkg/jobcontroller/jobcontroller.go index e41d77585c..5e73202caa 100644 --- a/pkg/jobcontroller/jobcontroller.go +++ b/pkg/jobcontroller/jobcontroller.go @@ -30,7 +30,6 @@ import ( "github.com/wavetermdev/waveterm/pkg/wavebase" "github.com/wavetermdev/waveterm/pkg/wavejwt" "github.com/wavetermdev/waveterm/pkg/waveobj" - "github.com/wavetermdev/waveterm/pkg/wconfig" "github.com/wavetermdev/waveterm/pkg/wcore" "github.com/wavetermdev/waveterm/pkg/wps" "github.com/wavetermdev/waveterm/pkg/wshrpc" @@ -41,6 +40,7 @@ import ( ) const DefaultTimeout = 2 * time.Second +const DefaultRemoteIdleTimeoutSeconds = 604800 // 7 days const ( JobManagerStatus_Init = "init" @@ -71,6 +71,19 @@ const JobOutputFileName = "term" const AutoReconnectDelay = 1 * time.Second const AutoReconnectCooldown = 30 * time.Second +// ClearSessionDaemonJobFn is set by sessiondaemon to handle cleaning +// up daemon state when a remote job manager exits. The sessiondaemon +// package cannot be imported here (import cycle), so a callback is used. +var ClearSessionDaemonJobFn func(ctx context.Context, jobId string) + +// OnConnectionUpFn is set by sessiondaemon to handle session daemon +// state reconciliation when an SSH connection becomes ready. +var OnConnectionUpFn func(ctx context.Context, connName string) + +// GetSessionDaemonBlocksFn is set by sessiondaemon so daemon-backed job +// output can still be mirrored into each attached block's terminal file. 
+var GetSessionDaemonBlocksFn func(daemonId string) []string + type connState struct { actual bool processed bool @@ -231,7 +244,7 @@ func SendBlockJobStatusEvent(ctx context.Context, blockId string) { } func sendBlockJobStatusEventByJob(ctx context.Context, job *waveobj.Job) { - if job == nil || job.AttachedBlockId == "" { + if job == nil || job.AttachedBlockId == "" || strings.HasPrefix(job.AttachedBlockId, "daemon:") { return } SendBlockJobStatusEvent(ctx, job.AttachedBlockId) @@ -477,7 +490,10 @@ func handleBlockCloseEvent(event *wps.WaveEvent) { } for _, jobId := range jobIds { - TerminateAndDetachJob(ctx, jobId) + err := TerminateAndDetachJob(ctx, jobId) + if err != nil { + log.Printf("[job:%s] error in handleBlockCloseEvent: %v", jobId, err) + } } } @@ -512,6 +528,11 @@ func onConnectionUp(connName string) { } log.Printf("[conn:%s] finished reconnecting jobs: %d/%d successful", connName, successCount, len(jobsToReconnect)) + + // Reconcile session daemon state for this connection. 
+ if OnConnectionUpFn != nil { + OnConnectionUpFn(ctx, connName) + } } func onConnectionDown(connName string) { @@ -694,16 +715,17 @@ func StartJob(ctx context.Context, params StartJobParams) (string, error) { publicKeyBase64 := base64.StdEncoding.EncodeToString(publicKey) jobEnv := envutil.CopyAndAddToEnvMap(params.Env, "WAVETERM_JOBID", jobId) startJobData := wshrpc.CommandRemoteStartJobData{ - Cmd: params.Cmd, - Args: params.Args, - Env: jobEnv, - TermSize: *params.TermSize, - StreamMeta: streamMeta, - JobAuthToken: jobAuthToken, - JobId: jobId, - MainServerJwtToken: jobAccessToken, - ClientId: clientId, - PublicKeyBase64: publicKeyBase64, + Cmd: params.Cmd, + Args: params.Args, + Env: jobEnv, + TermSize: *params.TermSize, + StreamMeta: streamMeta, + JobAuthToken: jobAuthToken, + JobId: jobId, + MainServerJwtToken: jobAccessToken, + ClientId: clientId, + PublicKeyBase64: publicKeyBase64, + RemoteIdleTimeoutSeconds: DefaultRemoteIdleTimeoutSeconds, } rpcOpts := &wshrpc.RpcOpts{ @@ -761,6 +783,17 @@ func StartJob(ctx context.Context, params StartJobParams) (string, error) { }, }) + routeId := wshutil.MakeJobRouteId(jobId) + waitCtx, cancelFn := context.WithTimeout(ctx, 5*time.Second) + err = wshutil.DefaultRouter.WaitForRegister(waitCtx, routeId) + cancelFn() + if err != nil { + log.Printf("[job:%s] warning: route not established after start: %v", jobId, err) + } else { + SetJobConnStatus(jobId, JobConnStatus_Connected) + log.Printf("[job:%s] route established, job connected", jobId) + } + go func() { defer func() { panichandler.PanicHandler("jobcontroller:runOutputLoop", recover()) @@ -796,18 +829,33 @@ func handleAppendJobFile(ctx context.Context, jobId string, fileName string, dat if err != nil { return fmt.Errorf("error appending to job file: %w", err) } - job, err := wstore.DBGet[*waveobj.Job](ctx, jobId) if err != nil { return fmt.Errorf("error getting job: %w", err) } - if job != nil && job.AttachedBlockId != "" { - err = doWFSAppend(ctx, 
waveobj.MakeORef(waveobj.OType_Block, job.AttachedBlockId), fileName, data) - if err != nil { - return fmt.Errorf("error appending to block file: %w", err) + if job == nil || job.AttachedBlockId == "" { + return nil + } + if strings.HasPrefix(job.AttachedBlockId, "daemon:") { + daemonId := strings.TrimPrefix(job.AttachedBlockId, "daemon:") + if GetSessionDaemonBlocksFn == nil { + return nil } + for _, blockId := range GetSessionDaemonBlocksFn(daemonId) { + if blockId == "" { + continue + } + err = doWFSAppend(ctx, waveobj.MakeORef(waveobj.OType_Block, blockId), fileName, data) + if err != nil { + return fmt.Errorf("error appending daemon job output to block file: %w", err) + } + } + return nil + } + err = doWFSAppend(ctx, waveobj.MakeORef(waveobj.OType_Block, job.AttachedBlockId), fileName, data) + if err != nil { + return fmt.Errorf("error appending to block file: %w", err) } - return nil } @@ -918,15 +966,17 @@ func tryTerminateJobManager(ctx context.Context, jobId string) { } } -func TerminateAndDetachJob(ctx context.Context, jobId string) { +func TerminateAndDetachJob(ctx context.Context, jobId string) error { err := TerminateJobManager(ctx, jobId) if err != nil { log.Printf("[job:%s] error terminating job manager: %v", jobId, err) + return fmt.Errorf("terminate job manager: %w", err) } err = DetachJobFromBlock(ctx, jobId, true) if err != nil { log.Printf("[job:%s] error detaching job from block: %v", jobId, err) } + return nil } func TerminateJobManager(ctx context.Context, jobId string) error { @@ -1099,11 +1149,12 @@ func doReconnectJob(ctx context.Context, jobId string, rtOpts *waveobj.RuntimeOp } reconnectData := wshrpc.CommandRemoteReconnectToJobManagerData{ - JobId: jobId, - JobAuthToken: job.JobAuthToken, - MainServerJwtToken: jobAccessToken, - JobManagerPid: job.JobManagerPid, - JobManagerStartTs: job.JobManagerStartTs, + JobId: jobId, + JobAuthToken: job.JobAuthToken, + MainServerJwtToken: jobAccessToken, + JobManagerPid: job.JobManagerPid, + 
JobManagerStartTs: job.JobManagerStartTs, + RemoteIdleTimeoutSeconds: DefaultRemoteIdleTimeoutSeconds, } rpcOpts := &wshrpc.RpcOpts{ @@ -1132,6 +1183,10 @@ func doReconnectJob(ctx context.Context, jobId string, rtOpts *waveobj.RuntimeOp } else { sendBlockJobStatusEventByJob(ctx, updatedJob) } + // Clear session daemon references to this job so daemons can be restarted + if ClearSessionDaemonJobFn != nil { + ClearSessionDaemonJobFn(ctx, jobId) + } telemetry.GoRecordTEventWrap(&telemetrydata.TEvent{ Event: "job:done", Props: telemetrydata.TEventProps{ @@ -1345,59 +1400,6 @@ func restartStreaming(ctx context.Context, jobId string, knownConnected bool, rt } // this function must be kept up to date with getBlockTermDurableAtom in frontend/app/store/global.ts -func IsBlockTermDurable(block *waveobj.Block) bool { - if block == nil { - return false - } - - // Check if view is "term", and controller is "shell" - if block.Meta.GetString(waveobj.MetaKey_View, "") != "term" || block.Meta.GetString(waveobj.MetaKey_Controller, "") != "shell" { - return false - } - - // 1. Check if block has a JobId - if block.JobId != "" { - return true - } - - // 2. Check if connection is local or WSL (not durable) - connName := block.Meta.GetString(waveobj.MetaKey_Connection, "") - if conncontroller.IsLocalConnName(connName) || conncontroller.IsWslConnName(connName) { - return false - } - - // 3. 
Check config hierarchy: blockmeta → connection → global (default true) - // Check block meta first - if val, exists := block.Meta[waveobj.MetaKey_TermDurable]; exists { - if boolVal, ok := val.(bool); ok { - return boolVal - } - } - // Check connection config - fullConfig := wconfig.GetWatcher().GetFullConfig() - if connName != "" { - if connConfig, exists := fullConfig.Connections[connName]; exists { - if connConfig.TermDurable != nil { - return *connConfig.TermDurable - } - } - } - // Check global settings - if fullConfig.Settings.TermDurable != nil { - return *fullConfig.Settings.TermDurable - } - // Default to true for non-local connections - return true -} - -func IsBlockIdTermDurable(blockId string) bool { - block, err := wstore.DBGet[*waveobj.Block](context.Background(), blockId) - if err != nil || block == nil { - return false - } - return IsBlockTermDurable(block) -} - func DeleteJob(ctx context.Context, jobId string) error { SetJobConnStatus(jobId, JobConnStatus_Disconnected) jobTerminationMessageWritten.Delete(jobId) @@ -1539,7 +1541,7 @@ func SendInput(ctx context.Context, data wshrpc.CommandJobInputData) error { } func resetTerminalState(logCtx context.Context, blockId string) { - if blockId == "" { + if blockId == "" || strings.HasPrefix(blockId, "daemon:") { return } ctx, cancelFn := context.WithTimeout(context.Background(), DefaultTimeout) @@ -1589,7 +1591,7 @@ func writeSessionSeparatorToTerminal(blockId string, termWidth int) { // msg should not have a terminating newline func writeMutedMessageToTerminal(blockId string, msg string) { - if blockId == "" { + if blockId == "" || strings.HasPrefix(blockId, "daemon:") { return } ctx, cancelFn := context.WithTimeout(context.Background(), DefaultTimeout) diff --git a/pkg/jobmanager/jobcmd.go b/pkg/jobmanager/jobcmd.go index 8adfabefc4..0c82a690d6 100644 --- a/pkg/jobmanager/jobcmd.go +++ b/pkg/jobmanager/jobcmd.go @@ -7,7 +7,9 @@ import ( "encoding/base64" "fmt" "log" + "os" "os/exec" + "strings" "sync" 
"syscall" "time" @@ -53,12 +55,7 @@ func MakeJobCmd(jobId string, cmdDef CmdDef) (*JobCmd, error) { return nil, fmt.Errorf("invalid term size: %v", cmdDef.TermSize) } ecmd := exec.Command(cmdDef.Cmd, cmdDef.Args...) - if len(cmdDef.Env) > 0 { - ecmd.Env = make([]string, 0, len(cmdDef.Env)) - for key, val := range cmdDef.Env { - ecmd.Env = append(ecmd.Env, fmt.Sprintf("%s=%s", key, val)) - } - } + ecmd.Env = mergeEnv(os.Environ(), cmdDef.Env) cmdPty, err := pty.StartWithSize(ecmd, &pty.Winsize{Rows: uint16(cmdDef.TermSize.Rows), Cols: uint16(cmdDef.TermSize.Cols)}) if err != nil { return nil, fmt.Errorf("failed to start command: %w", err) @@ -72,6 +69,25 @@ func MakeJobCmd(jobId string, cmdDef CmdDef) (*JobCmd, error) { return jm, nil } +func mergeEnv(baseEnv []string, overrides map[string]string) []string { + envMap := make(map[string]string, len(baseEnv)+len(overrides)) + for _, envVar := range baseEnv { + key, val, found := strings.Cut(envVar, "=") + if !found { + continue + } + envMap[key] = val + } + for key, val := range overrides { + envMap[key] = val + } + rtn := make([]string, 0, len(envMap)) + for key, val := range envMap { + rtn = append(rtn, fmt.Sprintf("%s=%s", key, val)) + } + return rtn +} + func (jm *JobCmd) waitForProcess() { if jm.cmd == nil || jm.cmd.Process == nil { return diff --git a/pkg/remote/conncontroller/conncontroller.go b/pkg/remote/conncontroller/conncontroller.go index a24a789009..66f45f7e2c 100644 --- a/pkg/remote/conncontroller/conncontroller.go +++ b/pkg/remote/conncontroller/conncontroller.go @@ -13,6 +13,7 @@ import ( "net" "os" "path/filepath" + "strconv" "strings" "sync" "sync/atomic" @@ -1259,3 +1260,51 @@ func GetConnectionsFromConfig() ([]string, error) { return resolveSshConfigPatterns(sshConfigFiles) } + +// runSSHCommand executes a command over the SSH connection and returns stdout. 
+func runSSHCommand(ctx context.Context, client *ssh.Client, cmd string) (string, error) { + session, err := client.NewSession() + if err != nil { + return "", err + } + defer session.Close() + var outBuf strings.Builder + session.Stdout = &outBuf + session.Stderr = &outBuf + err = runSessionWithContext(ctx, session, cmd) + return strings.TrimSpace(outBuf.String()), err +} + +// CheckRemoteProcessAlive verifies whether a process with the given PID +// is still running on the remote host. It tries Unix (ps) first, then +// falls back to Windows (tasklist). Returns true if either probe reports the PID. Errors from the probe commands themselves are discarded, so a false result can also mean the probe could not run. +func CheckRemoteProcessAlive(ctx context.Context, connName string, pid int) (bool, error) { + opts, err := remote.ParseOpts(connName) + if err != nil { + return false, err + } + conn := MaybeGetConn(opts) + if conn == nil { + return false, fmt.Errorf("connection %q not found", connName) + } + client := conn.GetClient() + if client == nil { + return false, fmt.Errorf("connection %q not connected", connName) + } + + pidStr := strconv.Itoa(pid) + + // Unix: "ps -p PID -o pid=" prints the PID if the process exists, nothing otherwise. + out, _ := runSSHCommand(ctx, client, fmt.Sprintf("ps -p %s -o pid= 2>/dev/null", pidStr)) + if strings.TrimSpace(out) == pidStr { + return true, nil + } + + // Windows: "tasklist /FI ..." lists matching processes.
+ out, _ = runSSHCommand(ctx, client, fmt.Sprintf("tasklist /FI \"PID eq %s\" /NH 2>nul", pidStr)) + if strings.Contains(out, pidStr) { + return true, nil + } + + return false, nil +} diff --git a/pkg/service/blockservice/blockservice.go b/pkg/service/blockservice/blockservice.go index d2e6ca39da..1c1aeb204d 100644 --- a/pkg/service/blockservice/blockservice.go +++ b/pkg/service/blockservice/blockservice.go @@ -5,6 +5,7 @@ package blockservice import ( "context" + "errors" "fmt" "time" @@ -37,13 +38,13 @@ func (bs *BlockService) GetControllerStatus(ctx context.Context, blockId string) func (*BlockService) SaveTerminalState_Meta() tsgenmeta.MethodMeta { return tsgenmeta.MethodMeta{ - Desc: "save the terminal state to a blockfile", - ArgNames: []string{"ctx", "blockId", "state", "stateType", "ptyOffset", "termSize"}, + Desc: "save the terminal state to a zone file", + ArgNames: []string{"ctx", "zoneId", "state", "stateType", "ptyOffset", "termSize"}, } } -func (bs *BlockService) SaveTerminalState(ctx context.Context, blockId string, state string, stateType string, ptyOffset int64, termSize waveobj.TermSize) error { - _, err := wstore.DBMustGet[*waveobj.Block](ctx, blockId) +func (bs *BlockService) SaveTerminalState(ctx context.Context, zoneId string, state string, stateType string, ptyOffset int64, termSize waveobj.TermSize) error { + err := ensureTerminalStateZoneExists(ctx, zoneId) if err != nil { return err } @@ -51,8 +52,8 @@ func (bs *BlockService) SaveTerminalState(ctx context.Context, blockId string, s return fmt.Errorf("invalid state type: %q", stateType) } // ignore MakeFile error (already exists is ok) - filestore.WFS.MakeFile(ctx, blockId, "cache:term:"+stateType, nil, wshrpc.FileOpts{}) - err = filestore.WFS.WriteFile(ctx, blockId, "cache:term:"+stateType, []byte(state)) + filestore.WFS.MakeFile(ctx, zoneId, "cache:term:"+stateType, nil, wshrpc.FileOpts{}) + err = filestore.WFS.WriteFile(ctx, zoneId, "cache:term:"+stateType, []byte(state)) if err != nil { 
return fmt.Errorf("cannot save terminal state: %w", err) } @@ -60,13 +61,31 @@ func (bs *BlockService) SaveTerminalState(ctx context.Context, blockId string, s "ptyoffset": ptyOffset, "termsize": termSize, } - err = filestore.WFS.WriteMeta(ctx, blockId, "cache:term:"+stateType, fileMeta, true) + err = filestore.WFS.WriteMeta(ctx, zoneId, "cache:term:"+stateType, fileMeta, true) if err != nil { return fmt.Errorf("cannot save terminal state meta: %w", err) } return nil } +func ensureTerminalStateZoneExists(ctx context.Context, zoneId string) error { + _, blockErr := wstore.DBMustGet[*waveobj.Block](ctx, zoneId) + if blockErr == nil { + return nil + } + if !errors.Is(blockErr, wstore.ErrNotFound) { + return blockErr + } + _, jobErr := wstore.DBMustGet[*waveobj.Job](ctx, zoneId) + if jobErr == nil { + return nil + } + if !errors.Is(jobErr, wstore.ErrNotFound) { + return jobErr + } + return fmt.Errorf("terminal state zone %q not found", zoneId) +} + func (*BlockService) CleanupOrphanedBlocks_Meta() tsgenmeta.MethodMeta { return tsgenmeta.MethodMeta{ Desc: "queue a layout action to cleanup orphaned blocks in the tab", diff --git a/pkg/sessiondaemon/reaper.go b/pkg/sessiondaemon/reaper.go new file mode 100644 index 0000000000..06b85c1989 --- /dev/null +++ b/pkg/sessiondaemon/reaper.go @@ -0,0 +1,179 @@ +package sessiondaemon + +import ( + "context" + "log" + "time" + + "github.com/google/uuid" + "github.com/wavetermdev/waveterm/pkg/waveobj" + "github.com/wavetermdev/waveterm/pkg/wstore" +) + +func (sd *SessionDaemonManager) StartIdleReaper(ctx context.Context) { + go func() { + ticker := time.NewTicker(IdleCheckInterval * time.Second) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + sd.reapIdleDaemons(ctx) + sd.verifyConsistency(ctx) + } + } + }() +} + +func (sd *SessionDaemonManager) cleanupDeadBlocks(ctx context.Context, daemonId string, memDaemon *SessionDaemon) { + memDaemon.Lock.Lock() + blockIds := make([]string, 0, 
len(memDaemon.Blocks)) + for blockId := range memDaemon.Blocks { + blockIds = append(blockIds, blockId) + } + memDaemon.Lock.Unlock() + + var deadBlocks []string + for _, blockId := range blockIds { + _, err := wstore.DBMustGet[*waveobj.Block](ctx, blockId) + if err != nil { + deadBlocks = append(deadBlocks, blockId) + } + } + + if len(deadBlocks) == 0 { + return + } + + log.Printf("[sessiondaemon] cleanupDeadBlocks: daemon=%s removing %d dead blocks: %v", daemonId, len(deadBlocks), deadBlocks) + + memDaemon.Lock.Lock() + for _, blockId := range deadBlocks { + delete(memDaemon.Blocks, blockId) + } + remaining := len(memDaemon.Blocks) + memDaemon.Lock.Unlock() + + if remaining == 0 { + sd.startIdleCountdown(ctx, daemonId) + } +} + +func (sd *SessionDaemonManager) reapIdleDaemons(ctx context.Context) { + allDaemons, err := wstore.DBGetAllObjsByType[*waveobj.SessionDaemon](ctx, waveobj.OType_SessionDaemon) + if err != nil { + return + } + + for _, dbDaemon := range allDaemons { + sd.Lock.Lock() + memDaemon, hasMem := sd.Daemons[dbDaemon.OID] + sd.Lock.Unlock() + + switch dbDaemon.Status { + case Status_Running: + sd.reapRunning(ctx, dbDaemon, memDaemon, hasMem) + case Status_Done: + sd.reapDone(ctx, dbDaemon, memDaemon, hasMem) + } + } +} + +func (sd *SessionDaemonManager) reapRunning(ctx context.Context, dbDaemon *waveobj.SessionDaemon, memDaemon *SessionDaemon, hasMem bool) { + if hasMem && memDaemon.HasAttachedBlocks() { + sd.cleanupDeadBlocks(ctx, dbDaemon.OID, memDaemon) + if memDaemon.HasAttachedBlocks() { + return + } + } + + if dbDaemon.IdleTimeout <= 0 { + return + } + + remaining := sd.advanceIdleTimer(ctx, dbDaemon.OID) + if remaining > 0 { + return + } + + log.Printf("[sessiondaemon:%s] idle timeout reached, terminating", dbDaemon.OID) + if hasMem { + err := memDaemon.Stop(ctx) + if err != nil { + log.Printf("[sessiondaemon:%s] error stopping daemon, will retry next cycle: %v", dbDaemon.OID, err) + return + } + sd.Remove(dbDaemon.OID) + } + if err := 
wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, dbDaemon.OID); err != nil { + log.Printf("[sessiondaemon:%s] reapRunning: error deleting from DB: %v", dbDaemon.OID, err) + } +} + +func (sd *SessionDaemonManager) reapDone(ctx context.Context, dbDaemon *waveobj.SessionDaemon, memDaemon *SessionDaemon, hasMem bool) { + if hasMem && memDaemon.HasAttachedBlocks() { + return + } + + if dbDaemon.IdleTimeout <= 0 { + return + } + + if dbDaemon.IdleSince <= 0 { + if err := wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbD *waveobj.SessionDaemon) { + dbD.IdleSince = DoneReapTimeout + }); err != nil { + log.Printf("[sessiondaemon:%s] reapDone: error setting done reap timeout: %v", dbDaemon.OID, err) + } + return + } + + remaining := sd.advanceIdleTimer(ctx, dbDaemon.OID) + if remaining > 0 { + return + } + + log.Printf("[sessiondaemon:%s] done daemon reaped", dbDaemon.OID) + if hasMem { + sd.Remove(dbDaemon.OID) + } + if err := wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, dbDaemon.OID); err != nil { + log.Printf("[sessiondaemon:%s] reapDone: error deleting from DB: %v", dbDaemon.OID, err) + } +} + +func (sd *SessionDaemonManager) verifyConsistency(ctx context.Context) { + daemons, err := wstore.DBGetAllObjsByType[*waveobj.SessionDaemon](ctx, waveobj.OType_SessionDaemon) + if err != nil { + return + } + + dbIds := make(map[string]bool) + for _, dbDaemon := range daemons { + dbIds[dbDaemon.OID] = true + } + + sd.Lock.Lock() + defer sd.Lock.Unlock() + + for id := range sd.Daemons { + if !dbIds[id] { + log.Printf("[sessiondaemon] consistency: daemon %s in memory but not in DB, removing from memory", id) + delete(sd.Daemons, id) + } + } + + for _, dbDaemon := range daemons { + if _, exists := sd.Daemons[dbDaemon.OID]; !exists { + log.Printf("[sessiondaemon] consistency: daemon %s in DB but not in memory, loading", dbDaemon.OID) + sd.Daemons[dbDaemon.OID] = &SessionDaemon{ + DaemonId: dbDaemon.OID, + Name: dbDaemon.Name, + JobId: dbDaemon.JobId, + InputSessionId: 
uuid.New().String(), + Blocks: make(map[string]bool), + } + } + } +} diff --git a/pkg/sessiondaemon/sessiondaemon.go b/pkg/sessiondaemon/sessiondaemon.go new file mode 100644 index 0000000000..a2e3bea59e --- /dev/null +++ b/pkg/sessiondaemon/sessiondaemon.go @@ -0,0 +1,604 @@ +package sessiondaemon + +import ( + "context" + "encoding/base64" + "fmt" + "log" + "sync" + "time" + + "github.com/google/uuid" + "github.com/wavetermdev/waveterm/pkg/jobcontroller" + "github.com/wavetermdev/waveterm/pkg/remote/conncontroller" + "github.com/wavetermdev/waveterm/pkg/waveobj" + "github.com/wavetermdev/waveterm/pkg/wshrpc" + "github.com/wavetermdev/waveterm/pkg/wstore" +) + +const ( + DefaultAnonymousIdleTimeout = 600 // 10min + DefaultNamedIdleTimeout = 86400 // 24h + IdleCheckInterval = 60 // 60s + DoneReapTimeout = 300 // 5min for done daemons with no blocks +) + +const ( + Status_Init = "init" + Status_Running = "running" + Status_Disconnected = "disconnected" + Status_Done = "done" +) + +const ( + JobManagerState_Alive = "alive" + JobManagerState_Dead = "dead" + JobManagerState_Unknown = "unknown" +) + +const ( + DaemonEnsure_Ready = "ready" + DaemonEnsure_Wait = "wait" + DaemonEnsure_Fallback = "fallback" + DaemonEnsure_Start = "start" +) + +type SessionDaemon struct { + Lock sync.Mutex + + DaemonId string + Name string + JobId string + InputSessionId string + SeqNum int + Blocks map[string]bool +} + +type SessionDaemonManager struct { + Lock sync.Mutex + Daemons map[string]*SessionDaemon +} + +type EnsureResult struct { + Action string + JobId string +} + +var Manager = &SessionDaemonManager{ + Daemons: make(map[string]*SessionDaemon), +} + +var OnDaemonJobDoneFn func(ctx context.Context, daemonId string) + +func init() { + jobcontroller.ClearSessionDaemonJobFn = func(ctx context.Context, jobId string) { + Manager.ClearJobIdFromDaemons(ctx, jobId) + } + jobcontroller.OnConnectionUpFn = func(ctx context.Context, connName string) { + Manager.OnConnectionUp(ctx, connName) + 
} + jobcontroller.GetSessionDaemonBlocksFn = func(daemonId string) []string { + return Manager.GetBlocksForDaemon(daemonId) + } +} + +func (sd *SessionDaemon) GetNextInputSeq() (string, int) { + sd.Lock.Lock() + defer sd.Lock.Unlock() + sd.SeqNum++ + return sd.InputSessionId, sd.SeqNum +} + +func (sd *SessionDaemon) HasAttachedBlocks() bool { + sd.Lock.Lock() + defer sd.Lock.Unlock() + return len(sd.Blocks) > 0 +} + +func (sd *SessionDaemon) HasBlock(blockId string) bool { + sd.Lock.Lock() + defer sd.Lock.Unlock() + return sd.Blocks[blockId] +} + +func (sd *SessionDaemon) Reconnect(ctx context.Context, dbDaemon *waveobj.SessionDaemon, rtOpts *waveobj.RuntimeOpts) error { + if dbDaemon.JobId == "" { + return fmt.Errorf("no jobid to reconnect") + } + sd.Lock.Lock() + sd.JobId = dbDaemon.JobId + sd.Lock.Unlock() + + err := jobcontroller.ReconnectJob(ctx, dbDaemon.JobId, rtOpts) + if err != nil { + var jobGone bool + dbErr := wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbSd *waveobj.SessionDaemon) { + if dbSd.JobId == "" { + dbSd.Status = Status_Done + jobGone = true + } else { + dbSd.Status = Status_Disconnected + } + }) + if dbErr != nil { + log.Printf("[sessiondaemon:%s] reconnect: error updating status: %v (memory may be stale)", sd.DaemonId, dbErr) + // If the DB write failed, jobGone is unreliable — do NOT clear memory JobId. 
+ return fmt.Errorf("reconnect failed: %w", err) + } + if jobGone { + sd.Lock.Lock() + sd.JobId = "" + sd.Lock.Unlock() + log.Printf("[sessiondaemon:%s] reconnect: job manager gone, status -> done", sd.DaemonId) + return fmt.Errorf("job manager has exited") + } + log.Printf("[sessiondaemon:%s] reconnect: failed, status -> disconnected: %v", sd.DaemonId, err) + return fmt.Errorf("reconnect failed: %w", err) + } + + if err := wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbSd *waveobj.SessionDaemon) { + dbSd.Status = Status_Running + }); err != nil { + log.Printf("[sessiondaemon:%s] reconnect: error updating status to running: %v", sd.DaemonId, err) + } + log.Printf("[sessiondaemon:%s] reconnect: success, status -> running", sd.DaemonId) + return nil +} + +func (sd *SessionDaemon) Stop(ctx context.Context) error { + sd.Lock.Lock() + jobId := sd.JobId + sd.Lock.Unlock() + log.Printf("[sessiondaemon] stop daemon=%s job=%s", sd.DaemonId, jobId) + if jobId != "" { + err := jobcontroller.TerminateAndDetachJob(ctx, jobId) + if err != nil { + log.Printf("[sessiondaemon:%s] error terminating remote job %s: %v", sd.DaemonId, jobId, err) + return fmt.Errorf("failed to terminate remote job: %w", err) + } + } + return nil +} + +func (sd *SessionDaemon) SendInput(ctx context.Context, inputData []byte, sigName string, termSize *waveobj.TermSize) error { + sd.Lock.Lock() + jobId := sd.JobId + if jobId == "" { + sd.Lock.Unlock() + return fmt.Errorf("no job attached") + } + sd.SeqNum++ + inputSessionId, seqNum := sd.InputSessionId, sd.SeqNum + sd.Lock.Unlock() + + data := wshrpc.CommandJobInputData{ + JobId: jobId, + InputSessionId: inputSessionId, + SeqNum: seqNum, + TermSize: termSize, + SigName: sigName, + } + if len(inputData) > 0 { + data.InputData64 = base64.StdEncoding.EncodeToString(inputData) + } + return jobcontroller.SendInput(ctx, data) +} + +func (sd *SessionDaemonManager) GetOrCreate(ctx context.Context, dbDaemon *waveobj.SessionDaemon) (*SessionDaemon, error) { + 
sd.Lock.Lock() + defer sd.Lock.Unlock() + + if existing, ok := sd.Daemons[dbDaemon.OID]; ok { + existing.Lock.Lock() + if existing.JobId == "" { + existing.JobId = dbDaemon.JobId + } + existing.Lock.Unlock() + return existing, nil + } + + daemon := &SessionDaemon{ + DaemonId: dbDaemon.OID, + Name: dbDaemon.Name, + JobId: dbDaemon.JobId, + InputSessionId: uuid.New().String(), + Blocks: make(map[string]bool), + } + sd.Daemons[dbDaemon.OID] = daemon + return daemon, nil +} + +func (sd *SessionDaemonManager) Get(daemonId string) *SessionDaemon { + sd.Lock.Lock() + defer sd.Lock.Unlock() + return sd.Daemons[daemonId] +} + +func (sd *SessionDaemonManager) Remove(daemonId string) { + sd.Lock.Lock() + defer sd.Lock.Unlock() + delete(sd.Daemons, daemonId) +} + +func (sd *SessionDaemonManager) SetJobRunning(ctx context.Context, daemonId string, jobId string) error { + daemon := sd.Get(daemonId) + var oldJobId string + if daemon != nil { + daemon.Lock.Lock() + oldJobId = daemon.JobId + daemon.JobId = jobId + daemon.Lock.Unlock() + } + + err := wstore.DBUpdateFn(ctx, daemonId, func(sdDb *waveobj.SessionDaemon) { + sdDb.JobId = jobId + sdDb.Status = Status_Running + }) + if err != nil { + if daemon != nil { + daemon.Lock.Lock() + daemon.JobId = oldJobId + daemon.Lock.Unlock() + } + log.Printf("[sessiondaemon:%s] SetJobRunning: DB update failed: %v", daemonId, err) + return err + } + return nil +} + +func (sd *SessionDaemonManager) clearJobDone(ctx context.Context, daemonId string) error { + daemon := sd.Get(daemonId) + var oldJobId string + if daemon != nil { + daemon.Lock.Lock() + oldJobId = daemon.JobId + daemon.JobId = "" + daemon.Lock.Unlock() + } + + if err := wstore.DBUpdateFn(ctx, daemonId, func(dbSd *waveobj.SessionDaemon) { + dbSd.JobId = "" + dbSd.Status = Status_Done + }); err != nil { + if daemon != nil { + daemon.Lock.Lock() + daemon.JobId = oldJobId + daemon.Lock.Unlock() + } + return err + } + return nil +} + +func (sd *SessionDaemonManager) AttachBlock(ctx 
context.Context, daemonId string, blockId string) { + sd.Lock.Lock() + daemon, ok := sd.Daemons[daemonId] + if !ok { + sd.Lock.Unlock() + return + } + daemon.Lock.Lock() + sd.Lock.Unlock() + defer daemon.Lock.Unlock() + daemon.Blocks[blockId] = true + sd.resetIdleTimer(ctx, daemonId) +} + +func (sd *SessionDaemonManager) DetachBlock(ctx context.Context, daemonId string, blockId string) { + sd.Lock.Lock() + daemon, ok := sd.Daemons[daemonId] + if !ok { + sd.Lock.Unlock() + return + } + daemon.Lock.Lock() + sd.Lock.Unlock() + defer daemon.Lock.Unlock() + delete(daemon.Blocks, blockId) + if len(daemon.Blocks) == 0 { + sd.startIdleCountdown(ctx, daemonId) + } +} + +// --- idle timer helpers --- +// These centralize IdleSince management so there is a single place +// to understand the countdown mechanics. Despite its name, IdleSince holds the seconds remaining in the countdown (it is decremented by IdleCheckInterval), not a timestamp. + +func (sd *SessionDaemonManager) resetIdleTimer(ctx context.Context, daemonId string) { + err := wstore.DBUpdateFn(ctx, daemonId, func(dbD *waveobj.SessionDaemon) { + dbD.IdleSince = 0 + }) + if err != nil { + log.Printf("[sessiondaemon:%s] error resetting idle timer: %v", daemonId, err) + } +} + +func (sd *SessionDaemonManager) startIdleCountdown(ctx context.Context, daemonId string) { + err := wstore.DBUpdateFn(ctx, daemonId, func(dbD *waveobj.SessionDaemon) { + if dbD.Status == Status_Done { + dbD.IdleSince = DoneReapTimeout + return + } + dbD.IdleSince = dbD.IdleTimeout + }) + if err != nil { + log.Printf("[sessiondaemon:%s] error starting idle countdown: %v", daemonId, err) + } +} + +// advanceIdleTimer decrements IdleSince by IdleCheckInterval and returns the new value. +// A return value <= 0 means the timer has expired. Returns 0 on error, which the reaper callers therefore treat as expired.
+func (sd *SessionDaemonManager) advanceIdleTimer(ctx context.Context, daemonId string) int64 { + var remaining int64 + err := wstore.DBUpdateFn(ctx, daemonId, func(dbD *waveobj.SessionDaemon) { + dbD.IdleSince -= IdleCheckInterval + remaining = dbD.IdleSince + }) + if err != nil { + log.Printf("[sessiondaemon:%s] error advancing idle timer: %v", daemonId, err) + return 0 + } + return remaining +} + +func (sd *SessionDaemonManager) GetBlocksForDaemon(daemonId string) []string { + sd.Lock.Lock() + daemon, ok := sd.Daemons[daemonId] + if !ok { + sd.Lock.Unlock() + return nil + } + daemon.Lock.Lock() + sd.Lock.Unlock() + defer daemon.Lock.Unlock() + var rtn []string + for blockId := range daemon.Blocks { + rtn = append(rtn, blockId) + } + return rtn +} + +func (sd *SessionDaemonManager) SendInput(daemonId string, inputData []byte, sigName string, termSize *waveobj.TermSize) error { + ctx := context.Background() + sd.Lock.Lock() + daemon, ok := sd.Daemons[daemonId] + sd.Lock.Unlock() + if !ok { + return fmt.Errorf("daemon %s not found", daemonId) + } + return daemon.SendInput(ctx, inputData, sigName, termSize) +} + +func (sd *SessionDaemonManager) MarkDone(ctx context.Context, daemonId string) error { + if err := sd.clearJobDone(ctx, daemonId); err != nil { + log.Printf("[sessiondaemon:%s] MarkDone: DB update failed: %v", daemonId, err) + return err + } + log.Printf("[sessiondaemon:%s] MarkDone: job cleared, status=done", daemonId) + return nil +} + +func ClassifyJobManagerState(ctx context.Context, dbDaemon *waveobj.SessionDaemon) (string, error) { + if dbDaemon == nil || dbDaemon.JobId == "" { + return JobManagerState_Dead, nil + } + job, err := wstore.DBGet[*waveobj.Job](ctx, dbDaemon.JobId) + if err != nil { + return JobManagerState_Unknown, fmt.Errorf("get job %s: %w", dbDaemon.JobId, err) + } + if job == nil || job.JobManagerStatus == jobcontroller.JobManagerStatus_Done { + return JobManagerState_Dead, nil + } + if job.JobManagerPid == 0 { + return 
JobManagerState_Unknown, nil + } + connected, err := conncontroller.IsConnected(dbDaemon.Connection) + if err != nil { + return JobManagerState_Unknown, err + } + if !connected { + return JobManagerState_Unknown, nil + } + alive, err := conncontroller.CheckRemoteProcessAlive(ctx, dbDaemon.Connection, job.JobManagerPid) + if err != nil { + return JobManagerState_Unknown, nil + } + if alive { + return JobManagerState_Alive, nil + } + return JobManagerState_Dead, nil +} + +func (sd *SessionDaemonManager) EnsureJobState(ctx context.Context, daemonId string, rtOpts *waveobj.RuntimeOpts, reconnect bool) (*EnsureResult, error) { + dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, daemonId) + if err != nil { + return nil, fmt.Errorf("get session daemon: %w", err) + } + + memDaemon, err := sd.GetOrCreate(ctx, dbDaemon) + if err != nil { + return nil, fmt.Errorf("create session daemon in manager: %w", err) + } + + if dbDaemon.Status == Status_Done { + return &EnsureResult{Action: DaemonEnsure_Fallback}, nil + } + + if dbDaemon.JobId == "" { + if dbDaemon.Status == Status_Disconnected { + return &EnsureResult{Action: DaemonEnsure_Wait}, nil + } + return &EnsureResult{Action: DaemonEnsure_Start}, nil + } + + jobState, err := ClassifyJobManagerState(ctx, dbDaemon) + if err != nil { + return nil, fmt.Errorf("check session daemon job manager: %w", err) + } + switch jobState { + case JobManagerState_Dead: + if err := sd.MarkDone(ctx, daemonId); err != nil { + return nil, err + } + return &EnsureResult{Action: DaemonEnsure_Fallback}, nil + case JobManagerState_Unknown: + return &EnsureResult{Action: DaemonEnsure_Wait}, nil + } + + if !reconnect { + return &EnsureResult{Action: DaemonEnsure_Ready, JobId: dbDaemon.JobId}, nil + } + + err = memDaemon.Reconnect(ctx, dbDaemon, rtOpts) + if err != nil { + dbDaemon2, dbErr := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, daemonId) + if dbErr == nil && dbDaemon2.Status == Status_Done { + return &EnsureResult{Action: 
DaemonEnsure_Fallback}, nil + } + return nil, err + } + return &EnsureResult{Action: DaemonEnsure_Ready, JobId: dbDaemon.JobId}, nil +} + +// GetMemJobId returns the in-memory JobId for a daemon, used as a +// fallback when the DB read returns stale data (e.g., SessionInfoCommand +// called before a SetJobId transaction is visible). Returns "" if the daemon is not loaded in memory. +func (sd *SessionDaemonManager) GetMemJobId(daemonId string) string { + sd.Lock.Lock() + daemon, ok := sd.Daemons[daemonId] + sd.Lock.Unlock() + if !ok { + return "" + } + daemon.Lock.Lock() + defer daemon.Lock.Unlock() + return daemon.JobId +} + +// Rename updates the daemon's name in memory and in the database and marks +// it as non-anonymous in the database. The in-memory name is changed first and is not restored if the database update fails. +func (sd *SessionDaemonManager) Rename(ctx context.Context, daemonId string, name string) error { + sd.Lock.Lock() + daemon, ok := sd.Daemons[daemonId] + sd.Lock.Unlock() + if ok { + daemon.Lock.Lock() + daemon.Name = name + daemon.Lock.Unlock() + } + err := wstore.DBUpdateFn(ctx, daemonId, func(sdDb *waveobj.SessionDaemon) { + sdDb.Name = name + sdDb.IsAnonymous = false + }) + if err != nil { + return fmt.Errorf("update session daemon: %w", err) + } + return nil +} + +// RecordActivity updates the daemon's LastActiveAt timestamp in the database.
+func (sd *SessionDaemonManager) RecordActivity(ctx context.Context, daemonId string) error { + err := wstore.DBUpdateFn(ctx, daemonId, func(sdDb *waveobj.SessionDaemon) { + sdDb.LastActiveAt = time.Now().UnixMilli() + }) + if err != nil { + return fmt.Errorf("record session activity: %w", err) + } + return nil +} + +func (sd *SessionDaemonManager) ClearJobIdFromDaemons(ctx context.Context, jobId string) { + sd.Lock.Lock() + var daemonIds []string + for _, daemon := range sd.Daemons { + daemon.Lock.Lock() + if daemon.JobId == jobId { + daemonIds = append(daemonIds, daemon.DaemonId) + daemon.Lock.Unlock() + continue + } + daemon.Lock.Unlock() + } + sd.Lock.Unlock() + + for _, daemonId := range daemonIds { + if err := sd.clearJobDone(ctx, daemonId); err != nil { + log.Printf("[sessiondaemon:%s] ClearJobIdFromDaemons: DB update failed: %v", daemonId, err) + continue + } + log.Printf("[sessiondaemon:%s] ClearJobIdFromDaemons: job=%s cleared, status=done", daemonId, jobId) + if OnDaemonJobDoneFn != nil { + OnDaemonJobDoneFn(ctx, daemonId) + } + } +} + +func (sd *SessionDaemonManager) InitFromDB(ctx context.Context) error { + daemons, err := wstore.DBGetAllObjsByType[*waveobj.SessionDaemon](ctx, waveobj.OType_SessionDaemon) + if err != nil { + return fmt.Errorf("load session daemons: %w", err) + } + + for _, dbDaemon := range daemons { + _, err := sd.GetOrCreate(ctx, dbDaemon) + if err != nil { + log.Printf("[sessiondaemon] warning: failed to load daemon %s: %v", dbDaemon.OID, err) + continue + } + + switch dbDaemon.Status { + case Status_Running, Status_Disconnected: + // Do NOT call Reconnect here — connections may not be established yet. + // Reconnection is deferred to SessionDaemonController.Start() when a + // block referencing this daemon is resynced and the connection is ready. 
+ log.Printf("[sessiondaemon:%s] loaded daemon status=%s job=%s (reconnect deferred)", dbDaemon.OID, dbDaemon.Status, dbDaemon.JobId) + case Status_Done: + log.Printf("[sessiondaemon:%s] loaded done daemon", dbDaemon.OID) + case Status_Init: + log.Printf("[sessiondaemon:%s] loaded init daemon", dbDaemon.OID) + default: + log.Printf("[sessiondaemon:%s] unknown status %q, treating as init", dbDaemon.OID, dbDaemon.Status) + if err := wstore.DBUpdateFn(ctx, dbDaemon.OID, func(dbSd *waveobj.SessionDaemon) { + dbSd.Status = Status_Init + }); err != nil { + log.Printf("[sessiondaemon:%s] error fixing unknown status: %v", dbDaemon.OID, err) + } + } + } + + log.Printf("[sessiondaemon] InitFromDB complete: %d daemons loaded", len(sd.Daemons)) + return nil +} + +// OnConnectionUp is called when an SSH connection becomes ready. +// It checks all daemons on that connection: reconnects live jobs and +// cleans up daemons whose remote job manager has died. +func (sd *SessionDaemonManager) OnConnectionUp(ctx context.Context, connName string) { + daemons, err := wstore.DBGetAllObjsByType[*waveobj.SessionDaemon](ctx, waveobj.OType_SessionDaemon) + if err != nil { + return + } + for _, dbDaemon := range daemons { + if dbDaemon.Connection != connName { + continue + } + if dbDaemon.JobId == "" { + continue + } + + ensureResult, err := sd.EnsureJobState(ctx, dbDaemon.OID, nil, true) + if err != nil { + log.Printf("[sessiondaemon:%s] OnConnectionUp: error checking job manager state: %v", dbDaemon.OID, err) + continue + } + switch ensureResult.Action { + case DaemonEnsure_Fallback: + log.Printf("[sessiondaemon:%s] OnConnectionUp: remote job manager dead, falling back", dbDaemon.OID) + if OnDaemonJobDoneFn != nil { + OnDaemonJobDoneFn(ctx, dbDaemon.OID) + } + case DaemonEnsure_Wait: + log.Printf("[sessiondaemon:%s] OnConnectionUp: job manager state unknown, waiting", dbDaemon.OID) + } + } +} diff --git a/pkg/waveobj/metaconsts.go b/pkg/waveobj/metaconsts.go index 0ce08099d8..be1bd4f077 
// SessionDaemon is the persisted wave object (otype OType_SessionDaemon)
// describing one session daemon: the connection it lives on, the job that
// backs it, and its lifecycle/idle bookkeeping.
type SessionDaemon struct {
	OID     string `json:"oid"`
	Version int    `json:"version"`

	Name         string      `json:"name,omitempty"`
	Connection   string      `json:"connection,omitempty"`
	JobId        string      `json:"jobid,omitempty"`       // empty when no job is associated
	IsAnonymous  bool        `json:"isanonymous,omitempty"` // cleared when the daemon is renamed
	Status       string      `json:"status,omitempty"`
	Cwd          string      `json:"cwd,omitempty"`
	CreatedAt    int64       `json:"createdat,omitempty"`   // ms timestamp
	IdleTimeout  int64       `json:"idletimeout,omitempty"` // NOTE(review): unit is not visible here — confirm
	IdleSince    int64       `json:"idlesince,omitempty"`    // ms timestamp when last block detached (0 = has attached blocks)
	LastActiveAt int64       `json:"lastactiveat,omitempty"` // ms timestamp when a block last gained focus in this session
	Meta         MetaMapType `json:"meta"`
}

// GetOType returns the object type string for SessionDaemon.
func (*SessionDaemon) GetOType() string {
	return OType_SessionDaemon
}
// NOTE(review): the stubs below all follow the same shape (forward data to
// sendRpcRequestCallHelper under a fixed command name) and look generated —
// confirm before editing them by hand.

// command "recordsessionactivity", wshserver.RecordSessionActivityCommand
// Sends the request and returns only the error.
func RecordSessionActivityCommand(w *wshutil.WshRpc, data wshrpc.CommandRecordSessionActivityData, opts *wshrpc.RpcOpts) error {
	_, err := sendRpcRequestCallHelper[any](w, "recordsessionactivity", data, opts)
	return err
}

// command "sessionattach", wshserver.SessionAttachCommand
// Sends the request and returns only the error.
func SessionAttachCommand(w *wshutil.WshRpc, data wshrpc.CommandSessionAttachData, opts *wshrpc.RpcOpts) error {
	_, err := sendRpcRequestCallHelper[any](w, "sessionattach", data, opts)
	return err
}

// command "sessioncreate", wshserver.SessionCreateCommand
// Returns the created session's info as decoded from the response.
func SessionCreateCommand(w *wshutil.WshRpc, data wshrpc.CommandSessionCreateData, opts *wshrpc.RpcOpts) (*wshrpc.SessionInfoRtnData, error) {
	resp, err := sendRpcRequestCallHelper[*wshrpc.SessionInfoRtnData](w, "sessioncreate", data, opts)
	return resp, err
}

// command "sessiondelete", wshserver.SessionDeleteCommand
// Sends the request and returns only the error.
func SessionDeleteCommand(w *wshutil.WshRpc, data wshrpc.CommandSessionDeleteData, opts *wshrpc.RpcOpts) error {
	_, err := sendRpcRequestCallHelper[any](w, "sessiondelete", data, opts)
	return err
}

// command "sessiondetach", wshserver.SessionDetachCommand
// Sends the request and returns only the error.
func SessionDetachCommand(w *wshutil.WshRpc, data wshrpc.CommandSessionDetachData, opts *wshrpc.RpcOpts) error {
	_, err := sendRpcRequestCallHelper[any](w, "sessiondetach", data, opts)
	return err
}

// command "sessioninfo", wshserver.SessionInfoCommand
// Returns the session's info as decoded from the response.
func SessionInfoCommand(w *wshutil.WshRpc, data wshrpc.CommandSessionInfoData, opts *wshrpc.RpcOpts) (*wshrpc.SessionInfoRtnData, error) {
	resp, err := sendRpcRequestCallHelper[*wshrpc.SessionInfoRtnData](w, "sessioninfo", data, opts)
	return resp, err
}

// command "sessionlist", wshserver.SessionListCommand
// Returns the list of sessions as decoded from the response.
func SessionListCommand(w *wshutil.WshRpc, data wshrpc.CommandSessionListData, opts *wshrpc.RpcOpts) ([]wshrpc.SessionInfoRtnData, error) {
	resp, err := sendRpcRequestCallHelper[[]wshrpc.SessionInfoRtnData](w, "sessionlist", data, opts)
	return resp, err
}

// command "sessiontag", wshserver.SessionTagCommand
// Sends the request and returns only the error.
func SessionTagCommand(w *wshutil.WshRpc, data wshrpc.CommandSessionTagData, opts *wshrpc.RpcOpts) error {
	_, err := sendRpcRequestCallHelper[any](w, "sessiontag", data, opts)
	return err
}
// JobManagerConnection tracks one live connection to a job manager, plus the
// process identity and idle timeout that are carried over into a
// disconnectEntry when the connection is removed.
type JobManagerConnection struct {
	JobId                    string
	Conn                     net.Conn
	WshRpc                   *wshutil.WshRpc
	CleanupFn                func()
	Pid                      int   // job manager pid; a disconnect entry is only recorded when this is > 0
	StartTs                  int64 // passed to isProcessRunning together with Pid
	RemoteIdleTimeoutSeconds int64 // how long a disconnected job manager is kept before it is reaped
}

// disconnectEntry records a job manager whose connection was removed and the
// time after which the reaper may terminate it.
type disconnectEntry struct {
	Deadline time.Time
	JobId    string
	Pid      int
	StartTs  int64
}

// MakeRemoteRpcServerImpl builds a ServerImpl from the given dependencies and
// initialises its JobManagerMap and disconnectDeadlines maps.
func MakeRemoteRpcServerImpl(logWriter io.Writer, router *wshutil.WshRouter, rpcClient *wshutil.WshRpc, isLocal bool, initialEnv map[string]string, sockName string) *ServerImpl {
	return &ServerImpl{
		LogWriter:           logWriter,
		Router:              router,
		RpcClient:           rpcClient,
		IsLocal:             isLocal,
		InitialEnv:          initialEnv,
		JobManagerMap:       make(map[string]*JobManagerConnection),
		SockName:            sockName,
		disconnectDeadlines: make(map[string]*disconnectEntry),
	}
}
*JobManagerConnection) { func (impl *ServerImpl) removeJobManagerConnection(jobId string) { impl.Lock.Lock() - defer impl.Lock.Unlock() - if _, exists := impl.JobManagerMap[jobId]; exists { + conn, exists := impl.JobManagerMap[jobId] + if exists { delete(impl.JobManagerMap, jobId) log.Printf("removeJobManagerConnection: removed job manager connection for jobid=%s\n", jobId) } + impl.Lock.Unlock() + + if conn != nil && conn.Pid > 0 { + impl.addDisconnectEntry(jobId, conn.Pid, conn.StartTs, conn.RemoteIdleTimeoutSeconds) + } } func (impl *ServerImpl) getJobManagerConnection(jobId string) *JobManagerConnection { @@ -254,6 +261,17 @@ func (impl *ServerImpl) RemoteStartJobCommand(ctx context.Context, data wshrpc.C return nil, err } + impl.Lock.Lock() + if jobConn := impl.JobManagerMap[data.JobId]; jobConn != nil { + jobConn.Pid = cmd.Process.Pid + jobConn.StartTs = time.Now().UnixMilli() + jobConn.RemoteIdleTimeoutSeconds = data.RemoteIdleTimeoutSeconds + if jobConn.RemoteIdleTimeoutSeconds <= 0 { + jobConn.RemoteIdleTimeoutSeconds = 604800 + } + } + impl.Lock.Unlock() + combinedEnv := make(map[string]string) for k, v := range impl.InitialEnv { combinedEnv[k] = v @@ -317,6 +335,17 @@ func (impl *ServerImpl) RemoteReconnectToJobManagerCommand(ctx context.Context, }, nil } + impl.Lock.Lock() + if jobConn := impl.JobManagerMap[data.JobId]; jobConn != nil { + jobConn.Pid = data.JobManagerPid + jobConn.StartTs = data.JobManagerStartTs + jobConn.RemoteIdleTimeoutSeconds = data.RemoteIdleTimeoutSeconds + if jobConn.RemoteIdleTimeoutSeconds <= 0 { + jobConn.RemoteIdleTimeoutSeconds = 604800 + } + } + impl.Lock.Unlock() + log.Printf("RemoteReconnectToJobManagerCommand: successfully reconnected to job manager\n") return &wshrpc.CommandRemoteReconnectToJobManagerRtnData{ Success: true, @@ -357,3 +386,77 @@ func (impl *ServerImpl) RemoteTerminateJobManagerCommand(ctx context.Context, da } return nil } + +const disconnectCheckInterval = 60 // seconds + +var disconnectManagerStarted 
sync.Once + +func (impl *ServerImpl) ensureDisconnectManager() { + disconnectManagerStarted.Do(func() { + go impl.runDisconnectManager() + }) +} + +func (impl *ServerImpl) runDisconnectManager() { + ticker := time.NewTicker(disconnectCheckInterval * time.Second) + defer ticker.Stop() + for range ticker.C { + impl.reapDisconnectedJobs() + } +} + +func (impl *ServerImpl) reapDisconnectedJobs() { + impl.disconnectMu.Lock() + now := time.Now() + var expired []*disconnectEntry + var remaining []*disconnectEntry + for _, entry := range impl.disconnectDeadlines { + if now.After(entry.Deadline) { + expired = append(expired, entry) + } else { + remaining = append(remaining, entry) + } + } + impl.disconnectDeadlines = make(map[string]*disconnectEntry) + for _, entry := range remaining { + impl.disconnectDeadlines[entry.JobId] = entry + } + impl.disconnectMu.Unlock() + + for _, entry := range expired { + proc, err := isProcessRunning(entry.Pid, entry.StartTs) + if err != nil { + log.Printf("disconnectManager: error checking process for job=%s pid=%d: %v", entry.JobId, entry.Pid, err) + continue + } + if proc != nil { + log.Printf("disconnectManager: terminating orphaned job=%s pid=%d", entry.JobId, entry.Pid) + err = proc.SendSignal(syscall.SIGTERM) + if err != nil { + log.Printf("disconnectManager: error sending SIGTERM to job=%s pid=%d: %v", entry.JobId, entry.Pid, err) + } + } + } +} + +func (impl *ServerImpl) addDisconnectEntry(jobId string, pid int, startTs int64, timeoutSeconds int64) { + impl.ensureDisconnectManager() + impl.disconnectMu.Lock() + defer impl.disconnectMu.Unlock() + impl.disconnectDeadlines[jobId] = &disconnectEntry{ + JobId: jobId, + Pid: pid, + StartTs: startTs, + Deadline: time.Now().Add(time.Duration(timeoutSeconds) * time.Second), + } + log.Printf("disconnectManager: added entry for job=%s deadline=%v", jobId, impl.disconnectDeadlines[jobId].Deadline) +} + +func (impl *ServerImpl) removeDisconnectEntry(jobId string) { + impl.disconnectMu.Lock() + 
defer impl.disconnectMu.Unlock() + if _, exists := impl.disconnectDeadlines[jobId]; exists { + delete(impl.disconnectDeadlines, jobId) + log.Printf("disconnectManager: removed entry for job=%s", jobId) + } +} diff --git a/pkg/wshrpc/wshrpctypes.go b/pkg/wshrpc/wshrpctypes.go index 51e2338ba8..ed8e1ed860 100644 --- a/pkg/wshrpc/wshrpctypes.go +++ b/pkg/wshrpc/wshrpctypes.go @@ -211,6 +211,16 @@ type WshRpcInterface interface { JobControllerDetachJobCommand(ctx context.Context, jobId string) error JobControllerGetAllJobManagerStatusCommand(ctx context.Context) ([]*JobManagerStatusUpdate, error) BlockJobStatusCommand(ctx context.Context, blockId string) (*BlockJobStatusData, error) + + // session daemon + SessionCreateCommand(ctx context.Context, data CommandSessionCreateData) (*SessionInfoRtnData, error) + SessionDeleteCommand(ctx context.Context, data CommandSessionDeleteData) error + SessionListCommand(ctx context.Context, data CommandSessionListData) ([]SessionInfoRtnData, error) + SessionAttachCommand(ctx context.Context, data CommandSessionAttachData) error + SessionDetachCommand(ctx context.Context, data CommandSessionDetachData) error + SessionInfoCommand(ctx context.Context, data CommandSessionInfoData) (*SessionInfoRtnData, error) + SessionTagCommand(ctx context.Context, data CommandSessionTagData) error + RecordSessionActivityCommand(ctx context.Context, data CommandRecordSessionActivityData) error } // for frontend @@ -733,24 +743,26 @@ type CommandStartJobData struct { } type CommandRemoteStartJobData struct { - Cmd string `json:"cmd"` - Args []string `json:"args"` - Env map[string]string `json:"env"` - TermSize waveobj.TermSize `json:"termsize"` - StreamMeta *StreamMeta `json:"streammeta,omitempty"` - JobAuthToken string `json:"jobauthtoken"` - JobId string `json:"jobid"` - MainServerJwtToken string `json:"mainserverjwttoken"` - ClientId string `json:"clientid"` - PublicKeyBase64 string `json:"publickeybase64"` + Cmd string `json:"cmd"` + Args []string 
`json:"args"` + Env map[string]string `json:"env"` + TermSize waveobj.TermSize `json:"termsize"` + StreamMeta *StreamMeta `json:"streammeta,omitempty"` + JobAuthToken string `json:"jobauthtoken"` + JobId string `json:"jobid"` + MainServerJwtToken string `json:"mainserverjwttoken"` + ClientId string `json:"clientid"` + PublicKeyBase64 string `json:"publickeybase64"` + RemoteIdleTimeoutSeconds int64 `json:"remoteidletimeoutseconds,omitempty"` } type CommandRemoteReconnectToJobManagerData struct { - JobId string `json:"jobid"` - JobAuthToken string `json:"jobauthtoken"` - MainServerJwtToken string `json:"mainserverjwttoken"` - JobManagerPid int `json:"jobmanagerpid"` - JobManagerStartTs int64 `json:"jobmanagerstartts"` + JobId string `json:"jobid"` + JobAuthToken string `json:"jobauthtoken"` + MainServerJwtToken string `json:"mainserverjwttoken"` + JobManagerPid int `json:"jobmanagerpid"` + JobManagerStartTs int64 `json:"jobmanagerstartts"` + RemoteIdleTimeoutSeconds int64 `json:"remoteidletimeoutseconds,omitempty"` } type CommandRemoteReconnectToJobManagerRtnData struct { @@ -925,3 +937,57 @@ type CommandRemoteProcessSignalData struct { Pid int32 `json:"pid"` Signal string `json:"signal"` } + +// session daemon +type CommandSessionCreateData struct { + Name string `json:"name,omitempty"` + Connection string `json:"connection,omitempty"` + IdleTimeout int64 `json:"idletimeout,omitempty"` +} + +type CommandSessionDeleteData struct { + DaemonId string `json:"daemonid"` +} + +type CommandSessionListData struct { + ShowAll bool `json:"showall,omitempty"` +} + +type CommandSessionAttachData struct { + DaemonId string `json:"daemonid"` + BlockId string `json:"blockid"` + CurrentDaemonId string `json:"currentdaemonid,omitempty"` +} + +type CommandSessionDetachData struct { + DaemonId string `json:"daemonid"` + BlockId string `json:"blockid,omitempty"` +} + +type CommandSessionInfoData struct { + DaemonId string `json:"daemonid"` +} + +type CommandSessionTagData struct { + 
DaemonId string `json:"daemonid"` + Name string `json:"name"` +} + +type CommandRecordSessionActivityData struct { + DaemonId string `json:"daemonid"` +} + +type SessionInfoRtnData struct { + DaemonId string `json:"daemonid"` + Name string `json:"name"` + Connection string `json:"connection"` + JobId string `json:"jobid,omitempty"` + IsAnonymous bool `json:"isanonymous"` + Status string `json:"status"` + Cwd string `json:"cwd,omitempty"` + CreatedAt int64 `json:"createdat"` + IdleTimeout int64 `json:"idletimeout"` + IdleSince int64 `json:"idlesince,omitempty"` + LastActiveAt int64 `json:"lastactiveat,omitempty"` + Blocks []string `json:"blocks,omitempty"` +} diff --git a/pkg/wshrpc/wshserver/wshserver.go b/pkg/wshrpc/wshserver/wshserver.go index 38006fd9a8..239cc2b8b8 100644 --- a/pkg/wshrpc/wshserver/wshserver.go +++ b/pkg/wshrpc/wshserver/wshserver.go @@ -307,6 +307,10 @@ func (ws *WshServer) CreateSubBlockCommand(ctx context.Context, data wshrpc.Comm } func (ws *WshServer) ControllerDestroyCommand(ctx context.Context, blockId string) error { + status := blockcontroller.GetBlockControllerRuntimeStatus(blockId) + if status != nil && status.ShellProcStatus == blockcontroller.Status_Running { + return nil + } blockcontroller.DestroyBlockController(blockId) return nil } diff --git a/pkg/wshrpc/wshserver/wshserver_session.go b/pkg/wshrpc/wshserver/wshserver_session.go new file mode 100644 index 0000000000..527ab55a98 --- /dev/null +++ b/pkg/wshrpc/wshserver/wshserver_session.go @@ -0,0 +1,260 @@ +// Copyright 2026, Command Line Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +package wshserver + +import ( + "context" + "fmt" + "log" + "sort" + "time" + + "github.com/google/uuid" + "github.com/wavetermdev/waveterm/pkg/blockcontroller" + "github.com/wavetermdev/waveterm/pkg/remote/conncontroller" + "github.com/wavetermdev/waveterm/pkg/sessiondaemon" + "github.com/wavetermdev/waveterm/pkg/waveobj" + "github.com/wavetermdev/waveterm/pkg/wcore" + "github.com/wavetermdev/waveterm/pkg/wshrpc" + "github.com/wavetermdev/waveterm/pkg/wstore" +) + +func (ws *WshServer) SessionCreateCommand(ctx context.Context, data wshrpc.CommandSessionCreateData) (*wshrpc.SessionInfoRtnData, error) { + dbDaemon := &waveobj.SessionDaemon{ + OID: uuid.New().String(), + Name: data.Name, + Connection: data.Connection, + IsAnonymous: data.Name == "", + Status: sessiondaemon.Status_Init, + CreatedAt: time.Now().UnixMilli(), + IdleTimeout: data.IdleTimeout, + } + if dbDaemon.IsAnonymous { + dbDaemon.IdleTimeout = sessiondaemon.DefaultAnonymousIdleTimeout + } else if dbDaemon.IdleTimeout <= 0 { + dbDaemon.IdleTimeout = sessiondaemon.DefaultNamedIdleTimeout + } + + err := wstore.DBInsert(ctx, dbDaemon) + if err != nil { + return nil, fmt.Errorf("insert session daemon: %w", err) + } + + _, err = sessiondaemon.Manager.GetOrCreate(ctx, dbDaemon) + if err != nil { + return nil, fmt.Errorf("create session daemon in manager: %w", err) + } + + return buildSessionInfoRtnData(ctx, dbDaemon) +} + +func (ws *WshServer) SessionDeleteCommand(ctx context.Context, data wshrpc.CommandSessionDeleteData) error { + dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) + if err != nil { + return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) + } + + memDaemon := sessiondaemon.Manager.Get(data.DaemonId) + forceDelete := false + if memDaemon != nil { + err = memDaemon.Stop(ctx) + if err != nil { + forceDelete = isRemoteProcessDead(ctx, dbDaemon) + if !forceDelete { + return fmt.Errorf("failed to stop session 
daemon: %w", err) + } + log.Printf("[sessiondaemon] SessionDelete: daemon=%s remote job dead, deleting despite stop failure", data.DaemonId) + } + sessiondaemon.Manager.Remove(data.DaemonId) + } + + err = wstore.DBDelete(ctx, waveobj.OType_SessionDaemon, data.DaemonId) + if err != nil { + return fmt.Errorf("delete session daemon: %w", err) + } + return nil +} + +func isRemoteProcessDead(ctx context.Context, dbDaemon *waveobj.SessionDaemon) bool { + if dbDaemon.JobId == "" { + return false + } + job, err := wstore.DBMustGet[*waveobj.Job](ctx, dbDaemon.JobId) + if err != nil || job.JobManagerPid == 0 { + return false + } + alive, err := conncontroller.CheckRemoteProcessAlive(ctx, dbDaemon.Connection, job.JobManagerPid) + return err == nil && !alive +} + +func (ws *WshServer) SessionListCommand(ctx context.Context, data wshrpc.CommandSessionListData) ([]wshrpc.SessionInfoRtnData, error) { + allDaemons, err := wstore.DBGetAllObjsByType[*waveobj.SessionDaemon](ctx, waveobj.OType_SessionDaemon) + if err != nil { + return nil, fmt.Errorf("list session daemons: %w", err) + } + + rtn := make([]wshrpc.SessionInfoRtnData, 0) + for _, dbDaemon := range allDaemons { + if dbDaemon.IsAnonymous && !data.ShowAll { + continue + } + info, err := buildSessionInfoRtnData(ctx, dbDaemon) + if err != nil { + return nil, err + } + rtn = append(rtn, *info) + } + sort.Slice(rtn, func(i, j int) bool { + ai := rtn[i].LastActiveAt + aj := rtn[j].LastActiveAt + if ai != aj { + return ai > aj + } + return rtn[i].CreatedAt > rtn[j].CreatedAt + }) + return rtn, nil +} + +func (ws *WshServer) SessionAttachCommand(ctx context.Context, data wshrpc.CommandSessionAttachData) error { + if data.CurrentDaemonId != "" && data.CurrentDaemonId == data.DaemonId { + return nil + } + + if data.CurrentDaemonId != "" { + sessiondaemon.Manager.DetachBlock(ctx, data.CurrentDaemonId, data.BlockId) + } + + dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) + if err != nil { + if 
data.CurrentDaemonId != "" { + sessiondaemon.Manager.AttachBlock(ctx, data.CurrentDaemonId, data.BlockId) + } + return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) + } + + blockData, err := wstore.DBMustGet[*waveobj.Block](ctx, data.BlockId) + if err == nil { + blockConn := blockData.Meta.GetString(waveobj.MetaKey_Connection, "") + if blockConn != "" && blockConn != dbDaemon.Connection { + log.Printf("[sessiondaemon] SessionAttach: block=%s conn=%q daemon conn=%q mismatch, refusing", + data.BlockId, blockConn, dbDaemon.Connection) + return fmt.Errorf("cannot attach to session on connection %q from connection %q", dbDaemon.Connection, blockConn) + } + } + + _, err = sessiondaemon.Manager.GetOrCreate(ctx, dbDaemon) + if err != nil { + return fmt.Errorf("create session daemon in manager: %w", err) + } + + sessiondaemon.Manager.AttachBlock(ctx, data.DaemonId, data.BlockId) + + err = wstore.DBUpdateFn(ctx, data.BlockId, func(block *waveobj.Block) { + block.Meta[waveobj.MetaKey_SessionDaemonId] = data.DaemonId + delete(block.Meta, blockcontroller.MetaKey_SessionNoAutoCreate) + block.JobId = dbDaemon.JobId + }) + + if err != nil { + sessiondaemon.Manager.DetachBlock(ctx, data.DaemonId, data.BlockId) + if data.CurrentDaemonId != "" { + sessiondaemon.Manager.AttachBlock(ctx, data.CurrentDaemonId, data.BlockId) + } + return fmt.Errorf("update block meta: %w", err) + } + + resyncBlockController(ctx, data.BlockId) + wcore.SendWaveObjUpdate(waveobj.MakeORef(waveobj.OType_Block, data.BlockId)) + return nil +} + +func (ws *WshServer) SessionDetachCommand(ctx context.Context, data wshrpc.CommandSessionDetachData) error { + _, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) + if err != nil { + return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) + } + + blockIds := []string{} + if data.BlockId != "" { + blockIds = append(blockIds, data.BlockId) + } else { + blockIds = 
sessiondaemon.Manager.GetBlocksForDaemon(data.DaemonId) + } + + for _, blockId := range blockIds { + sessiondaemon.Manager.DetachBlock(ctx, data.DaemonId, blockId) + err = wstore.DBUpdateFn(ctx, blockId, func(block *waveobj.Block) { + delete(block.Meta, waveobj.MetaKey_SessionDaemonId) + block.Meta[blockcontroller.MetaKey_SessionNoAutoCreate] = true + }) + if err != nil { + return fmt.Errorf("update block meta: %w", err) + } + resyncBlockController(ctx, blockId) + wcore.SendWaveObjUpdate(waveobj.MakeORef(waveobj.OType_Block, blockId)) + } + return nil +} + +func (ws *WshServer) SessionInfoCommand(ctx context.Context, data wshrpc.CommandSessionInfoData) (*wshrpc.SessionInfoRtnData, error) { + dbDaemon, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) + if err != nil { + return nil, fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) + } + if dbDaemon.JobId == "" { + if memJobId := sessiondaemon.Manager.GetMemJobId(dbDaemon.OID); memJobId != "" { + dbDaemon.JobId = memJobId + } + } + return buildSessionInfoRtnData(ctx, dbDaemon) +} + +func (ws *WshServer) SessionTagCommand(ctx context.Context, data wshrpc.CommandSessionTagData) error { + _, err := wstore.DBMustGet[*waveobj.SessionDaemon](ctx, data.DaemonId) + if err != nil { + return fmt.Errorf("session daemon %q not found: %w", data.DaemonId, err) + } + return sessiondaemon.Manager.Rename(ctx, data.DaemonId, data.Name) +} + +func (ws *WshServer) RecordSessionActivityCommand(ctx context.Context, data wshrpc.CommandRecordSessionActivityData) error { + return sessiondaemon.Manager.RecordActivity(ctx, data.DaemonId) +} + +func buildSessionInfoRtnData(ctx context.Context, dbDaemon *waveobj.SessionDaemon) (*wshrpc.SessionInfoRtnData, error) { + if dbDaemon == nil { + return nil, fmt.Errorf("session daemon is nil") + } + blocks := sessiondaemon.Manager.GetBlocksForDaemon(dbDaemon.OID) + return &wshrpc.SessionInfoRtnData{ + DaemonId: dbDaemon.OID, + Name: dbDaemon.Name, + Connection: 
dbDaemon.Connection, + JobId: dbDaemon.JobId, + IsAnonymous: dbDaemon.IsAnonymous, + Status: dbDaemon.Status, + Cwd: dbDaemon.Cwd, + CreatedAt: dbDaemon.CreatedAt, + IdleTimeout: dbDaemon.IdleTimeout, + IdleSince: dbDaemon.IdleSince, + LastActiveAt: dbDaemon.LastActiveAt, + Blocks: blocks, + }, nil +} + +func resyncBlockController(ctx context.Context, blockId string) { + tabs, err := wstore.DBGetAllObjsByType[*waveobj.Tab](ctx, waveobj.OType_Tab) + if err != nil { + log.Printf("[sessiondaemon] warning: error getting tabs for resync: %v", err) + return + } + for _, tab := range tabs { + for _, bid := range tab.BlockIds { + if bid == blockId { + blockcontroller.ResyncController(ctx, tab.OID, blockId, nil, true) + return + } + } + } +} diff --git a/pkg/wstore/wstore_dbsessionmigration.go b/pkg/wstore/wstore_dbsessionmigration.go new file mode 100644 index 0000000000..85c6a78a96 --- /dev/null +++ b/pkg/wstore/wstore_dbsessionmigration.go @@ -0,0 +1,95 @@ +// Copyright 2026, Command Line Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +package wstore + +import ( + "context" + "fmt" + "log" + "time" + + "github.com/google/uuid" + "github.com/wavetermdev/waveterm/pkg/waveobj" +) + +const MigrateSessionDaemonKey = "migrate:sessiondaemon" + +func runSessionDaemonMigration(ctx context.Context) error { + client, err := DBGetSingleton[*waveobj.Client](ctx) + if err != nil { + return fmt.Errorf("get client: %w", err) + } + + if client.Meta != nil && client.Meta[MigrateSessionDaemonKey] == true { + return nil + } + + blocks, err := DBGetAllObjsByType[*waveobj.Block](ctx, waveobj.OType_Block) + if err != nil { + return fmt.Errorf("list blocks: %w", err) + } + + var migrated int + for _, block := range blocks { + if block.JobId == "" { + continue + } + connName := block.Meta.GetString("connection", "") + if connName == "" { + continue + } + + daemonId := uuid.New().String() + dbDaemon := &waveobj.SessionDaemon{ + OID: daemonId, + Name: "", + Connection: connName, + JobId: block.JobId, + IsAnonymous: true, + Status: "running", + CreatedAt: time.Now().UnixMilli(), + IdleTimeout: 60, + } + + err = DBInsert(ctx, dbDaemon) + if err != nil { + log.Printf("[migration] warning: error inserting session daemon for block %s: %v", block.OID, err) + continue + } + + err = DBUpdateFn(ctx, block.OID, func(b *waveobj.Block) { + if b.Meta == nil { + b.Meta = make(waveobj.MetaMapType) + } + b.Meta[waveobj.MetaKey_SessionDaemonId] = daemonId + b.JobId = "" + }) + if err != nil { + log.Printf("[migration] warning: error updating block %s: %v", block.OID, err) + continue + } + + migrated++ + } + + if client.Meta == nil { + client.Meta = make(waveobj.MetaMapType) + } + client.Meta[MigrateSessionDaemonKey] = true + err = DBUpdate(ctx, client) + if err != nil { + return fmt.Errorf("update client meta: %w", err) + } + + if migrated > 0 { + log.Printf("[migration] migrated %d blocks to session daemon\n", migrated) + } + return nil +} + +func RunSessionDaemonMigration(ctx context.Context) 
error { + ctx, cancelFn := context.WithTimeout(ctx, 30*time.Second) + defer cancelFn() + return runSessionDaemonMigration(ctx) +}