diff --git a/cmd/client.go b/cmd/client.go new file mode 100644 index 0000000..f0415bf --- /dev/null +++ b/cmd/client.go @@ -0,0 +1,98 @@ +package cmd + +import ( + "context" + "fmt" + "log/slog" + "os" + "time" + + "ora/internal/agent" + "ora/internal/audio" + "ora/internal/config" + "ora/internal/db" + "ora/internal/ui" +) + +func runClient(ctx context.Context, shutdownObs func(context.Context) error, daemonStatus string) error { + slog.Info("Starting Ora Client...") + + // Parallel hardware & DB init + var ( + store *db.Store + mic audio.Microphone + speaker audio.Speaker + ) + + initErrChan := make(chan error, 3) + + go func() { + var err error + store, err = db.New("ora-db/db") + initErrChan <- err + }() + + go func() { + var err error + mic, err = audio.NewMic() + initErrChan <- err + }() + + go func() { + var err error + speaker, err = audio.NewSpeaker() + initErrChan <- err + }() + + for i := 0; i < 3; i++ { + if err := <-initErrChan; err != nil { + return fmt.Errorf("fatal: startup component failure: %w", err) + } + } + + defer store.Close() + defer mic.Close() + defer speaker.Close() + + // Start mic capture once — survives session reconnects. + micChan, err := mic.StartCapture(ctx) + if err != nil { + return fmt.Errorf("failed to start microphone: %w", err) + } + + apiKey := os.Getenv("GEMINI_API_KEY") + + orchestrator := agent.NewAgent(mic, speaker, store, nil, apiKey) + orchestrator.SetModel(config.VoiceModel) + + // Reconnect loop: if the Gemini session drops (idle timeout, network blip, + // session limit), restart automatically. Mic stays running throughout. + go func() { + for { + if err := orchestrator.Connect(ctx, micChan); err != nil { + if ctx.Err() != nil { + return + } + slog.Error("agent session lost, reconnecting in 2s", "error", err) + select { + case orchestrator.TextResponseChan <- "\n*[connection lost — reconnecting...]*\n": + default: + } + select { + case <-time.After(2 * time.Second): + case <-ctx.Done(): + return + } + } else { + // Clean exit (ctx cancelled) + return + } + } + }() + + if err := ui.Run(orchestrator, daemonStatus); err != nil { + return fmt.Errorf("UI Error: %w", err) + } + + return nil +} diff --git a/cmd/daemon.go b/cmd/daemon.go new file mode 100644 index 0000000..d404c42 --- /dev/null +++ b/cmd/daemon.go @@ -0,0 +1,138 @@ +package cmd + +import ( + "context" + _ "embed" + "encoding/json" + "log/slog" + "net" + "net/http" + "os" + "time" + + "ora/internal/config" + "ora/internal/db" + "ora/internal/memory" + "ora/internal/tracker" + + "github.com/getlantern/systray" +) + +//go:embed tray_icon.ico +var trayIcon []byte + +const DaemonPort = "6942" + +func runDaemon(ctx context.Context, shutdownObs func(context.Context) error) error { + slog.Info("Starting Ora Daemon...") + + // port binding instance lock to prevent double spawning + listener, err := net.Listen("tcp", "127.0.0.1:"+DaemonPort) + if err != nil { + slog.Warn("failed to bind daemon port", "port", DaemonPort, "error", err) + return nil + } + + onReady := func() { + systray.SetIcon(trayIcon) + systray.SetTitle("Ora") + systray.SetTooltip("Ora Context Runtime is active") + mQuit := systray.AddMenuItem("Quit Ora", "Stop the background tracker and database") + + store, err := db.New("ora-db/db") + if err != nil { + slog.Error("failed to init db", "error", err) + listener.Close() + return + } + + eye, err := tracker.New() + if err != nil { + slog.Error("failed to init tracker", "error", err) + listener.Close() + return + } + + appConfig := config.LoadConfig() + apiKey := os.Getenv("GEMINI_API_KEY") + + summarizer, err := memory.NewGeminiSummarizer(apiKey) + if err != nil { + slog.Warn("failed to init summarizer, semantic memory disabled", "error", err) + } + compiler := memory.NewCompiler(summarizer, store) + + eventChan := make(chan tracker.Activity, 100) + daemon := tracker.NewDaemon(eye, 2*time.Second, appConfig.Tracker.DwellTime*time.Millisecond, appConfig.Tracker.Blocklist, eventChan) + + // tracker loop entry point + go daemon.Start(ctx) + + go func() { + for ev := range eventChan { + store.LogActivity(ctx, ev.App, ev.Title) + if compiler != nil { + compiler.Ingest(ctx, ev) + } + } + }() + + // local http for inter-process communication between tui and daemon + // ipc is a bridge to securely talk and share data between independent apps + mux := http.NewServeMux() + + // heartbeat + mux.HandleFunc("/ping", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write([]byte("pong")) + }) + + // data sharing endpooint, get latest tracking data + mux.HandleFunc("/buffer", func(w http.ResponseWriter, r *http.Request) { + if compiler == nil { + w.WriteHeader(http.StatusNoContent) + return + } + buf := compiler.GetCurrentBuffer() + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(buf) + }) + + server := &http.Server{ + Handler: mux, + } + + go func() { + if err := server.Serve(listener); err != nil && err != http.ErrServerClosed { + slog.Error("daemon ipc server failed", "error", err) + } + }() + + slog.Info("Daemon running in background.") + + // block and wait for quit signal + go func() { + select { + case <-mQuit.ClickedCh: + slog.Info("Quit requested via System Tray") + systray.Quit() + case <-ctx.Done(): + slog.Info("Context cancelled, shutting down systray") + systray.Quit() + } + + // cleanup process + shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + server.Shutdown(shutdownCtx) + store.Close() + }() + } + + onExit := func() { + slog.Info("Ora Daemon has completely shut down.") + } + + systray.Run(onReady, onExit) + return nil +} diff --git a/cmd/daemon_spawn_windows_test.go b/cmd/daemon_spawn_windows_test.go new file mode 100644 index 0000000..1197525 --- /dev/null +++ b/cmd/daemon_spawn_windows_test.go @@ -0,0 +1,173 @@ +package cmd_test + +import ( + "fmt" + "net" + "net/http" + "os" + "os/exec" + "path/filepath" + "strings" + "syscall" + "testing" + "time" +) + +const daemonTestPort = "6942" + +var testPingClient = &http.Client{Timeout: 1 * time.Second} + +func isDaemonUp() bool { + resp, err := testPingClient.Get("http://127.0.0.1:" + daemonTestPort + "/ping") + if err != nil { + return false + } + resp.Body.Close() + return resp.StatusCode == http.StatusOK +} + +// port blocked by smth else or nahs +func isPortBound() bool { + conn, err := net.DialTimeout("tcp", "127.0.0.1:"+daemonTestPort, 150*time.Millisecond) + if err != nil { + return false + } + conn.Close() + return true +} + +// finds PID of blocker process and kills it, could be zombie daemons from prev runs +func killPortOwner(t *testing.T) { + t.Helper() + out, err := exec.Command("netstat", "-ano", "-p", "tcp").Output() + if err != nil { + return + } + for _, line := range strings.Split(string(out), "\n") { + if strings.Contains(line, ":"+daemonTestPort) && strings.Contains(line, "LISTENING") { + fields := strings.Fields(line) + if len(fields) >= 5 { + pid := strings.TrimSpace(fields[len(fields)-1]) + t.Logf("killing PID %s holding port %s", pid, daemonTestPort) + exec.Command("taskkill", "/F", "/PID", pid).Run() + // wait for OS to release the port + time.Sleep(300 * time.Millisecond) + } + } + } +} + +// buildOraBinary compiles the module into a temp binary and returns its path. +// Test working dir is cmd/, so ".." is the module root. +func buildOraBinary(t *testing.T) string { + t.Helper() + dir := t.TempDir() + bin := filepath.Join(dir, "ora_test.exe") + + cmd := exec.Command("go", "build", "-o", bin, "..") + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("go build failed: %v\n%s", err, out) + } + return bin +} + +func netstatPort(t *testing.T, label string) { + t.Helper() + out, err := exec.Command("netstat", "-ano").Output() + if err != nil { + t.Logf("[%s] netstat error: %v", label, err) + return + } + for _, line := range strings.Split(string(out), "\n") { + if strings.Contains(line, daemonTestPort) { + t.Logf("[%s] netstat: %s", label, strings.TrimSpace(line)) + } + } +} + +func TestDaemonSpawnsAndResponds(t *testing.T) { + netstatPort(t, "initial") + + if isDaemonUp() { + t.Skip("responsive daemon already on port 69420 — skipping to avoid conflict") + } + + if isPortBound() { + t.Log("port 69420 is bound but not serving — killing port owner") + killPortOwner(t) + netstatPort(t, "after-kill") + if isPortBound() { + t.Skip("could not free port 69420 — kill any running ora.exe manually and retry") + } + } + + bin := buildOraBinary(t) + + // give the daemon a clean working dir so ora-db/ doesn't pollute the repo + workDir := t.TempDir() + + cmd := exec.Command(bin, "--daemon") + cmd.Dir = workDir + cmd.SysProcAttr = &syscall.SysProcAttr{HideWindow: true} + + // output for diagnostics if the test fails. + outFile, err := os.CreateTemp("", "ora-daemon-*.log") + if err == nil { + cmd.Stdout = outFile + cmd.Stderr = outFile + defer outFile.Close() + } + + if err := cmd.Start(); err != nil { + t.Fatalf("failed to start daemon process: %v", err) + } + defer func() { + cmd.Process.Kill() + cmd.Wait() + // daemon stdout/stderr (pre-slog-redirect lines) + if outFile != nil { + outFile.Seek(0, 0) + buf := make([]byte, 4096) + n, _ := outFile.Read(buf) + if n > 0 { + t.Logf("daemon stderr/stdout:\n%s", buf[:n]) + } + } + + if logData, err := os.ReadFile(filepath.Join(workDir, "ora-db", "ora.log")); err == nil { + t.Logf("daemon ora.log:\n%s", logData) + } + }() + + // poll up to 15s, each attempt has its own 300ms timeout so we never hang + deadline := time.Now().Add(15 * time.Second) + attempts := 0 + for time.Now().Before(deadline) { + attempts++ + if isDaemonUp() { + t.Logf("daemon /ping succeeded after %d attempts (~%dms)", attempts, attempts*100) + return + } + time.Sleep(100 * time.Millisecond) + } + + netstatPort(t, "after-timeout") + t.Fatalf("daemon did not respond to /ping within 15s after %d attempts", attempts) +} + +func TestDaemonPingEndpoint(t *testing.T) { + if !isDaemonUp() { + t.Skip("no daemon running — run TestDaemonSpawnsAndResponds first or start daemon manually") + } + + resp, err := testPingClient.Get(fmt.Sprintf("http://127.0.0.1:%s/ping", daemonTestPort)) + if err != nil { + t.Fatalf("ping failed: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + t.Fatalf("expected 200, got %d", resp.StatusCode) + } +} diff --git a/cmd/fork_unix.go b/cmd/fork_unix.go new file mode 100644 index 0000000..871c66b --- /dev/null +++ b/cmd/fork_unix.go @@ -0,0 +1,13 @@ +//go:build !windows + +package cmd + +import ( + "os" + "os/exec" +) + +func spawnHiddenDaemon() error { + cmd := exec.Command(os.Args[0], "--daemon") + return cmd.Start() +} diff --git a/cmd/fork_windows.go b/cmd/fork_windows.go new file mode 100644 index 0000000..b308b44 --- /dev/null +++ b/cmd/fork_windows.go @@ -0,0 +1,20 @@ +//go:build windows + +package cmd + +import ( + "os" + "os/exec" + "syscall" +) + +func spawnHiddenDaemon() error { + exePath, err := os.Executable() + if err != nil { + exePath = os.Args[0] // find current pwd if os.Exec fails but os.Exec mostly wont + } + cmd := exec.Command(exePath, "--daemon") + // CREATE_NO_WINDOW flag + cmd.SysProcAttr = &syscall.SysProcAttr{HideWindow: true} + return cmd.Start() +} diff --git a/cmd/root.go b/cmd/root.go index de07605..b8d0a20 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -4,14 +4,8 @@ import ( "context" "fmt" "log/slog" - "ora/internal/agent" - "ora/internal/audio" - "ora/internal/config" - "ora/internal/db" - "ora/internal/memory" + "net/http" "ora/internal/obs" - "ora/internal/tracker" - "ora/internal/ui" "os" "os/signal" "time" @@ -29,7 +23,6 @@ var rootCmd = &cobra.Command{ // has an action associated with it: Run: func(cmd *cobra.Command, args []string) { - // globally load the .env if err := godotenv.Load(); err != nil { slog.Info("No .env file found, read from sys env") } @@ -45,96 +38,45 @@ var rootCmd = &cobra.Command{ } defer shutdownObs(ctx) - // 🚀 Parallel Hardware & DB Initialization - var ( - store *db.Store - mic audio.Microphone - speaker audio.Speaker - ) + isDaemon, _ := cmd.Flags().GetBool("daemon") - initErrChan := make(chan error, 3) - - go func() { - var err error - store, err = db.New("ora-db/db") - initErrChan <- err - }() - - go func() { - var err error - mic, err = audio.NewMic() - initErrChan <- err - }() - - go func() { - var err error - speaker, err = audio.NewSpeaker() - initErrChan <- err - }() - - // wait for all 3 to finish - for i := 0; i < 3; i++ { - if err := <-initErrChan; err != nil { - fmt.Fprintf(os.Stderr, "fatal: startup component failure: %v\n", err) - return + if isDaemon { + if err := runDaemon(ctx, shutdownObs); err != nil { + slog.Error("daemon crashed", "error", err) } - } - - defer store.Close() - defer mic.Close() - defer speaker.Close() - - // initialize tracker and daemon - eye, err := tracker.New() - if err != nil { - fmt.Fprintf(os.Stderr, "fatal: could not initialize tracker: %v\n", err) return } - apiKey := os.Getenv("GEMINI_API_KEY") - - // initialize compiler - summarizer, err := memory.NewGeminiSummarizer(apiKey) - if err != nil { - slog.Warn("failed to initialize summarizer, semantic memory disabled", "error", err) - } - compiler := memory.NewCompiler(summarizer, store) - - eventChan := make(chan tracker.Activity, 100) - daemon := tracker.NewDaemon(eye, 2*time.Second, eventChan) - - // starts daemon in the background!!! - go daemon.Start(ctx) - - go func() { - for ev := range eventChan { - // temp integration of compiler - store.LogActivity(ctx, ev.App, ev.Title) - if compiler != nil { - compiler.Ingest(ctx, ev) + // get-or-create daemon. + // using short per-request timeout without it http.Get hangs if the TCP port is bound but nobody has called Accept yet + var daemonStatus string + if pingDaemon() { + slog.Info("connected to existing daemon") + daemonStatus = "connected" + } else { + slog.Info("daemon not found, spawning background process") + if spawnErr := spawnHiddenDaemon(); spawnErr != nil { + slog.Error("failed to spawn background daemon", "error", spawnErr) + daemonStatus = "daemon spawn failed: " + spawnErr.Error() + } else { + // 300ms timeout, i.e. no blocking + // poll until daemon is ready, max 10s, 100ms sleep b/w attempts + deadline := time.Now().Add(10 * time.Second) + daemonStatus = "daemon spawn failed: timed out" + for time.Now().Before(deadline) { + time.Sleep(100 * time.Millisecond) + if pingDaemon() { + daemonStatus = "started" + break + } } } - }() - - // inject dependencies - orchestrator := agent.NewAgent(mic, speaker, store, compiler, apiKey) - orchestrator.SetModel(config.VoiceModel) - - // voice loop starts in bg - go func() { - if err := orchestrator.Connect(ctx); err != nil { - // if tui is running, we might not want to print to stdout directly - // but for now this is fine for debugging crashes - slog.Error("agent connection crashed", "error", err) - } - }() - - // starts TUI - if err := ui.Run(orchestrator); err != nil { - fmt.Printf("UI Error: %v\n", err) - return } + // start tui + if err := runClient(ctx, shutdownObs, daemonStatus); err != nil { + slog.Error("Client crashed", "error", err) + } }, // root command is supposed to startup the CLI and other tools - db, memory companion, tui, et cetera. } @@ -149,5 +91,18 @@ func Execute() { } func init() { - // Root flags can be added here + rootCmd.PersistentFlags().Bool("daemon", false, "Run as background daemon") +} + +// pingDaemon sends a single /ping with a short timeout. +// Returns true only if the daemon responds 200 OK. +var daemonPingClient = &http.Client{Timeout: 300 * time.Millisecond} + +func pingDaemon() bool { + resp, err := daemonPingClient.Get("http://127.0.0.1:" + DaemonPort + "/ping") + if err != nil { + return false + } + resp.Body.Close() + return resp.StatusCode == http.StatusOK } diff --git a/cmd/tray_icon.ico b/cmd/tray_icon.ico new file mode 100644 index 0000000..f4e94ac Binary files /dev/null and b/cmd/tray_icon.ico differ diff --git a/docs/images/tray_icon.ico b/docs/images/tray_icon.ico new file mode 100644 index 0000000..f4e94ac Binary files /dev/null and b/docs/images/tray_icon.ico differ diff --git a/docs/images/tray_icon.png b/docs/images/tray_icon.png new file mode 100644 index 0000000..1bd8fbf Binary files /dev/null and b/docs/images/tray_icon.png differ diff --git a/go.mod b/go.mod index d338e9f..2b80131 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( github.com/charmbracelet/bubbletea v1.3.10 github.com/charmbracelet/lipgloss v1.1.0 github.com/ebitengine/oto/v3 v3.4.0 + github.com/getlantern/systray v1.2.2 github.com/go-ole/go-ole v1.3.0 github.com/joho/godotenv v1.5.1 github.com/moutend/go-wca v0.3.0 @@ -37,8 +38,15 @@ require ( github.com/dustin/go-humanize v1.0.1 // indirect github.com/ebitengine/purego v0.9.0 // indirect github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect + github.com/getlantern/context v0.0.0-20190109183933-c447772a6520 // indirect + github.com/getlantern/errors v0.0.0-20190325191628-abdb3e3e36f7 // indirect + github.com/getlantern/golog v0.0.0-20190830074920-4ef2e798c2d7 // indirect + github.com/getlantern/hex v0.0.0-20190417191902-c6586a6fe0b7 // indirect + github.com/getlantern/hidden v0.0.0-20190325191715-f02dbb02be55 // indirect + github.com/getlantern/ops v0.0.0-20190325191751-d70cb0d6f85f // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-stack/stack v1.8.0 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/google/go-cmp v0.7.0 // indirect github.com/google/s2a-go v0.1.8 // indirect @@ -55,6 +63,7 @@ require ( github.com/muesli/cancelreader v0.2.2 // indirect github.com/muesli/termenv v0.16.0 // indirect github.com/ncruces/go-strftime v1.0.0 // indirect + github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/sahilm/fuzzy v0.1.1 // indirect diff --git a/go.sum b/go.sum index 9b7a7dc..d750778 100644 --- a/go.sum +++ b/go.sum @@ -59,6 +59,20 @@ github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1m github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4= github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM= +github.com/getlantern/context v0.0.0-20190109183933-c447772a6520 h1:NRUJuo3v3WGC/g5YiyF790gut6oQr5f3FBI88Wv0dx4= +github.com/getlantern/context v0.0.0-20190109183933-c447772a6520/go.mod h1:L+mq6/vvYHKjCX2oez0CgEAJmbq1fbb/oNJIWQkBybY= +github.com/getlantern/errors v0.0.0-20190325191628-abdb3e3e36f7 h1:6uJ+sZ/e03gkbqZ0kUG6mfKoqDb4XMAzMIwlajq19So= +github.com/getlantern/errors v0.0.0-20190325191628-abdb3e3e36f7/go.mod h1:l+xpFBrCtDLpK9qNjxs+cHU6+BAdlBaxHqikB6Lku3A= +github.com/getlantern/golog v0.0.0-20190830074920-4ef2e798c2d7 h1:guBYzEaLz0Vfc/jv0czrr2z7qyzTOGC9hiQ0VC+hKjk= +github.com/getlantern/golog v0.0.0-20190830074920-4ef2e798c2d7/go.mod h1:zx/1xUUeYPy3Pcmet8OSXLbF47l+3y6hIPpyLWoR9oc= +github.com/getlantern/hex v0.0.0-20190417191902-c6586a6fe0b7 h1:micT5vkcr9tOVk1FiH8SWKID8ultN44Z+yzd2y/Vyb0= +github.com/getlantern/hex v0.0.0-20190417191902-c6586a6fe0b7/go.mod h1:dD3CgOrwlzca8ed61CsZouQS5h5jIzkK9ZWrTcf0s+o= +github.com/getlantern/hidden v0.0.0-20190325191715-f02dbb02be55 h1:XYzSdCbkzOC0FDNrgJqGRo8PCMFOBFL9py72DRs7bmc= +github.com/getlantern/hidden v0.0.0-20190325191715-f02dbb02be55/go.mod h1:6mmzY2kW1TOOrVy+r41Za2MxXM+hhqTtY3oBKd2AgFA= +github.com/getlantern/ops v0.0.0-20190325191751-d70cb0d6f85f h1:wrYrQttPS8FHIRSlsrcuKazukx/xqO/PpLZzZXsF+EA= +github.com/getlantern/ops v0.0.0-20190325191751-d70cb0d6f85f/go.mod h1:D5ao98qkA6pxftxoqzibIBBrLSUli+kYnJqrgBf9cIA= +github.com/getlantern/systray v1.2.2 h1:dCEHtfmvkJG7HZ8lS/sLklTH4RKUcIsKrAD9sThoEBE= +github.com/getlantern/systray v1.2.2/go.mod h1:pXFOI1wwqwYXEhLPm9ZGjS2u/vVELeIgNMY5HvhHhcE= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -67,6 +81,8 @@ github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE= github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78= +github.com/go-stack/stack v1.8.0 h1:5SgMzNM5HxrEjV0ww2lTmX6E2Izsfxas4+YHWRs3Lsk= +github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= @@ -114,6 +130,8 @@ github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0 github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/lucasb-eyer/go-colorful v1.3.0 h1:2/yBRLdWBZKrf7gB40FoiKfAWYQ0lqNcbuQwVHXptag= github.com/lucasb-eyer/go-colorful v1.3.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= +github.com/lxn/walk v0.0.0-20210112085537-c389da54e794/go.mod h1:E23UucZGqpuUANJooIbHWCufXvOcT6E7Stq81gU+CSQ= +github.com/lxn/win v0.0.0-20210218163916-a377121e959e/go.mod h1:KxxjdtRkfNoYDCUP5ryK7XJJNTnpC8atvtmTheChOtk= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4= @@ -130,6 +148,8 @@ github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk= github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w= github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= +github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c h1:rp5dCmg/yLR3mgFuSOe4oEnDDmGLROTvMragMUXpTQw= +github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c/go.mod h1:X07ZCGwUbLaax7L0S3Tw4hpejzu63ZrrQiUe6W0hcy0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -140,6 +160,7 @@ github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUc github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sahilm/fuzzy v0.1.1 h1:ceu5RHF8DGgoi+/dR5PsECjCDH1BE3Fnmpo7aVXOdRA= github.com/sahilm/fuzzy v0.1.1/go.mod h1:VFvziUEIMCrT6A6tw2RFIXPXXmzXbOsSHF0DOI8ZK9Y= +github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966/go.mod h1:sUM3LWHvSMaG192sy56D9F7CNvL7jUJVXoqM1QKLnog= github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU= github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4= github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= @@ -148,6 +169,7 @@ github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3A github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= @@ -209,6 +231,7 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201018230417-eeed37f84f13/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -257,6 +280,7 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= +gopkg.in/Knetic/govaluate.v3 v3.0.0/go.mod h1:csKLBORsPbafmSCGTEh3U7Ozmsuq8ZSIlKk1bcqph0E= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/internal/agent/agent.go b/internal/agent/agent.go index 0acbee6..027cb63 100644 --- a/internal/agent/agent.go +++ b/internal/agent/agent.go @@ -17,6 +17,11 @@ type ContextReader interface { QueryMemory(ctx context.Context, query string) ([]string, error) } +type ToolRequest struct { + Command string + ResultChan chan<- string +} + type Agent struct { mic audio.Microphone speaker audio.Speaker @@ -29,6 +34,8 @@ type Agent struct { TextChan chan string // this is for tui text input TextResponseChan chan string // results for tui text resp ErrorChan chan error // websocket connection crashes + ToolApprovalChan chan ToolRequest + AllowedCmds sync.Map // session allowlist for shell commands } // initializer and orchestrates all hardware (2) and memory (1) moduels @@ -80,6 +87,7 @@ func NewAgent(mic audio.Microphone, speaker audio.Speaker, brain ContextReader, TextChan: make(chan string, 100), TextResponseChan: make(chan string, 100), ErrorChan: make(chan error, 10), + ToolApprovalChan: make(chan ToolRequest, 1), } return a } diff --git a/internal/agent/agent_test.go b/internal/agent/agent_test.go index 91b86c5..742fbfa 100644 --- a/internal/agent/agent_test.go +++ b/internal/agent/agent_test.go @@ -17,27 +17,53 @@ func (m *mockMic) CurrentAmplitude() float64 { ret type mockSpeaker struct{} -func (s *mockSpeaker) Play(pcm []byte) error { return nil } -func (s *mockSpeaker) Flush() {} -func (s *mockSpeaker) Close() error { return nil } +func (s *mockSpeaker) Play(pcm []byte) error { return nil } +func (s *mockSpeaker) Flush() {} +func (s *mockSpeaker) Close() error { return nil } func (s *mockSpeaker) CurrentAmplitude() float64 { return 0 } type mockBrain struct{} -func (b *mockBrain) GetImplicitContext(ctx context.Context) ([]string, error) { return nil, nil } +func (b *mockBrain) GetImplicitContext(ctx context.Context) ([]string, error) { return nil, nil } func (b *mockBrain) QueryMemory(ctx context.Context, query string) ([]string, error) { return nil, nil } // behavior +// verifies that connect returns an error on failure and can be called again with the same micChan +func TestAgent_ReconnectLoopRetries(t *testing.T) { + mic := &mockMic{} + a := agent.NewAgent(mic, &mockSpeaker{}, &mockBrain{}, nil, "FAKE_API_KEY") + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + micChan, _ := mic.StartCapture(ctx) + + // connect should fail fast (bad key) and be callable multiple times with the same micChan without panicking or blocking + attempts := 0 + for attempts < 3 { + err := a.Connect(ctx, micChan) + if err == nil { + t.Fatal("expected Connect to fail with bad API key") + } + attempts++ + } + + if attempts != 3 { + t.Fatalf("expected 3 reconnect attempts, got %d", attempts) + } +} + func TestAgent_ConnectFailsWithBadKey(t *testing.T) { // inject mocks and fake API key - a := agent.NewAgent(&mockMic{}, &mockSpeaker{}, &mockBrain{}, nil, "FAKE_API_KEY") + mic := &mockMic{} + a := agent.NewAgent(mic, &mockSpeaker{}, &mockBrain{}, nil, "FAKE_API_KEY") ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - // connection failure to gemini - err := a.Connect(ctx) + micChan, _ := mic.StartCapture(ctx) + err := a.Connect(ctx, micChan) if err == nil { t.Fatal("Expected Connect to fail with a fake API key, but it succeeded..") diff --git a/internal/agent/connect.go b/internal/agent/connect.go index 65fca5c..6313291 100644 --- a/internal/agent/connect.go +++ b/internal/agent/connect.go @@ -15,7 +15,7 @@ import ( "google.golang.org/genai" ) -func (a *Agent) Connect(ctx context.Context) error { +func (a *Agent) Connect(ctx context.Context, micChan <-chan []byte) error { tracer := obs.GetTracer(ctx, "ora.agent") handshakeCtx, span := tracer.Start(ctx, "Agent.ConnectHandshake") @@ -105,6 +105,11 @@ func (a *Agent) Connect(ctx context.Context) error { slog.Debug("connected to Live API") defer session.Close() + // per-session context: cancels all goroutines for THIS session when Connect returns + // w/o this, textSendLoop and audioSendLoop from a dead session linger across reconnects + sessCtx, sessCancel := context.WithCancel(ctx) + defer sessCancel() + // kickstartttterrr a.writeMu.Lock() err = session.SendClientContent(genai.LiveSendClientContentParameters{ @@ -127,27 +132,19 @@ func (a *Agent) Connect(ctx context.Context) error { // handshake complete, end span span.End() - errChan := make(chan error, 1) // 1 slot error channel - - // receive loop - handles model responses and tool calls - go a.receiveLoop(ctx, session, a.GetModel(), errChan) - - micChan, err := a.mic.StartCapture(ctx) - if err != nil { - return fmt.Errorf("failed to start microphone: %w", err) - } - - // audio send loop - go a.audioSendLoop(ctx, session, micChan, errChan) + // Buffer 2: receiveLoop and audioSendLoop both write here; without room for both + // the second writer blocks forever if Connect has already returned on the first error. + errChan := make(chan error, 2) - // text send loop (uses textchan) - go a.textSendLoop(ctx, session) + go a.receiveLoop(sessCtx, session, a.GetModel(), errChan) + go a.audioSendLoop(sessCtx, session, micChan, errChan) + go a.textSendLoop(sessCtx, session) select { case err := <-errChan: return err - case <-ctx.Done(): - return ctx.Err() + case <-sessCtx.Done(): + return sessCtx.Err() } } @@ -174,6 +171,20 @@ func (a *Agent) receiveLoop(ctx context.Context, session *genai.Session, model s return } + // check for server-side barge-in (VAD) + if msg.ServerContent != nil && msg.ServerContent.Interrupted { + slog.Info("barge-in detected: server interrupted model generation") + a.speaker.Flush() + + // optional: send a visual cue to the UI + // TODO: remove in dev, or idk keep it + select { + case a.TextResponseChan <- "\n*[ora stopped]*\n": + default: + } + continue + } + // this has very interesting spanning logic if msg.ServerContent != nil && msg.ServerContent.ModelTurn != nil { _, turnSpan := otelTracer.Start(recvCtx, "Agent.ModelTurn") @@ -295,7 +306,7 @@ func (a *Agent) textSendLoop(ctx context.Context, session *genai.Session) { // this is voice haha a.speaker.Flush() slog.Debug("sending text to model", "text", text) - + a.writeMu.Lock() err := session.SendClientContent(genai.LiveSendClientContentParameters{ Turns: []*genai.Content{ @@ -308,7 +319,7 @@ func (a *Agent) textSendLoop(ctx context.Context, session *genai.Session) { }, }) a.writeMu.Unlock() - + if err != nil { slog.Error("failed to send text", "error", err) } @@ -317,4 +328,3 @@ func (a *Agent) textSendLoop(ctx context.Context, session *genai.Session) { } } } - diff --git a/internal/agent/tools.go b/internal/agent/tools.go index 6400110..151d2b1 100644 --- a/internal/agent/tools.go +++ b/internal/agent/tools.go @@ -89,6 +89,25 @@ func toolDefinitions() []*genai.Tool { }} } +func RunShellCommand(command string) string { + var cmd *exec.Cmd + if runtime.GOOS == "windows" { + cmd = exec.Command("powershell", "-Command", command) + } else { + cmd = exec.Command("sh", "-c", command) + } + + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Sprintf("error: %v\noutput: %s", err, string(output)) + } + result := string(output) + if len(result) > 2000 { + result = result[:2000] + "\n... (truncated)" + } + return result +} + // executeTool runs a tool and returns the result as a string // maybe this can be seperated into /agent/tools altogether later and be compiled with OS specific code? func (a *Agent) executeTool(name string, args map[string]any) string { @@ -98,24 +117,24 @@ func (a *Agent) executeTool(name string, args map[string]any) string { if !ok { return "error: command argument is required" } - slog.Warn("executing shell command", "command", command) - var cmd *exec.Cmd - if runtime.GOOS == "windows" { - cmd = exec.Command("powershell", "-Command", command) - } else { - cmd = exec.Command("sh", "-c", command) + // Check session allowlist + if _, allowed := a.AllowedCmds.Load(command); allowed { + slog.Info("executing auto-allowed shell command", "command", command) + return RunShellCommand(command) } - output, err := cmd.CombinedOutput() - if err != nil { - return fmt.Sprintf("error: %v\noutput: %s", err, string(output)) - } - result := string(output) - if len(result) > 2000 { - result = result[:2000] + "\n... (truncated)" + slog.Warn("intercepting shell command for HITL", "command", command) + + resChan := make(chan string, 1) + select { + case a.ToolApprovalChan <- ToolRequest{Command: command, ResultChan: resChan}: + default: + // TUI approval queue full — another tool is pending. Reject to unblock. + return "error: approval queue busy, command rejected" } - return result + + return <-resChan case "read_clipboard": var cmd *exec.Cmd diff --git a/internal/audio/capture_windows.go b/internal/audio/capture_windows.go index 6a263b1..a15da58 100644 --- a/internal/audio/capture_windows.go +++ b/internal/audio/capture_windows.go @@ -132,21 +132,20 @@ func (m *winMic) StartCapture(ctx context.Context) (<-chan []byte, error) { floatData := unsafe.Slice((*float32)(unsafe.Pointer(data)), frames) pcm = make([]byte, frames*2) - var maxAmp float32 + var sumSq float64 for i := 0; i < int(frames); i++ { + // windows gives 32 bit, api requires 16 bit val := float32ToInt16(floatData[i]) pcm[i*2] = byte(val) pcm[i*2+1] = byte(val >> 8) - // extra amp values for tui waveform - absVal := floatData[i] - if absVal < 0 { - absVal = -absVal - } - if absVal > maxAmp { - maxAmp = absVal - } + f := float64(floatData[i]) + sumSq += f * f } - m.currentAmp.Store(math.Float64bits(float64(maxAmp))) + rms := math.Sqrt(sumSq/float64(frames)) * 3.0 + if rms > 1.0 { + rms = 1.0 + } + m.currentAmp.Store(math.Float64bits(rms)) // releasing gives memory back to soundcard before we block further audio acc.ReleaseBuffer(frames) diff --git a/internal/audio/player.go b/internal/audio/player.go index d2425a5..e2eec8e 100644 --- a/internal/audio/player.go +++ b/internal/audio/player.go @@ -4,6 +4,7 @@ import ( "fmt" "io" "math" + "sync" "sync/atomic" "time" @@ -18,9 +19,8 @@ type otoPlayer struct { // we store WCA handles here if later needed for cleanup // Windows Core Audio 2006, lowest audio level possible, allows contains a share mode for multi-active-window mic capturing - player *oto.Player - streamer *audioStreamer - currentAmp atomic.Uint64 + player *oto.Player + streamer *audioStreamer } func NewSpeaker() (Speaker, error) { @@ -58,18 +58,6 @@ func NewSpeaker() (Speaker, error) { } func (p *otoPlayer) Play(pcm []byte) error { - var maxAmp int16 - for i := 0; i < len(pcm)-1; i += 2 { - sample := int16(pcm[i]) | int16(pcm[i+1])<<8 - if sample < 0 { - sample = -sample - } - if sample > maxAmp { - maxAmp = sample - } - } - p.currentAmp.Store(math.Float64bits(float64(maxAmp) / 32768.0)) - // drop the audio chunk here, and read() should pick it up // the backpressure is required in streaming media. natural backpressure forces the llm to wait for real time playback? // i think im right but i'll see? update: @@ -83,21 +71,24 @@ func (p *otoPlayer) Play(pcm []byte) error { } func (p *otoPlayer) CurrentAmplitude() float64 { - return math.Float64frombits(p.currentAmp.Load()) + return math.Float64frombits(p.streamer.currentAmp.Load()) } // INTERRUPT HANDLING HAHA func (p *otoPlayer) Flush() { // drain the chunks channel immediately - for len(p.streamer.chunks) > 0 { +drain: + for { select { case <-p.streamer.chunks: default: - return + break drain } } - // also clear the active buffer in the streamer + // also clear the active buffer in the streamer under lock — Read() runs on a separate OS audio thread + p.streamer.mu.Lock() p.streamer.buffer = nil + p.streamer.mu.Unlock() } func (p *otoPlayer) Close() error { @@ -107,36 +98,62 @@ func (p *otoPlayer) Close() error { } type audioStreamer struct { - chunks chan []byte - buffer []byte + mu sync.Mutex + chunks chan []byte + buffer []byte + currentAmp atomic.Uint64 } func (s *audioStreamer) Read(p []byte) (n int, err error) { - // if no current audio, check channel - if len(s.buffer) == 0 { + s.mu.Lock() + bufLen := len(s.buffer) + s.mu.Unlock() + + if bufLen == 0 { select { case chunk, ok := <-s.chunks: if !ok { return 0, io.EOF // close the channel + end of stream } + s.mu.Lock() s.buffer = chunk + s.mu.Unlock() default: // this should run when no audio is ready // returning silence for some time so that hardware doesn't deadlock // we fill the entire buffer p to maintain clock sync - silenceLen := len(p) // 24khz mono 16bit - for i := 0; i < silenceLen; i++ { + for i := range p { p[i] = 0 } + s.currentAmp.Store(0) time.Sleep(time.Millisecond) // we dont want cpu spinning and just blocking everything either - return silenceLen, nil + return len(p), nil } } // copy real audio (recoreded) into the destination buffer - + s.mu.Lock() n = copy(p, s.buffer) s.buffer = s.buffer[n:] + s.mu.Unlock() + + // rms amplitude scaled up to match perceived sensitivity of peak. + // Raw rms is ~3x lower than peak for speech; multiply to restore range. + samples := n / 2 + var sum float64 + for i := 0; i+1 < n; i += 2 { + s16 := float64(int16(p[i]) | int16(p[i+1])<<8) + sum += s16 * s16 + } + var rms float64 + if samples > 0 { + rms = math.Sqrt(sum/float64(samples)) / 32768.0 * 3.0 + if rms > 1.0 { + rms = 1.0 + } + } + s.currentAmp.Store(math.Float64bits(rms)) + return n, nil } diff --git a/internal/config/config.go b/internal/config/config.go new file mode 100644 index 0000000..8fe0e54 --- /dev/null +++ b/internal/config/config.go @@ -0,0 +1,60 @@ +package config + +import ( + "encoding/json" + "log/slog" + "os" + "path/filepath" + "time" +) + +type OraConfig struct { + Tracker TrackerConfig `json:"tracker"` +} + +type TrackerConfig struct { + Blocklist []string `json:"blocklist"` + DwellTime time.Duration `json:"dwell_time_ms"` +} + +// currently windows only +var DefaultBlocklist = []string{ + "1Password.exe", + "Bitwarden.exe", + "Taskmgr.exe", + "LockApp.exe", +} + +// get or create +func LoadConfig() OraConfig { + cfg := OraConfig{ + Tracker: TrackerConfig{ + Blocklist: DefaultBlocklist, + DwellTime: 3000, + }, + } + + configPath := filepath.Join("ora-db", "ora-config.json") + + if _, err := os.Stat(configPath); os.IsNotExist(err) { + slog.Info("Creating default config file", "path", configPath) + if err := os.MkdirAll("ora-db", 0755); err == nil { + data, _ := json.MarshalIndent(cfg, "", " ") + os.WriteFile(configPath, data, 0644) + } + return cfg + } + + data, err := os.ReadFile(configPath) + if err != nil { + slog.Error("Failed to read config file, using defaults", "error", err) + return cfg + } + + if err := json.Unmarshal(data, &cfg); err != nil { + slog.Error("Failed to parse config file, using defaults", "error", err) + return cfg + } + + return cfg +} diff --git a/internal/obs/telemetry.go b/internal/obs/telemetry.go index ff48d1c..ad1efc5 100644 --- a/internal/obs/telemetry.go +++ b/internal/obs/telemetry.go @@ -42,17 +42,31 @@ func InitTelemetry(ctx context.Context, isTest bool) (func(context.Context) erro logger := slog.New(&TraceHandler{handler: jsonHandler}) slog.SetDefault(logger) + noop := func(ctx context.Context) error { return nil } + if isTest { slog.Debug("Running in test mode, bypassing OTLP exporter setup.") - return func(ctx context.Context) error { return nil }, nil + return noop, nil } // setup otlp trace exporter (for any collector) // otlptracegrpc localhost:4317 // data moves to 4317 and then to any collector (whatever is setup) - exporter, err := otlptracegrpc.New(ctx, otlptracegrpc.WithInsecure()) + // but now, we only enable OTLP exporter if endpoint is explicitly configured + // mosts won't have Jaeger running; blocking on gRPC dial added 3-5s startup cost + endpoint := os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT") + if endpoint == "" { + slog.Info("tracing disabled (set OTEL_EXPORTER_OTLP_ENDPOINT=localhost:4317 to enable)") + return noop, nil + } + + exporter, err := otlptracegrpc.New(ctx, + otlptracegrpc.WithInsecure(), + otlptracegrpc.WithEndpoint(endpoint), + ) if err != nil { - return nil, fmt.Errorf("failed to create OTLP trace exporter: %w", err) + slog.Warn("failed to create OTLP exporter, tracing disabled", "error", err) + return noop, nil } res, _ := resource.New(ctx, diff --git a/internal/tracker/daemon.go b/internal/tracker/daemon.go index d22274b..e17691c 100644 --- a/internal/tracker/daemon.go +++ b/internal/tracker/daemon.go @@ -3,6 +3,7 @@ package tracker import ( "context" "log/slog" + "strings" "time" "ora/internal/obs" @@ -13,24 +14,31 @@ import ( type Daemon struct { eye Tracker interval time.Duration + dwellTime time.Duration + blocklist []string eventChan chan Activity } -func NewDaemon(eye Tracker, interval time.Duration, eventChan chan Activity) *Daemon { +func NewDaemon(eye Tracker, interval time.Duration, dwellTime time.Duration, blocklist []string, eventChan chan Activity) *Daemon { if interval <= 0 { interval = 2 * time.Second // default polling } + if dwellTime <= 0 { + dwellTime = 3 * time.Second // default dwell time + } - return &Daemon{eye, interval, eventChan} + return &Daemon{eye, interval, dwellTime, blocklist, eventChan} } func (d *Daemon) Start(ctx context.Context) { ticker := time.NewTicker(d.interval) defer ticker.Stop() - // short term mem (empty at first) var lastActivity *Activity - var lastEmitTime time.Time + var pendingActivity *Activity + var pendingSince time.Time + var emittedCurrent bool + tracer := obs.GetTracer(ctx, "ora.tracker") for { @@ -51,24 +59,64 @@ func (d *Daemon) Start(ctx context.Context) { continue } + blocked := false + for _, blockedApp := range d.blocklist { + if strings.EqualFold(activity.App, blockedApp) { + blocked = true + break + } + } + + if blocked { + span.SetAttributes(attribute.Bool("tracker.blocked", true)) + pendingActivity = nil + span.End() + continue + } + // updates at every logged window change or 10 min heartbeat - changed := lastActivity != nil && (activity.App != lastActivity.App || activity.Title != lastActivity.Title) - if lastActivity == nil || changed || time.Since(lastEmitTime) > 10*time.Minute { + changed := lastActivity == nil || activity.App != lastActivity.App || activity.Title != lastActivity.Title + + if changed { + lastActivity = activity + pendingActivity = activity + pendingSince = time.Now() + emittedCurrent = false span.SetAttributes( - attribute.Bool("tracker.changed", changed), + attribute.Bool("tracker.changed", true), attribute.String("tracker.app", activity.App), + attribute.String("tracker.state", "pending"), ) - lastActivity = activity - lastEmitTime = time.Now() - select { - case d.eventChan <- *activity: // successfully pushed - case <-ctx.Done(): - span.End() - return // user exit + pipe full // safe exit + } else if pendingActivity != nil && !emittedCurrent { + // window hasn't changed, check if it met dwell time + if time.Since(pendingSince) >= d.dwellTime { + span.SetAttributes( + attribute.Bool("tracker.changed", false), + attribute.String("tracker.state", "emitted"), + ) + + slog.Info("activity tracked", "app", pendingActivity.App, "title", pendingActivity.Title) + + select { + case d.eventChan <- *pendingActivity: // successfully pushed + emittedCurrent = true + case <-ctx.Done(): + span.End() + return // user exit + pipe full // safe exit + } + } else { + span.SetAttributes( + attribute.Bool("tracker.changed", false), + attribute.String("tracker.state", "dwelling"), + ) } } else { - span.SetAttributes(attribute.Bool("tracker.changed", false)) + span.SetAttributes( + attribute.Bool("tracker.changed", false), + attribute.String("tracker.state", "idle"), + ) } + span.End() } } diff --git a/internal/tracker/daemon_test.go b/internal/tracker/daemon_test.go index 82906f2..92be0a2 100644 --- a/internal/tracker/daemon_test.go +++ b/internal/tracker/daemon_test.go @@ -23,46 +23,52 @@ func (m *mockTracker) GetActiveWindow() (*tracker.Activity, error) { return resp, nil } -func TestDaemon_OnlyLogOnWindowsChange(t *testing.T) { +func TestDaemon_DwellTimeAndBlocklist(t *testing.T) { mockEye := &mockTracker{ responses: []*tracker.Activity{ - {App: "Code.exe", Title: "main.go - VSCode"}, - {App: "Code.exe", Title: "main.go - VSCode"}, // to ignore - {App: "chrome.exe", Title: "Go docs"}, // to log + {App: "1Password.exe", Title: "Vault"}, // tick 1: blocked + {App: "1Password.exe", Title: "Vault"}, // tick 2: blocked + {App: "Code.exe", Title: "main.go"}, // tick 3: pending + {App: "chrome.exe", Title: "Go docs"}, // tick 4: transient! Code.exe discarded, chrome.exe pending + {App: "chrome.exe", Title: "Go docs"}, // tick 5 + {App: "chrome.exe", Title: "Go docs"}, // tick 6 + {App: "chrome.exe", Title: "Go docs"}, // tick 7: definitely emitted + {App: "Code.exe", Title: "main.go"}, // tick 8: pending + {App: "Code.exe", Title: "main.go"}, // tick 9 + {App: "Code.exe", Title: "main.go"}, // tick 10: definitely emitted }, } eventChan := make(chan tracker.Activity, 10) - daemon := tracker.NewDaemon(mockEye, 10*time.Millisecond, eventChan) + // poll every 50ms, Dwell time requires 2 ticks (100ms) + daemon := tracker.NewDaemon(mockEye, 50*time.Millisecond, 100*time.Millisecond, []string{"1Password.exe", "Taskmgr.exe"}, eventChan) ctx, cancel := context.WithCancel(context.Background()) go daemon.Start(ctx) - time.Sleep(50 * time.Millisecond) - - // verification var receivedEvents []tracker.Activity - timeout := time.After(150 * time.Millisecond) + timeout := time.After(1 * time.Second) - for i := 0; i < 2; i++ { +CollectLoop: + for { select { case ev := <-eventChan: receivedEvents = append(receivedEvents, ev) case <-timeout: - t.Fatalf("Test timed out waiting for events. Only got %d", len(receivedEvents)) + cancel() + break CollectLoop } } - cancel() - if len(receivedEvents) != 2 { - t.Fatalf("Expected 2 events, got %d: %+v", len(receivedEvents), receivedEvents) + t.Fatalf("Expected exactly 2 events (Chrome, Code), got %d: %+v", len(receivedEvents), receivedEvents) } - if receivedEvents[1].App != "chrome.exe" { - t.Errorf("Expected second event to be chrome, got %s", receivedEvents[1].App) + if receivedEvents[0].App != "chrome.exe" { + t.Errorf("Expected first event to be chrome.exe, got %s", receivedEvents[0].App) + } + if receivedEvents[1].App != "Code.exe" { + t.Errorf("Expected second event to be Code.exe, got %s", receivedEvents[1].App) } - - t.Logf("Capture verified: Tracked %d distinct window changes.", len(receivedEvents)) } diff --git a/internal/ui/constants.go b/internal/ui/constants.go index 9a9fa86..2933973 100644 --- a/internal/ui/constants.go +++ b/internal/ui/constants.go @@ -4,7 +4,7 @@ package ui const ( MinViewportHeight = 5 LayoutPadding = 6 - SignalFieldHeight = 5 + SignalFieldHeight = 7 GutterWidth = 10 DefaultWidth = 80 MaxTextareaHeight = 6 diff --git a/internal/ui/layout.go b/internal/ui/layout.go index 67651e1..73c7e25 100644 --- a/internal/ui/layout.go +++ b/internal/ui/layout.go @@ -8,35 +8,59 @@ import ( ) func (m *model) renderHeader() string { - headerLeft := lipgloss.JoinHorizontal(lipgloss.Center, - m.styles.OraLogo.Render("ORA"), + headerLeft := m.styles.OraLogo.Render("ORA") + + var trackPill string + if m.daemonOK { + trackPill = lipgloss.NewStyle(). + Foreground(m.styles.Green). + Background(m.styles.BgHeader). + Padding(0, 1). + Render("⊙ tracking") + } else { + trackPill = lipgloss.NewStyle(). + Foreground(m.styles.Muted). + Background(m.styles.BgHeader). + Padding(0, 1). + Render("○ no tracker") + } + + var connPill string + if m.isConnected { + connPill = m.styles.LivePill.Render("● live") + } else { + connPill = lipgloss.NewStyle(). + Foreground(lipgloss.Color("#f59e0b")). + Background(lipgloss.Color("#16100a")). + Padding(0, 1). + Render("● reconnecting") + } + + headerRight := lipgloss.JoinHorizontal(lipgloss.Center, + trackPill, m.styles.HeaderSep.Render("·"), - m.styles.HeaderPath.Render("~/dev/ora"), + connPill, ) - headerRight := m.styles.LivePill.Render("● live") - - // spacer math is headache, we force it to fill the gap spacer := strings.Repeat(" ", max(0, m.width-lipgloss.Width(headerLeft)-lipgloss.Width(headerRight)-8)) - header := lipgloss.JoinHorizontal(lipgloss.Center, headerLeft, spacer, headerRight) return m.styles.Header.Width(m.width).Render(header) } // this field handles the microphone and speaker waveforms func (m *model) renderSignalField() string { - if m.mode == ModeText || m.agent.IsMuted() { + if m.mode == ModeText { return "" } micView := m.micWave.Render(m.styles.WaveUser, "MICROPHONE") speakerView := m.speakerWave.Render(m.styles.WaveOra, "ORA VOICE") - // spacer needs to be as tall as the waveforms (3 lines) to avoid holes below it + // spacer needs to be as tall as the waveforms (label + 4 braille rows = 5) to avoid holes below it spacer := lipgloss.NewStyle(). Background(m.styles.BgInput). Width(10). - Height(3). + Height(5). Render("") waves := lipgloss.JoinHorizontal(lipgloss.Top, micView, spacer, speakerView) @@ -62,24 +86,35 @@ func (m *model) renderInput() string { modeHint = m.styles.KbdLabel.Render("both: voice+text") } + ramHint := m.styles.KbdLabel.Render(m.cachedRAM) + hints := lipgloss.JoinHorizontal(lipgloss.Center, m.styles.KbdKey.Render("↵"), m.styles.KbdLabel.Render("send"), sep, m.styles.KbdKey.Render("/context"), m.styles.KbdLabel.Render("workspace"), sep, m.styles.KbdKey.Render("/voice"), m.styles.KbdLabel.Render("toggle"), sep, - m.styles.KbdKey.Render("mode"), modeHint, + m.styles.KbdKey.Render("mode"), modeHint, sep, + m.styles.KbdKey.Render("ram"), ramHint, ) - // 1. Render the command menu if active + // 1. Render the menu depending on state var inputDeck string - if m.showCmdList { + if m.mode == ModeToolConfirm { + menu := lipgloss.NewStyle(). + Background(m.styles.BgInput). + PaddingTop(1). + Render(m.hitlList.View()) + + inputDeck = lipgloss.JoinVertical(lipgloss.Left, inpRow, menu) + } else if m.showCmdList { menu := lipgloss.NewStyle(). Background(m.styles.BgInput). + PaddingTop(1). Render(m.cmdList.View()) // Show menu below the input inputDeck = lipgloss.JoinVertical(lipgloss.Left, inpRow, menu) } else { - inputDeck = lipgloss.JoinVertical(lipgloss.Left, inpRow, lipgloss.NewStyle().Background(m.styles.BgInput).MarginTop(1).Render(hints)) + inputDeck = lipgloss.JoinVertical(lipgloss.Left, inpRow, lipgloss.NewStyle().Background(m.styles.BgInput).PaddingTop(1).Render(hints)) } return m.styles.InputWrap.Width(m.width).Render(inputDeck) diff --git a/internal/ui/list.go b/internal/ui/list.go index 44db818..60bc1d8 100644 --- a/internal/ui/list.go +++ b/internal/ui/list.go @@ -74,11 +74,29 @@ func newCommandList(s Styles) list.Model { l.SetShowHelp(false) // Styles for pagination - l.Styles.PaginationStyle = lipgloss.NewStyle().PaddingLeft(2).Foreground(s.Muted) + l.Styles.PaginationStyle = lipgloss.NewStyle().PaddingLeft(2).Foreground(s.Muted).Background(s.BgInput) return l } +func newHitlList(s Styles) list.Model { + items := []list.Item{ + commandItem{title: "Allow once", desc: "Execute this command and return the result"}, + commandItem{title: "Allow for session", desc: "Always execute this command without asking"}, + commandItem{title: "Reject", desc: "Cancel execution and notify the agent"}, + commandItem{title: "Suggest changes", desc: "Edit the command before running"}, + } + + l := list.New(items, commandDelegate{styles: s}, DefaultListWidth, DefaultListHeight) + l.SetShowTitle(false) + l.SetShowStatusBar(false) + l.SetFilteringEnabled(false) + l.SetShowHelp(false) + + l.Styles.PaginationStyle = lipgloss.NewStyle().PaddingLeft(2).Foreground(s.Muted).Background(s.BgInput) + + return l +} func FilterCommands(l *list.Model, query string) { allCommands := []list.Item{ commandItem{title: "voice", desc: "Switch to Voice-Only mode"}, diff --git a/internal/ui/message.go b/internal/ui/message.go index 71f6d0a..8d63f27 100644 --- a/internal/ui/message.go +++ b/internal/ui/message.go @@ -14,12 +14,11 @@ type Message struct { } func (m *model) renderMessage(msg Message, width int) string { - // Fixed width for alignment gutterWidth := GutterWidth contentWidth := width - gutterWidth - // Base row style with no background forced, to rely on lipgloss.Place - rowStyle := lipgloss.NewStyle().Width(width).UnsetBackground() + // Explicit background on every row prevents terminal bleed-through in viewport. + rowStyle := lipgloss.NewStyle().Width(width).Background(m.styles.BgViewport) var prefix, content string @@ -76,13 +75,20 @@ func (m *model) renderBanner(content string, width int) string { func (m *model) updateViewport() { var wrapped []string - // internal width accounting for the padding - renderWidth := m.viewport.Width - 4 + // account for the viewport's own padding (2,4) → 8 chars horizontal + renderWidth := m.viewport.Width - 8 + if renderWidth < 20 { + renderWidth = 20 + } + + // Styled blank line so the separator between messages has the same background + // as the viewport. Plain "\n\n" shows the terminal color through. + sep := lipgloss.NewStyle().Background(m.styles.BgViewport).Width(renderWidth).Render("") for _, msg := range m.messages { wrapped = append(wrapped, m.renderMessage(msg, renderWidth)) } - m.viewport.SetContent(strings.Join(wrapped, "\n\n")) + m.viewport.SetContent(strings.Join(wrapped, "\n"+sep+"\n")) m.viewport.GotoBottom() } diff --git a/internal/ui/styles.go b/internal/ui/styles.go index 6a000ae..86ce7fb 100644 --- a/internal/ui/styles.go +++ b/internal/ui/styles.go @@ -109,26 +109,27 @@ func DefaultStyles() Styles { Padding(0, 1) // chat block prefixes and colors - s.PrefixSystem = lipgloss.NewStyle().Foreground(s.Muted).Width(gutterWidth).Align(lipgloss.Right).PaddingRight(2) - s.TextSystem = lipgloss.NewStyle().Foreground(s.Muted) + s.PrefixSystem = lipgloss.NewStyle().Foreground(s.Muted).Background(s.BgViewport).Width(gutterWidth).Align(lipgloss.Right).PaddingRight(2) + s.TextSystem = lipgloss.NewStyle().Foreground(s.Muted).Background(s.BgViewport) - s.PrefixThought = lipgloss.NewStyle().Foreground(lipgloss.Color("#a78bfa")).Width(gutterWidth).Align(lipgloss.Right).PaddingRight(2) + s.PrefixThought = lipgloss.NewStyle().Foreground(lipgloss.Color("#a78bfa")).Background(s.BgViewport).Width(gutterWidth).Align(lipgloss.Right).PaddingRight(2) s.TextThought = lipgloss.NewStyle(). Foreground(lipgloss.Color("#c4b5fd")). + Background(s.BgThought). Italic(true). Border(lipgloss.NormalBorder(), false, false, false, true). BorderForeground(lipgloss.Color("#363650")). PaddingLeft(1) s.BgThought = lipgloss.Color("#13131a") // slightly lighter Zinc/Navy for thoughts - s.PrefixYou = lipgloss.NewStyle().Foreground(s.Green).Bold(true).Width(gutterWidth).Align(lipgloss.Right).PaddingRight(2) - s.TextYou = lipgloss.NewStyle().Foreground(lipgloss.Color("#a3e635")) + s.PrefixYou = lipgloss.NewStyle().Foreground(s.Green).Background(s.BgViewport).Bold(true).Width(gutterWidth).Align(lipgloss.Right).PaddingRight(2) + s.TextYou = lipgloss.NewStyle().Foreground(lipgloss.Color("#a3e635")).Background(s.BgViewport) - s.PrefixOra = lipgloss.NewStyle().Foreground(s.Purple).Bold(true).Width(gutterWidth).Align(lipgloss.Right).PaddingRight(2) - s.TextOra = lipgloss.NewStyle().Foreground(s.White) + s.PrefixOra = lipgloss.NewStyle().Foreground(s.Purple).Background(s.BgViewport).Bold(true).Width(gutterWidth).Align(lipgloss.Right).PaddingRight(2) + s.TextOra = lipgloss.NewStyle().Foreground(s.White).Background(s.BgViewport) - s.ToolDot = lipgloss.NewStyle().Foreground(s.Purple) - s.ToolText = lipgloss.NewStyle().Foreground(s.Gray).Italic(true) + s.ToolDot = lipgloss.NewStyle().Foreground(s.Purple).Background(s.BgViewport) + s.ToolText = lipgloss.NewStyle().Foreground(s.Gray).Background(s.BgViewport).Italic(true) // keyboard hints and input styling - locking to BgInput s.InputPrefix = lipgloss.NewStyle(). diff --git a/internal/ui/ui.go b/internal/ui/ui.go index 3cc8808..7ba00dc 100644 --- a/internal/ui/ui.go +++ b/internal/ui/ui.go @@ -1,6 +1,9 @@ package ui import ( + "context" + "fmt" + "runtime" "strings" "time" @@ -16,34 +19,41 @@ import ( type AgentMode string const ( - ModeBoth AgentMode = "both" - ModeVoice AgentMode = "voice" - ModeText AgentMode = "text" + ModeBoth AgentMode = "both" + ModeVoice AgentMode = "voice" + ModeText AgentMode = "text" + ModeToolConfirm AgentMode = "tool_confirm" + ModeToolEdit AgentMode = "tool_edit" ) type model struct { - agent *agent.Agent - viewport viewport.Model - textarea textarea.Model - styles Styles - messages []Message - lastSender string - lastUpdate time.Time - micWave *Waveform - speakerWave *Waveform - width int - height int - isThinking bool - mode AgentMode - cmdList list.Model - showCmdList bool + agent *agent.Agent + viewport viewport.Model + textarea textarea.Model + styles Styles + messages []Message + lastUpdate time.Time + micWave *Waveform + speakerWave *Waveform + width int + height int + isThinking bool + isConnected bool + daemonOK bool + mode AgentMode + cmdList list.Model + showCmdList bool + hitlList list.Model + activeToolReq *agent.ToolRequest + cachedRAM string + lastRAMCheck time.Time } type responseMsg string type tickMsg time.Time type errorMsg error -func NewModel(a *agent.Agent) model { +func NewModel(a *agent.Agent, daemonStatus string) model { s := DefaultStyles() ta := textarea.New() @@ -70,12 +80,30 @@ func NewModel(a *agent.Agent) model { ╚██████╔╝██║ ██║██║ ██║ ╚═════╝ ╚═╝ ╚═╝╚═╝ ╚═╝` + var daemonLine string + switch daemonStatus { + case "connected": + daemonLine = "tracker connected · activity memory active" + case "started": + daemonLine = "tracker started · activity memory active" + default: + if daemonStatus != "" { + daemonLine = "⚠ " + daemonStatus + } + } + + introContent := "\n\n\n\n\n\n" + banner + "\n\nambient OS companion · v0.1.1-alpha · type /help for commands" + if daemonLine != "" { + introContent += "\n" + daemonLine + } + introMsg := Message{ - Sender: "system", - // we use vertical space to push the banner to the middle-ish - Content: "\n\n\n\n\n\n" + banner + "\n\nambient OS companion · v0.1.1-alpha · type /help for commands", + Sender: "system", + Content: introContent, } + daemonOK := daemonStatus == "connected" || daemonStatus == "started" + return model{ agent: a, styles: s, @@ -86,15 +114,18 @@ func NewModel(a *agent.Agent) model { speakerWave: NewWaveform(40), mode: ModeBoth, cmdList: newCommandList(s), + hitlList: newHitlList(s), + isConnected: true, + daemonOK: daemonOK, } } func (m model) Init() tea.Cmd { - // start everything at once return tea.Batch( textarea.Blink, m.waitForResponse(), m.waitForError(), + m.waitForToolRequest(), m.doTick(), ) } @@ -126,6 +157,16 @@ func (m model) waitForResponse() tea.Cmd { } } +func (m model) waitForToolRequest() tea.Cmd { + return func() tea.Msg { + req, ok := <-m.agent.ToolApprovalChan + if !ok { + return nil + } + return req + } +} + func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { var ( tiCmd tea.Cmd @@ -134,7 +175,18 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { // all slash cmds, and tui updates switch msg := msg.(type) { + case agent.ToolRequest: + m.activeToolReq = &msg + m.mode = ModeToolConfirm + m.messages = append(m.messages, Message{Sender: "tool", Content: "Ora wants to execute:\n " + msg.Command, IsTool: true}) + m.updateViewport() + return m, m.waitForToolRequest() + case tea.MouseMsg: + if m.mode == ModeToolConfirm { + m.hitlList, _ = m.hitlList.Update(msg) + return m, nil + } if m.showCmdList { m.cmdList, _ = m.cmdList.Update(msg) m.textarea, tiCmd = m.textarea.Update(msg) @@ -145,6 +197,71 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { return m, tea.Batch(vpCmd, tiCmd) case tea.KeyMsg: + if m.mode == ModeToolConfirm { + switch msg.Type { + case tea.KeyUp, tea.KeyDown: + var cmd tea.Cmd + m.hitlList, cmd = m.hitlList.Update(msg) + return m, cmd + case tea.KeyEnter: + if i, ok := m.hitlList.SelectedItem().(commandItem); ok { + switch i.title { + case "Allow once": + m.messages = append(m.messages, Message{Sender: "tool", Content: "Approved. Executing..."}) + go func(cmd string, c chan<- string) { + c <- agent.RunShellCommand(cmd) + }(m.activeToolReq.Command, m.activeToolReq.ResultChan) + m.mode = ModeBoth + case "Allow for session": + m.messages = append(m.messages, Message{Sender: "tool", Content: "Approved for session. Executing..."}) + m.agent.AllowedCmds.Store(m.activeToolReq.Command, true) + go func(cmd string, c chan<- string) { + c <- agent.RunShellCommand(cmd) + }(m.activeToolReq.Command, m.activeToolReq.ResultChan) + m.mode = ModeBoth + case "Reject": + m.messages = append(m.messages, Message{Sender: "tool", Content: "Command rejected."}) + m.activeToolReq.ResultChan <- "User rejected this command." + m.mode = ModeBoth + case "Suggest changes": + m.textarea.SetValue(m.activeToolReq.Command) + m.textarea.Focus() + m.mode = ModeToolEdit + } + m.activeToolReq = nil + m.updateViewport() + return m, nil + } + } + return m, nil + } + + if m.mode == ModeToolEdit { + if msg.Type == tea.KeyEnter { + editedCmd := strings.TrimSpace(m.textarea.Value()) + m.textarea.Reset() + m.messages = append(m.messages, Message{Sender: "tool", Content: "Executing modified command:\n " + editedCmd}) + m.updateViewport() + go func(cmd string, c chan<- string) { + c <- agent.RunShellCommand(cmd) + }(editedCmd, m.activeToolReq.ResultChan) + m.mode = ModeBoth + m.activeToolReq = nil + return m, nil + } + if msg.Type == tea.KeyEsc { + m.textarea.Reset() + m.messages = append(m.messages, Message{Sender: "tool", Content: "Edit cancelled. Command rejected."}) + m.activeToolReq.ResultChan <- "User rejected this command." + m.mode = ModeBoth + m.activeToolReq = nil + m.updateViewport() + return m, nil + } + m.textarea, tiCmd = m.textarea.Update(msg) + return m, tiCmd + } + // navigate through the menu list if m.showCmdList { switch msg.Type { @@ -177,6 +294,11 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { m.executeCommand(input) m.textarea.Reset() m.showCmdList = false + } else if m.mode == ModeVoice { + // Voice-only: text sends are disabled. Only slash commands work. + m.messages = append(m.messages, Message{Sender: "system", Content: "Text input disabled in voice mode. Use /both to enable."}) + m.textarea.Reset() + m.updateViewport() } else { m.agent.TextChan <- input m.streamLine("you", input) @@ -199,9 +321,11 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { return m, tiCmd case responseMsg: + m.isConnected = true m.streamLine("ora", string(msg)) return m, m.waitForResponse() case errorMsg: + m.isConnected = false m.streamLine("system", "CONNECTION CRITICAL: "+msg.Error()) return m, m.waitForError() @@ -214,6 +338,18 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { m.speakerWave.Update(m.agent.GetSpeaker().CurrentAmplitude()) } + // ReadMemStats causes a GC stop-the-world — only sample every 5s + if time.Since(m.lastRAMCheck) > 5*time.Second { + var mem runtime.MemStats + runtime.ReadMemStats(&mem) + if mem.Sys >= 1024*1024*1024 { + m.cachedRAM = fmt.Sprintf("%.1f GB", float64(mem.Sys)/(1024*1024*1024)) + } else { + m.cachedRAM = fmt.Sprintf("%d MB", mem.Sys/(1024*1024)) + } + m.lastRAMCheck = time.Now() + } + // check if textarea needs to grow { val := m.textarea.Value() @@ -288,8 +424,10 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { // handles the real-time streaming logic, keeps the viewport updated func (m *model) streamLine(sender, content string) { - // If in voice mode, drop standard Ora text responses (keep thoughts) - if m.mode == ModeVoice && sender == "ora" && !strings.Contains(content, "**") && !m.isThinking { + // In voice mode, Ora's audio plays through the speaker — drop text responses + // from the viewport (they'd be incomplete fragments anyway). Keep thoughts, + // system messages, and tool messages always visible. + if m.mode == ModeVoice && sender == "ora" && !m.isThinking { return } @@ -300,7 +438,6 @@ func (m *model) streamLine(sender, content string) { if content != "" { m.messages = append(m.messages, Message{Sender: sender, Content: content, IsThought: m.isThinking}) - m.lastSender = "" } } else if len(m.messages) > 0 && m.messages[len(m.messages)-1].Sender == sender && m.messages[len(m.messages)-1].IsThought == m.isThinking && !m.messages[len(m.messages)-1].IsTool { // append to the last message if it's the same sender and state @@ -308,7 +445,6 @@ func (m *model) streamLine(sender, content string) { } else { // start a new message block m.messages = append(m.messages, Message{Sender: sender, Content: content, IsThought: m.isThinking}) - m.lastSender = sender } // throttle viewport updates so it doesn't flicker too much @@ -323,26 +459,52 @@ func (m *model) executeCommand(input string) { case "/voice": m.mode = ModeVoice m.agent.SetMute(false) - m.messages = append(m.messages, Message{Sender: "system", Content: "Switched to Voice-Only Mode"}) + m.textarea.Placeholder = "voice active — /both or /text to enable typing" + m.messages = append(m.messages, Message{Sender: "system", Content: "Voice-Only Mode — mic active, text input disabled"}) case "/text": m.mode = ModeText m.agent.SetMute(true) - m.messages = append(m.messages, Message{Sender: "system", Content: "Switched to Text-Only Mode (Microphone paused)"}) + m.textarea.Placeholder = "ask anything, or /command" + m.messages = append(m.messages, Message{Sender: "system", Content: "Text Mode — mic paused, type to interact"}) case "/both": m.mode = ModeBoth m.agent.SetMute(false) - m.messages = append(m.messages, Message{Sender: "system", Content: "Switched to Voice + Text Mode"}) + m.textarea.Placeholder = "ask anything, or /command" + m.messages = append(m.messages, Message{Sender: "system", Content: "Voice + Text Mode — mic active, text input enabled"}) case "/mute": - isMuted := m.agent.ToggleMute() - status := "unmuted" - if isMuted { - status = "muted" + if m.mode == ModeText { + m.messages = append(m.messages, Message{Sender: "system", Content: "Already in Text Mode — mic is paused. Use /both to re-enable."}) + } else if m.mode == ModeVoice { + m.messages = append(m.messages, Message{Sender: "system", Content: "Cannot mute in Voice Mode. Use /both first, then /mute."}) + } else { + isMuted := m.agent.ToggleMute() + status := "unmuted" + if isMuted { + status = "muted" + } + m.messages = append(m.messages, Message{Sender: "system", Content: "Microphone " + status}) } - m.messages = append(m.messages, Message{Sender: "system", Content: "Microphone globally " + status}) case "/clear": m.messages = []Message{} + case "/help": + helpText := `Available Commands: + /voice - Switch to Voice-Only mode + /text - Switch to Text-Only mode + /both - Switch to Voice + Text mode + /mute - Toggle global microphone mute + /context - View the semantic memory currently loaded + /clear - Clear the chat screen + /help - Show this help menu` + m.messages = append(m.messages, Message{Sender: "system", Content: helpText}) + case "/context": + importCtx, err := m.agent.GetBrain().GetImplicitContext(context.Background()) + if err != nil { + m.messages = append(m.messages, Message{Sender: "system", Content: "Failed to fetch context: " + err.Error()}) + } else { + m.messages = append(m.messages, Message{Sender: "system", Content: "Active Context Window:\n" + strings.Join(importCtx, "\n")}) + } default: - m.messages = append(m.messages, Message{Sender: "tool", Content: "Executed command: " + input, IsTool: true}) + m.messages = append(m.messages, Message{Sender: "system", Content: "Unknown command: " + input + ". Type /help for available commands."}) } m.updateViewport() } @@ -377,9 +539,8 @@ func max(a, b int) int { return b } -func Run(a *agent.Agent) error { - // allows for clickable terminal - p := tea.NewProgram(NewModel(a), tea.WithAltScreen(), tea.WithMouseCellMotion()) +func Run(a *agent.Agent, daemonStatus string) error { + p := tea.NewProgram(NewModel(a, daemonStatus), tea.WithAltScreen(), tea.WithMouseCellMotion()) _, err := p.Run() return err } diff --git a/internal/ui/waveform.go b/internal/ui/waveform.go index b1fc4ae..7a637d5 100644 --- a/internal/ui/waveform.go +++ b/internal/ui/waveform.go @@ -1,21 +1,54 @@ package ui import ( + "math" "strings" "github.com/charmbracelet/lipgloss" ) -/* - Waveform expectations (this one is a sine wave) - ⣤⣶⣶⣿⣿⣿⣿⣿⣶⣶⣤⣤⣀⣀⣀⣀⠀⣀⣀⣀⣀⣤⣶⣶⣿⣿⣿⣿⣿⣶⣶⣤⣤⣀⣀⣀⠀⠀⣀⣀⣀⣀⣤⣶⣶⣿⣿⣿⣿⣿⣶⣶⣤⣤⣀⣀⣀⠀⠀⣀ - ⠛⠿⠿⣿⣿⣿⣿⣿⠿⠿⠛⠛⠉⠉⠉⠉⠀⠉⠉⠉⠉⠛⠿⠿⣿⣿⣿⣿⣿⠿⠿⠛⠛⠉⠉⠉⠀⠀⠉⠉⠉⠉⠛⠿⠿⣿⣿⣿⣿⣿⠿⠿⠛⠛⠉⠉⠉⠀⠀⠉ -*/ - -// god bless gemini for this one +// Waveform renders a live symmetric audio bar visualizer using Braille characters. +// +// Each column has a fixed variation factor producing irregular bar heights — +// taller bars respond more to loud audio, creating an organic spectrogram feel. +// Bars extend symmetrically above and below a center baseline. +// Silent columns show a dim gray centerline. Speaking columns fill in active color. +// +// silent: ⣀⣀⣀⣀⣀⣀⣀⣀⣀⣀⣀⣀⣀⣀⣀⣀⣀⣀⣀⣀ +// ⠉⠉⠉⠉⠉⠉⠉⠉⠉⠉⠉⠉⠉⠉⠉⠉⠉⠉⠉⠉ +// active: ⣿⣶⣤⣀⣤⣿⣦⣀⣤⣶⣿⣷⣤⣀⣤⣿⣶⣤⣀⣤ +// ⣿⣿⣿⣀⣿⣿⣿⣀⣿⣿⣿⣿⣿⣀⣿⣿⣿⣀⣿⣿ type Waveform struct { - history []float64 - width int + smoothed float64 + variation []float64 + width int +} + +const dimColor = lipgloss.Color("#3a3a4a") +const ampThreshold = 0.01 + +// riseAlpha: how fast amplitude rises (0=no rise, 1=instant). +// fallAlpha: how fast amplitude falls — 0.26 reaches near-zero in ~500ms (clear pauses, not jumpy). +const riseAlpha = 0.85 +const fallAlpha = 0.26 + +// row0Levels: braille masks for top char (row0), filling from center upward. +// Index = number of dot-rows filled (0=empty, 4=full block). +var row0Levels = [5]rune{ + 0x2800, // 0: empty + 0x2800 | 0x40 | 0x80, // 1: dots 7,8 (bottom of top char = center) + 0x2800 | 0x40 | 0x80 | 0x04 | 0x20, // 2: + dots 3,6 + 0x2800 | 0x40 | 0x80 | 0x04 | 0x20 | 0x02 | 0x10, // 3: + dots 2,5 + 0x28FF, // 4: all dots +} + +// row1Levels: braille masks for bottom char (row1), filling from center downward. +var row1Levels = [5]rune{ + 0x2800, // 0: empty + 0x2800 | 0x01 | 0x08, // 1: dots 1,4 (top of bottom char = center) + 0x2800 | 0x01 | 0x08 | 0x02 | 0x10, // 2: + dots 2,5 + 0x2800 | 0x01 | 0x08 | 0x02 | 0x10 | 0x04 | 0x20, // 3: + dots 3,6 + 0x28FF, // 4: all dots } func NewWaveform(width int) *Waveform { @@ -23,119 +56,107 @@ func NewWaveform(width int) *Waveform { width = 40 } return &Waveform{ - history: make([]float64, width), - width: width, + variation: buildVariation(width), + width: width, } } +// buildVariation generates per-column multipliers (0.35–1.0) using overlapping +// sine waves so adjacent columns have correlated but irregular heights. +func buildVariation(width int) []float64 { + v := make([]float64, width) + for i := range v { + f := float64(i) + raw := math.Sin(f*0.9+0.4)*0.5 + math.Sin(f*0.3+1.1)*0.3 + math.Sin(f*1.7)*0.2 + // raw in ~[-1,1]; map to [0.35, 1.0] + v[i] = 0.35 + 0.65*(raw+1.0)/2.0 + } + return v +} + +// Update pushes a new amplitude sample. Rise is fast, fall is smoothed. func (w *Waveform) Update(amp float64) { - // push the new amplitude and slide the history - w.history = append(w.history[1:], amp) + if amp > w.smoothed { + w.smoothed = w.smoothed*(1-riseAlpha) + amp*riseAlpha + } else { + w.smoothed = w.smoothed*(1-fallAlpha) + amp*fallAlpha + } + if w.smoothed < ampThreshold { + w.smoothed = 0 + } } func (w *Waveform) SetWidth(width int) { if width <= 0 || width == w.width { return } - // resizing logic so we don't lose the waves when the window moves - newHistory := make([]float64, width) - copyLen := len(w.history) - if copyLen > width { - copyLen = width - } - copy(newHistory[width-copyLen:], w.history[len(w.history)-copyLen:]) w.width = width - w.history = newHistory + w.variation = buildVariation(width) } -// creates a symmetric waveform using multi-row Braille func (w *Waveform) Render(style lipgloss.Style, label string) string { - height := 2 // 2 rows of braille characters = 8 dots high. Very high res. - rows := make([]strings.Builder, height) + activeColor := style.GetForeground() + bg := style.GetBackground() - // Braille dot layout: - // 1 4 - // 2 5 - // 3 6 - // 7 8 + // 4 rows: [0]=far-top [1]=near-top [2]=near-bottom [3]=far-bottom + var rows [4]strings.Builder for i := 0; i < w.width; i++ { - amp := w.history[i] - - // Map 0.0-1.0 to 0-8 dots total height (4 dots per row) - totalDots := amp * float64(height) * 4.0 - if totalDots < 1.0 && amp > 0.01 { - totalDots = 1.0 - } - - // We want to fill dots from the middle outwards. - // Center is between row 0/1 and dots 2/3/4/5 - - for r := 0; r < height; r++ { - var char rune = 0x2800 - - // Dots to enable in this row (0 to 4 dots) - // Row 0 is top, Row 1 is bottom - // For 2 rows, center is between row 0 (bottom dots) and row 1 (top dots) - - for d := 0; d < 4; d++ { - // Distance from center line - // center = row 1 top (dist 0) - var dist float64 - if r == 0 { - // Row 0 dots (from bottom to top) - // dot 7,8: dist 0.5 - // dot 3,6: dist 1.5 - // dot 2,5: dist 2.5 - // dot 1,4: dist 3.5 - dist = float64(3-d) + 0.5 - } else { - // Row 1 dots (from top to bottom) - // dot 1,4: dist 0.5 - // dot 2,5: dist 1.5 - // dot 3,6: dist 2.5 - // dot 7,8: dist 3.5 - dist = float64(d) + 0.5 - } - - if dist <= totalDots/2.0 { - switch d { - case 0: - char |= 0x01 | 0x08 // 1,4 - case 1: - char |= 0x02 | 0x10 // 2,5 - case 2: - char |= 0x04 | 0x20 // 3,6 - case 3: - char |= 0x40 | 0x80 // 7,8 - } - } + colAmp := w.smoothed * w.variation[i] + active := colAmp >= ampThreshold + + var chs [4]rune + var color lipgloss.TerminalColor + + if !active { + chs[0] = row0Levels[0] // empty (far top) + chs[1] = row0Levels[1] // dim centerline (near top) + chs[2] = row1Levels[1] // dim centerline (near bottom) + chs[3] = row1Levels[0] // empty (far bottom) + color = dimColor + } else { + // gamma 0.42: close to original sensitivity, works with scaled RMS. + // 0.05→0.18, 0.1→0.27, 0.3→0.53, 0.7→0.82, 1.0→1.0 + scaled := math.Pow(colAmp, 0.42) + n := int(math.Round(scaled * 8)) + if n < 1 { + n = 1 + } + if n > 8 { + n = 8 + } + near := n + if near > 4 { + near = 4 + } + far := n - 4 + if far < 0 { + far = 0 } - rows[r].WriteRune(char) + chs[0] = row0Levels[far] // far top fills outward from row1 + chs[1] = row0Levels[near] // near top fills from center + chs[2] = row1Levels[near] // near bottom fills from center + chs[3] = row1Levels[far] // far bottom fills outward from row2 + color = activeColor } - } - - // use the background from the style if it exists, otherwise fallback to transparent - waveStyle := lipgloss.NewStyle().Foreground(style.GetForeground()) - if style.GetBackground() != nil { - waveStyle = waveStyle.Background(style.GetBackground()) - } - // ensure every line in the wave block has a background and fills the width - // otherwise we get "holes" where the background doesn't reach - var waveRows []string - for i := 0; i < height; i++ { - waveRows = append(waveRows, waveStyle.Width(w.width).Render(rows[i].String())) + cs := lipgloss.NewStyle().Foreground(color).Background(bg) + for r := range rows { + rows[r].WriteString(cs.Render(string(chs[r]))) + } } - // speaker title (like ORA VOICE) sits right above the wave + bgFill := lipgloss.NewStyle().Background(bg).Width(w.width) lblStyle := lipgloss.NewStyle(). - Foreground(style.GetForeground()). - Background(style.GetBackground()). // force background on label - Width(w.width). // force label to be as wide as the wave + Foreground(activeColor). + Background(bg). + Width(w.width). Bold(true) - // join the label and wave block vertically - elements := append([]string{lblStyle.Render(label)}, waveRows...) - return lipgloss.JoinVertical(lipgloss.Left, elements...) + elems := make([]string, 5) // label + 4 rows + elems[0] = lblStyle.Render(label) + for i := range rows { + elems[i+1] = bgFill.Render(rows[i].String()) + } + return lipgloss.JoinVertical(lipgloss.Left, elems...) } diff --git a/internal/ui/waveform_test.go b/internal/ui/waveform_test.go index 9c18f7e..6bf94a7 100644 --- a/internal/ui/waveform_test.go +++ b/internal/ui/waveform_test.go @@ -2,104 +2,98 @@ package ui import ( "fmt" - "math" "strings" "testing" "github.com/charmbracelet/lipgloss" ) -func TestWaveform_Logic(t *testing.T) { - width := 10 - w := NewWaveform(width) - - if len(w.history) != width { - t.Errorf("expected width %d, got %d", width, len(w.history)) - } - - for i := 0; i < width; i++ { - w.Update(float64(i) / 10.0) +func TestWaveform_InitialisedToZero(t *testing.T) { + w := NewWaveform(10) + if w.smoothed != 0 { + t.Errorf("expected smoothed=0, got %f", w.smoothed) } - - if w.history[width-1] != 0.9 { - t.Errorf("expected latest value 0.9, got %f", w.history[width-1]) + if w.width != 10 || len(w.variation) != 10 { + t.Errorf("expected width=10 variation=10, got width=%d variation=%d", w.width, len(w.variation)) } +} - // update once more and check scroll +func TestWaveform_RisesFast(t *testing.T) { + w := NewWaveform(10) w.Update(1.0) - if w.history[0] != 0.1 { - t.Errorf("expected scroll: first value should be 0.1, got %f", w.history[0]) + if w.smoothed < 0.5 { + t.Errorf("expected fast rise toward 1.0, got %f", w.smoothed) } } -func TestWaveform_VisualDemo(t *testing.T) { - /* - NOTE - This test is intended for manual visual verification - Run with: go test -v ./internal/ui - */ - width := 60 - w := NewWaveform(width) - style := lipgloss.NewStyle().Foreground(lipgloss.Color("#AF87FF")) - - fmt.Println("\n--- Waveform Visual Test (Sine Wave) ---") - - // a simple sine wave pattern - for i := 0; i < width; i++ { - amp := 0.5 + 0.5*math.Sin(float64(i)*0.3) - w.Update(amp) +func TestWaveform_FallingEdgeIsSmoothed(t *testing.T) { + w := NewWaveform(10) + // drive up + for range 5 { + w.Update(1.0) } - - output := w.Render(style, "SINE TEST") - fmt.Println(output) - fmt.Println("----------------------------------------") - - if !strings.Contains(output, "SINE TEST") { - t.Error("Render output missing label") + high := w.smoothed + if high < 0.5 { + t.Fatalf("expected high amplitude after repeated Update(1.0), got %f", high) + } + // single zero tick should not drop to zero + w.Update(0.0) + if w.smoothed == 0 { + t.Error("expected smoothed fall, not instant drop to 0") + } + if w.smoothed >= high { + t.Errorf("expected decay below peak %f, got %f", high, w.smoothed) } } -func TestWaveform_Resize(t *testing.T) { - /* - NOTE - This test is intended for manual visual verification - Run with: go test -v ./internal/ui - */ - - style := lipgloss.NewStyle().Foreground(lipgloss.Color("#7D7D7D")) - - fmt.Println("\n--- Waveform Resize Test ---") - - w := NewWaveform(30) - // ramp up - for i := 0; i < 30; i++ { - w.Update(float64(i) / 30.0) +func TestWaveform_VariationNonZero(t *testing.T) { + w := NewWaveform(20) + for i, v := range w.variation { + if v <= 0 || v > 1.0 { + t.Errorf("variation[%d]=%f out of (0,1] range", i, v) + } } - fmt.Println("Original (Width 30):") - fmt.Println(w.Render(style, "WIDTH 30")) - - // expand the pattern - w.SetWidth(60) - fmt.Println("\nExpanded (Width 60 - Pattern should be on the right):") - fmt.Println(w.Render(style, "WIDTH 60")) +} - if len(w.history) != 60 { - t.Errorf("expected width 60, got %d", len(w.history)) +func TestWaveform_SetWidth(t *testing.T) { + w := NewWaveform(20) + w.SetWidth(40) + if w.width != 40 || len(w.variation) != 40 { + t.Errorf("expected width=40, got width=%d variation=%d", w.width, len(w.variation)) } - if w.history[59] < 0.9 { - t.Error("Resize (expand) lost history data at the end") + w.SetWidth(0) // invalid — no-op + if w.width != 40 { + t.Errorf("SetWidth(0) should be no-op, got %d", w.width) } +} - // shrink the pattern - w.SetWidth(15) - fmt.Println("\nShrunk (Width 15 - Only the latest ramp should be visible):") - fmt.Println(w.Render(style, "WIDTH 15")) +func TestWaveform_RenderContainsLabel(t *testing.T) { + w := NewWaveform(20) + style := lipgloss.NewStyle().Foreground(lipgloss.Color("#AF87FF")) + out := w.Render(style, "MICROPHONE") + if !strings.Contains(out, "MICROPHONE") { + t.Error("Render missing label") + } +} + +func TestWaveform_VisualDemo(t *testing.T) { + w := NewWaveform(40) + style := lipgloss.NewStyle().Foreground(lipgloss.Color("#10b981")) + + fmt.Println("\n--- silent ---") + fmt.Println(w.Render(style, "SILENT")) - if len(w.history) != 15 { - t.Errorf("expected width 15, got %d", len(w.history)) + // drive amplitude up + for _, a := range []float64{0.1, 0.3, 0.6, 0.9, 1.0, 0.8, 0.5, 0.3, 0.1, 0} { + w.Update(a) } - if w.history[14] < 0.9 { - t.Error("Resize (shrink) lost history data at the end") + fmt.Println("\n--- speaking ---") + fmt.Println(w.Render(style, "SPEAKING")) + + // let it decay + for range 10 { + w.Update(0) } - fmt.Println("----------------------------") + fmt.Println("\n--- after decay ---") + fmt.Println(w.Render(style, "DECAYED")) }