From 7672791afc695c207610191fc9d5420b932c8182 Mon Sep 17 00:00:00 2001
From: GitHub Copilot <copilot@github.com>
Date: Fri, 13 Mar 2026 19:14:07 -0300
Subject: [PATCH] feat(tui): implement F42 step filters in dashboard

Implements F42-tui-filters SPEC with keyboard shortcuts (f, r, x) for filtering steps by status.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 features/F42-tui-filters/SPEC.md     |  34 ++++++++
 src/aignt_os/cli/dashboard.py        |  59 ++++++++++++-
 tests/unit/test_dashboard_filters.py | 122 +++++++++++++++++++++++++++
 3 files changed, 212 insertions(+), 3 deletions(-)
 create mode 100644 features/F42-tui-filters/SPEC.md
 create mode 100644 tests/unit/test_dashboard_filters.py

diff --git a/features/F42-tui-filters/SPEC.md b/features/F42-tui-filters/SPEC.md
new file mode 100644
index 0000000..fecbe63
--- /dev/null
+++ b/features/F42-tui-filters/SPEC.md
@@ -0,0 +1,34 @@
+---
+id: F42-tui-filters
+type: feature
+summary: "Filtros de visualização de steps no dashboard TUI para facilitar análise de runs longas ou com falhas."
+inputs:
+  - "Interação via teclado no dashboard TUI (aignt runs watch)"
+outputs:
+  - "Lista de steps filtrada na interface"
+  - "Indicador visual do filtro ativo"
+acceptance_criteria:
+  - "O dashboard deve iniciar exibindo todos os steps (comportamento padrão)"
+  - "Deve ser possível filtrar steps por estado 'failed' via tecla de atalho (ex: 'f')"
+  - "Deve ser possível filtrar steps por estado 'running/pending' via tecla de atalho (ex: 'r')"
+  - "Deve ser possível restaurar a visualização de todos os steps via tecla de atalho (ex: 'x'; 'a' já está associada a Artifacts)"
+  - "A interface deve exibir claramente qual filtro está ativo no momento"
+  - "A navegação (seleção de itens) deve se manter funcional após a aplicação do filtro"
+non_goals:
+  - "Busca textual livre (regex) nos logs ou nomes de steps"
+  - "Persistência de filtros entre sessões do CLI"
+  - "Combinação complexa de múltiplos filtros (AND/OR)"
+---
+
+# Contexto
+
+O dashboard TUI (`aignt runs watch`) atualmente exibe uma lista linear de todos os steps executados em uma run. Em pipelines longos ou com muitas iterações (ex: loops de `worker`), essa lista pode crescer significativamente, dificultando a identificação rápida de falhas ou o acompanhamento dos steps ativos. A necessidade de "rolar" manualmente para encontrar erros é um ponto de atrito na UX, especialmente para triagem de falhas.
+
+# Objetivo
+
+Implementar um mecanismo de filtragem simples e rápido na lista de steps do dashboard TUI. O objetivo é permitir que o usuário alterne visualizações com teclas de atalho (mnemônicos) para focar no que importa no momento:
+- **Erros**: Ver apenas o que falhou.
+- **Atividade**: Ver apenas o que está rodando ou pendente.
+- **Geral**: Ver o histórico completo.
+
+A implementação deve ser puramente visual (client-side no TUI), sem alterar a persistência ou a lógica de execução.
diff --git a/src/aignt_os/cli/dashboard.py b/src/aignt_os/cli/dashboard.py
index 850970c..09b99c3 100644
--- a/src/aignt_os/cli/dashboard.py
+++ b/src/aignt_os/cli/dashboard.py
@@ -462,6 +462,9 @@ class RunDashboard(App[None]):
         ("q", "quit", "Quit"),
         ("enter", "show_logs", "Show Logs"),
         ("a", "show_artifacts", "Artifacts"),
+        ("f", "filter_failed", "Filter Failed"),
+        ("r", "filter_active", "Filter Active"),
+        ("x", "filter_all", "Reset Filters"),
     ]
 
     def __init__(self, run_id: str, refresh_interval: float = 1.0) -> None:
@@ -478,6 +481,25 @@ def __init__(self, run_id: str, refresh_interval: float = 1.0) -> None:
         self.artifact_explorer = ArtifactExplorer()
 
         self.steps_count = 0
+        self.current_filter: str = "all"  # all, failed, active
+
+    def action_filter_failed(self) -> None:
+        """Show only steps whose status is ``failed``, then refresh and notify."""
+        self.current_filter = "failed"
+        self.refresh_data()
+        self.notify("Filter: Failed steps only")
+
+    def action_filter_active(self) -> None:
+        """Show only ``running`` or ``pending`` steps, then refresh and notify."""
+        self.current_filter = "active"
+        self.refresh_data()
+        self.notify("Filter: Active steps only")
+
+    def action_filter_all(self) -> None:
+        """Clear the step filter so every step is listed, then refresh and notify."""
+        self.current_filter = "all"
+        self.refresh_data()
+        self.notify("Filter: All steps")
 
     def action_show_artifacts(self) -> None:
         """Switch to artifacts tab."""
@@ -568,6 +590,19 @@ def refresh_data(self) -> None:
 
             steps = self.repository.list_steps(self.run_id)
 
+            # Apply filter
+            filtered_steps = steps
+            if self.current_filter == "failed":
+                filtered_steps = [s for s in steps if s.status == "failed"]
+            elif self.current_filter == "active":
+                filtered_steps = [s for s in steps if s.status in ("running", "pending")]
+
+            # Update title to reflect filter
+            filter_text = ""
+            if self.current_filter != "all":
+                filter_text = f" [FILTER: {self.current_filter}]"
+            self.title = f"AIgnt OS Watcher - {self.run_id}{filter_text}"
+
             # Simple diff: rebuild list if count changes or status changes
             # For MVP simplicity, verify if rebuild is needed
             # Or just rebuild if count matches but status might change?
@@ -575,8 +610,21 @@ def refresh_data(self) -> None:
             # Ideally we update items in place, but ListView API is list-based.
             # Let's rebuild only if count changes for now (new steps),
             # OR if last step status changed.
+            # OR if filter changed (which forces rebuild)
 
             should_rebuild = False
+            # If filtered count differs from current visible count
+            if len(filtered_steps) != len(self.step_list.children):
+                should_rebuild = True
+
+            # If we haven't rebuilt yet, check if content changed
+            if not should_rebuild and filtered_steps:
+                # Check first and last item as heuristic
+                first_item = self.step_list.children[0]
+                if isinstance(first_item, StepItem):
+                    if str(first_item.step.step_id) != str(filtered_steps[0].step_id):
+                        should_rebuild = True
+
             if len(steps) != self.steps_count:
                 should_rebuild = True
             elif steps and self.steps_count > 0:
@@ -584,6 +632,10 @@ def refresh_data(self) -> None:
                 # In a real app we would check all, but this is a heuristic for optimization
                 pass
 
+            # Always rebuild if filter is active to ensure correctness without complex diff
+            if self.current_filter != "all":
+                should_rebuild = True
+
             # Forcing rebuild for now to ensure correctness
             should_rebuild = True
 
@@ -594,12 +646,13 @@ def refresh_data(self) -> None:
                 current_index = self.step_list.index
 
                 self.step_list.clear()
-                for step in steps:
+                for step in filtered_steps:
                     self.step_list.append(StepItem(step))
 
-                if current_index is not None and current_index < len(steps):
+                # Restore selection if valid
+                if current_index is not None and current_index < len(filtered_steps):
                     self.step_list.index = current_index
-                elif len(steps) > 0:
+                elif len(filtered_steps) > 0:
                     self.step_list.index = 0
 
         except Exception as e:
diff --git a/tests/unit/test_dashboard_filters.py b/tests/unit/test_dashboard_filters.py
new file mode 100644
index 0000000..98e1841
--- /dev/null
+++ b/tests/unit/test_dashboard_filters.py
@@ -0,0 +1,122 @@
+from unittest.mock import MagicMock
+
+from aignt_os.cli.dashboard import RunDashboard
+from aignt_os.persistence import RunStepRecord
+
+
+def create_step(step_id, status):
+    """Build a ``RunStepRecord`` for run ``test-run`` with the given id and status."""
+    return RunStepRecord(
+        step_id=step_id,
+        run_id="test-run",
+        tool_name="test-tool",
+        status=status,
+        state=status,  # simpler mapping for test
+        created_at="2023-01-01T00:00:00",
+        duration_ms=100,
+        raw_output_path=None,
+        clean_output_path=None,
+        return_code=0 if status == "completed" else 1,
+        timed_out=False,
+    )
+
+
+MOCK_STEPS = [  # fixture: two completed, two failed, one running, one pending
+    create_step(1, "completed"),
+    create_step(2, "failed"),
+    create_step(3, "running"),
+    create_step(4, "pending"),
+    create_step(5, "completed"),
+    create_step(6, "failed"),
+]
+
+
+class TestDashboardFilters:
+    """Unit tests for the step-filter actions of ``RunDashboard``."""
+
+    def setup_method(self):
+        """Build a dashboard whose repository and widgets are mocks."""
+        self.app = RunDashboard(run_id="test-run")
+        # The repository always returns the same six mixed-status steps.
+        self.app.repository = MagicMock()
+        self.app.repository.list_steps.return_value = MOCK_STEPS
+        # Mock UI elements that would be composed
+        self.app.step_list = MagicMock()
+        self.app.step_list.clear = MagicMock()
+        self.app.step_list.append = MagicMock()
+        # Mock run header update
+        self.app.run_header = MagicMock()
+        self.app.run_header.update_info = MagicMock()
+        self.app.artifact_explorer = MagicMock()
+        self.app.artifact_explorer.load_artifacts = MagicMock()
+
+    def test_default_filter_shows_all(self):
+        """All steps are listed when no filter has been selected."""
+        # Read the attribute directly: a getattr() default would mask its absence.
+        assert self.app.current_filter == "all"
+
+        self.app.refresh_data()
+
+        # Should append all 6 steps
+        assert self.app.step_list.append.call_count == 6
+
+    def test_filter_failed_steps(self):
+        """The failed filter lists only steps whose status is ``failed``."""
+        # The action refreshes the list itself; calling refresh_data() again
+        # here would append a second batch and inflate the mock's call count.
+        self.app.action_filter_failed()
+
+        assert self.app.current_filter == "failed"
+
+        # Should append only the 2 failed steps
+        assert self.app.step_list.append.call_count == 2
+
+        # Verify the items appended are indeed the failed ones
+        call_args_list = self.app.step_list.append.call_args_list
+        for call_args in call_args_list:
+            step_item = call_args[0][0]
+            assert step_item.step.status == "failed"
+
+        # Steps 2 and 6 are the failed entries of MOCK_STEPS, in that order.
+        appended_ids = [str(args[0][0].step.step_id) for args in call_args_list]
+        assert appended_ids == ["2", "6"]
+
+    def test_filter_active_steps(self):
+        """The active filter lists only running and pending steps."""
+        self.app.action_filter_active()
+
+        assert self.app.current_filter == "active"
+
+        # Should append running (1) and pending (1) = 2 steps
+        assert self.app.step_list.append.call_count == 2
+
+        call_args_list = self.app.step_list.append.call_args_list
+        for call_args in call_args_list:
+            step_item = call_args[0][0]
+            assert step_item.step.status in ("running", "pending")
+
+    def test_restore_all_filter(self):
+        """Resetting the filter lists every step again."""
+        # Set to failed first
+        self.app.action_filter_failed()
+        assert self.app.step_list.append.call_count == 2
+
+        # Reset counters
+        self.app.step_list.append.reset_mock()
+
+        # Switch to all
+        self.app.action_filter_all()
+        assert self.app.current_filter == "all"
+
+        # action_filter_all calls refresh_data
+        assert self.app.step_list.append.call_count == 6
+
+    def test_header_shows_active_filter(self):
+        """The window title names the active filter and drops it on reset."""
+        # Assert on the title alone: also accepting current_filter made this
+        # check pass even when no indicator was ever shown.
+        self.app.action_filter_failed()
+        assert "FILTER: failed" in self.app.title
+
+        self.app.action_filter_all()
+        assert "FILTER" not in self.app.title