Skip to content

Commit 7d61085

Browse files
committed
fix: address CI failures and review feedback
- Auto-format all Python files with yapf (pep8 style, 120 col limit)
- Fix ruff lint error (1 auto-fixed issue)
- Remove redundant langchain-core and langgraph deps from pyproject.toml per reviewer feedback (already included via nvidia-nat[langchain])

Signed-off-by: futhgar <jmaldonado.rosa@gmail.com>
1 parent e8ad305 commit 7d61085

7 files changed

Lines changed: 146 additions & 125 deletions

File tree

examples/k8s_infra_monitor/pyproject.toml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,6 @@ requires-python = ">=3.11,<3.14"
3131
description = "Kubernetes Infrastructure Monitor using NeMo Agent Toolkit"
3232
dependencies = [
3333
"nvidia-nat[eval,langchain,profiler,test]~=1.5",
34-
"langchain-core",
35-
"langgraph>=0.0.10",
3634
]
3735
keywords = ["ai", "kubernetes", "monitoring", "agents"]
3836
classifiers = ["Programming Language :: Python"]

examples/k8s_infra_monitor/src/nat_k8s_infra_monitor/event_collector_tool.py

Lines changed: 42 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
15-
1615
"""Tool for collecting and analyzing Kubernetes cluster events."""
1716

1817
import json
@@ -87,17 +86,23 @@ def _run_live(kubeconfig_path: str | None, event_limit: int) -> str:
8786
# Get warning events
8887
try:
8988
result = subprocess.run(
90-
[*cmd_base, "get", "events", "--all-namespaces",
91-
"--field-selector=type=Warning",
92-
"--sort-by=.lastTimestamp",
93-
"--no-headers"],
94-
capture_output=True, text=True, timeout=30, check=False,
89+
[
90+
*cmd_base,
91+
"get",
92+
"events",
93+
"--all-namespaces",
94+
"--field-selector=type=Warning",
95+
"--sort-by=.lastTimestamp",
96+
"--no-headers"
97+
],
98+
capture_output=True,
99+
text=True,
100+
timeout=30,
101+
check=False,
95102
)
96103
if result.returncode != 0:
97-
sections.append(
98-
"Error: kubectl failed while fetching warning events\n"
99-
f"```\n{(result.stderr or result.stdout).strip()}\n```"
100-
)
104+
sections.append("Error: kubectl failed while fetching warning events\n"
105+
f"```\n{(result.stderr or result.stdout).strip()}\n```")
101106
else:
102107
warnings = result.stdout.strip()
103108
if warnings:
@@ -111,27 +116,32 @@ def _run_live(kubeconfig_path: str | None, event_limit: int) -> str:
111116
# Get recent events summary
112117
try:
113118
result = subprocess.run(
114-
[*cmd_base, "get", "events", "--all-namespaces",
115-
"--sort-by=.lastTimestamp",
116-
"-o", "custom-columns="
117-
"NAMESPACE:.metadata.namespace,"
118-
"TYPE:.type,"
119-
"REASON:.reason,"
120-
"OBJECT:.involvedObject.kind/.involvedObject.name,"
121-
"MESSAGE:.message",
122-
"--no-headers"],
123-
capture_output=True, text=True, timeout=30, check=False,
119+
[
120+
*cmd_base,
121+
"get",
122+
"events",
123+
"--all-namespaces",
124+
"--sort-by=.lastTimestamp",
125+
"-o",
126+
"custom-columns="
127+
"NAMESPACE:.metadata.namespace,"
128+
"TYPE:.type,"
129+
"REASON:.reason,"
130+
"OBJECT:.involvedObject.kind/.involvedObject.name,"
131+
"MESSAGE:.message",
132+
"--no-headers"
133+
],
134+
capture_output=True,
135+
text=True,
136+
timeout=30,
137+
check=False,
124138
)
125139
if result.returncode != 0:
126-
sections.append(
127-
"Error: kubectl failed while fetching recent events\n"
128-
f"```\n{(result.stderr or result.stdout).strip()}\n```"
129-
)
140+
sections.append("Error: kubectl failed while fetching recent events\n"
141+
f"```\n{(result.stderr or result.stdout).strip()}\n```")
130142
elif result.stdout.strip():
131143
lines = result.stdout.strip().split("\n")[:event_limit]
132-
sections.append(
133-
f"## Recent Events ({len(lines)} most recent)\n```\n" + "\n".join(lines) + "\n```"
134-
)
144+
sections.append(f"## Recent Events ({len(lines)} most recent)\n```\n" + "\n".join(lines) + "\n```")
135145
except subprocess.TimeoutExpired:
136146
pass
137147

@@ -140,10 +150,8 @@ def _run_live(kubeconfig_path: str | None, event_limit: int) -> str:
140150

141151
def _get_default_healthy_response() -> str:
142152
"""Return a default healthy event response for offline mode."""
143-
return (
144-
"## Warning Events\n"
145-
"No warning events found.\n\n"
146-
"## Recent Events\n"
147-
"Recent events are routine: Pulled, Created, Started, Scheduled. "
148-
"No abnormal patterns detected."
149-
)
153+
return ("## Warning Events\n"
154+
"No warning events found.\n\n"
155+
"## Recent Events\n"
156+
"Recent events are routine: Pulled, Created, Started, Scheduled. "
157+
"No abnormal patterns detected.")

examples/k8s_infra_monitor/src/nat_k8s_infra_monitor/node_status_tool.py

Lines changed: 21 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
15-
1615
"""Tool for retrieving Kubernetes node status and resource utilization."""
1716

1817
import json
@@ -32,10 +31,8 @@ class NodeStatusToolConfig(FunctionBaseConfig, name="node_status_check"):
3231
"""Configuration for the Kubernetes node status check tool."""
3332

3433
offline_mode: bool = Field(default=True, description="Whether to run in offline mode")
35-
kubeconfig_path: str | None = Field(
36-
default=None,
37-
description="Path to kubeconfig file. If None, uses default kubectl config."
38-
)
34+
kubeconfig_path: str | None = Field(default=None,
35+
description="Path to kubeconfig file. If None, uses default kubectl config.")
3936

4037

4138
@register_function(config_type=NodeStatusToolConfig)
@@ -88,13 +85,14 @@ def _run_live(kubeconfig_path: str | None) -> str:
8885
try:
8986
result = subprocess.run(
9087
[*cmd_base, "get", "nodes", "-o", "wide", "--no-headers"],
91-
capture_output=True, text=True, timeout=30, check=False,
88+
capture_output=True,
89+
text=True,
90+
timeout=30,
91+
check=False,
9292
)
9393
if result.returncode != 0:
94-
sections.append(
95-
"Error: kubectl failed while fetching node status\n"
96-
f"```\n{(result.stderr or result.stdout).strip()}\n```"
97-
)
94+
sections.append("Error: kubectl failed while fetching node status\n"
95+
f"```\n{(result.stderr or result.stdout).strip()}\n```")
9896
else:
9997
sections.append(f"## Node Status\n```\n{result.stdout.strip()}\n```")
10098
except subprocess.TimeoutExpired:
@@ -104,13 +102,14 @@ def _run_live(kubeconfig_path: str | None) -> str:
104102
try:
105103
result = subprocess.run(
106104
[*cmd_base, "top", "nodes", "--no-headers"],
107-
capture_output=True, text=True, timeout=30, check=False,
105+
capture_output=True,
106+
text=True,
107+
timeout=30,
108+
check=False,
108109
)
109110
if result.returncode != 0:
110-
sections.append(
111-
"Error: kubectl top failed\n"
112-
f"```\n{(result.stderr or result.stdout).strip()}\n```"
113-
)
111+
sections.append("Error: kubectl top failed\n"
112+
f"```\n{(result.stderr or result.stdout).strip()}\n```")
114113
else:
115114
sections.append(f"## Node Resource Usage\n```\n{result.stdout.strip()}\n```")
116115
except subprocess.TimeoutExpired:
@@ -121,12 +120,10 @@ def _run_live(kubeconfig_path: str | None) -> str:
121120

122121
def _get_default_healthy_response() -> str:
123122
"""Return a default healthy node status response for offline mode."""
124-
return (
125-
"## Node Status\n"
126-
"All 3 nodes are in Ready state.\n"
127-
"- control-plane-1: Ready, SchedulingDisabled (control-plane taint)\n"
128-
"- worker-1: Ready, 6 vCPU, 30Gi RAM, 74% CPU, 62% memory\n"
129-
"- worker-2: Ready, 6 vCPU, 20Gi RAM, 45% CPU, 51% memory\n\n"
130-
"## Node Resource Usage\n"
131-
"No resource pressure conditions detected on any node."
132-
)
123+
return ("## Node Status\n"
124+
"All 3 nodes are in Ready state.\n"
125+
"- control-plane-1: Ready, SchedulingDisabled (control-plane taint)\n"
126+
"- worker-1: Ready, 6 vCPU, 30Gi RAM, 74% CPU, 62% memory\n"
127+
"- worker-2: Ready, 6 vCPU, 20Gi RAM, 45% CPU, 51% memory\n\n"
128+
"## Node Resource Usage\n"
129+
"No resource pressure conditions detected on any node.")

examples/k8s_infra_monitor/src/nat_k8s_infra_monitor/pod_health_tool.py

Lines changed: 42 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
15-
1615
"""Tool for checking Kubernetes pod health across namespaces."""
1716

1817
import json
@@ -33,10 +32,8 @@ class PodHealthToolConfig(FunctionBaseConfig, name="pod_health_check"):
3332

3433
offline_mode: bool = Field(default=True, description="Whether to run in offline mode")
3534
kubeconfig_path: str | None = Field(default=None, description="Path to kubeconfig file")
36-
namespaces: list[str] | None = Field(
37-
default=None,
38-
description="Specific namespaces to check. If None, checks all namespaces."
39-
)
35+
namespaces: list[str] | None = Field(default=None,
36+
description="Specific namespaces to check. If None, checks all namespaces.")
4037

4138

4239
@register_function(config_type=PodHealthToolConfig)
@@ -93,29 +90,39 @@ def _run_live(kubeconfig_path: str | None, namespaces: list[str] | None) -> str:
9390
try:
9491
result = subprocess.run(
9592
[*cmd_base, "get", "pods", "-n", ns, "-o", "wide", "--no-headers"],
96-
capture_output=True, text=True, timeout=30, check=False,
93+
capture_output=True,
94+
text=True,
95+
timeout=30,
96+
check=False,
9797
)
9898
if result.returncode != 0:
99-
sections.append(
100-
f"### Namespace: {ns}\nError: kubectl failed\n"
101-
f"```\n{(result.stderr or result.stdout).strip()}\n```"
102-
)
99+
sections.append(f"### Namespace: {ns}\nError: kubectl failed\n"
100+
f"```\n{(result.stderr or result.stdout).strip()}\n```")
103101
else:
104102
sections.append(f"### Namespace: {ns}\n```\n{result.stdout.strip()}\n```")
105103
except subprocess.TimeoutExpired:
106104
sections.append(f"### Namespace: {ns}\nError: kubectl timed out")
107105
else:
108106
try:
109107
result = subprocess.run(
110-
[*cmd_base, "get", "pods", *ns_flag, "-o", "wide", "--no-headers",
111-
"--field-selector=status.phase!=Running,status.phase!=Succeeded"],
112-
capture_output=True, text=True, timeout=30, check=False,
108+
[
109+
*cmd_base,
110+
"get",
111+
"pods",
112+
*ns_flag,
113+
"-o",
114+
"wide",
115+
"--no-headers",
116+
"--field-selector=status.phase!=Running,status.phase!=Succeeded"
117+
],
118+
capture_output=True,
119+
text=True,
120+
timeout=30,
121+
check=False,
113122
)
114123
if result.returncode != 0:
115-
sections.append(
116-
"Error: kubectl failed while fetching pod status\n"
117-
f"```\n{(result.stderr or result.stdout).strip()}\n```"
118-
)
124+
sections.append("Error: kubectl failed while fetching pod status\n"
125+
f"```\n{(result.stderr or result.stdout).strip()}\n```")
119126
else:
120127
unhealthy = result.stdout.strip()
121128
if unhealthy:
@@ -128,11 +135,20 @@ def _run_live(kubeconfig_path: str | None, namespaces: list[str] | None) -> str:
128135
# Check for pods with high restart counts (> 5)
129136
try:
130137
result = subprocess.run(
131-
[*cmd_base, "get", "pods", "--all-namespaces", "-o",
132-
"jsonpath={range .items[*]}{.metadata.namespace}{' '}"
133-
"{.metadata.name}{' '}{range .status.containerStatuses[*]}"
134-
"{.restartCount}{' '}{end}{'\\n'}{end}"],
135-
capture_output=True, text=True, timeout=30, check=False,
138+
[
139+
*cmd_base,
140+
"get",
141+
"pods",
142+
"--all-namespaces",
143+
"-o",
144+
"jsonpath={range .items[*]}{.metadata.namespace}{' '}"
145+
"{.metadata.name}{' '}{range .status.containerStatuses[*]}"
146+
"{.restartCount}{' '}{end}{'\\n'}{end}"
147+
],
148+
capture_output=True,
149+
text=True,
150+
timeout=30,
151+
check=False,
136152
)
137153
if result.returncode == 0:
138154
high_restarts = []
@@ -153,9 +169,7 @@ def _run_live(kubeconfig_path: str | None, namespaces: list[str] | None) -> str:
153169

154170
def _get_default_healthy_response() -> str:
155171
"""Return a default healthy pod status response for offline mode."""
156-
return (
157-
"## Pod Health Summary\n"
158-
"All pods are in Running or Succeeded state across all namespaces.\n\n"
159-
"## High Restart Pods\n"
160-
"No pods with excessive restart counts detected."
161-
)
172+
return ("## Pod Health Summary\n"
173+
"All pods are in Running or Succeeded state across all namespaces.\n\n"
174+
"## High Restart Pods\n"
175+
"No pods with excessive restart counts detected.")

examples/k8s_infra_monitor/src/nat_k8s_infra_monitor/register.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -121,11 +121,9 @@ async def _analyze_cluster(input_message: str) -> str:
121121

122122
if not result:
123123
utils.logger.warning("Agent returned empty report (input_length=%d)", len(input_message))
124-
result = (
125-
"The agent was unable to generate a diagnostic report for this query. "
126-
"This may indicate the LLM model is insufficient for the task complexity. "
127-
"Consider using a larger model (e.g. meta/llama-3.3-70b-instruct).\n\n"
128-
)
124+
result = ("The agent was unable to generate a diagnostic report for this query. "
125+
"This may indicate the LLM model is insufficient for the task complexity. "
126+
"Consider using a larger model (e.g. meta/llama-3.3-70b-instruct).\n\n")
129127

130128
# Append severity classification
131129
severity = await severity_tool.arun(result)

0 commit comments

Comments (0)