From 3a38b6cf708a5991dcc4807192e33c61c128d96d Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Sat, 31 Jan 2026 17:07:18 +0000 Subject: [PATCH 1/3] feat(turtlebot3): add anomaly_detector for direct fault reporting - Add anomaly_detector.py that monitors navigation metrics and reports faults directly to FaultManager via /fault_manager/report_fault service - Monitors: navigation goal status (ABORTED/CANCELED), AMCL covariance, and robot progress - Remove inject-collision.sh and inject-controller-failure.sh as they produced redundant/unreliable faults - Simplify remaining inject scripts to rely on anomaly_detector - Update restore-normal.sh to remove lifecycle node reactivation - Add anomaly-detector to manifest (app + fault-management function) - Update README with new fault reporting architecture Fault codes: - NAVIGATION_GOAL_ABORTED: Navigation goal failed - NAVIGATION_GOAL_CANCELED: Navigation goal was canceled - LOCALIZATION_UNCERTAINTY: High AMCL covariance - NAVIGATION_NO_PROGRESS: Robot stuck during active goal --- README.md | 3 + demos/turtlebot3_integration/CMakeLists.txt | 6 + demos/turtlebot3_integration/Dockerfile | 1 + demos/turtlebot3_integration/README.md | 86 ++--- .../config/turtlebot3_manifest.yaml | 13 + .../inject-collision.sh | 52 --- .../inject-controller-failure.sh | 41 --- .../inject-localization-failure.sh | 30 +- .../inject-nav-failure.sh | 23 +- .../launch/demo.launch.py | 14 + .../turtlebot3_integration/restore-normal.sh | 12 +- .../scripts/anomaly_detector.py | 308 ++++++++++++++++++ demos/turtlebot3_integration/stop-demo.sh | 4 +- 13 files changed, 437 insertions(+), 156 deletions(-) delete mode 100755 demos/turtlebot3_integration/inject-collision.sh delete mode 100755 demos/turtlebot3_integration/inject-controller-failure.sh create mode 100755 demos/turtlebot3_integration/scripts/anomaly_detector.py diff --git a/README.md b/README.md index b562408..6bdf3d3 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,9 @@ # selfpatch_demos [![CI](https://github.com/selfpatch/selfpatch_demos/actions/workflows/ci.yml/badge.svg)](https://github.com/selfpatch/selfpatch_demos/actions/workflows/ci.yml) +[![Docs](https://img.shields.io/badge/docs-GitHub%20Pages-blue)](https://selfpatch.github.io/ros2_medkit/) +[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) +[![Discord](https://img.shields.io/badge/Discord-Join%20Us-7289DA?logo=discord&logoColor=white)](https://discord.gg/fEbWKTah) Demonstration projects showcasing [ros2_medkit](https://github.com/selfpatch/ros2_medkit) integration with real ROS 2 systems. diff --git a/demos/turtlebot3_integration/CMakeLists.txt b/demos/turtlebot3_integration/CMakeLists.txt index c839a2f..3c34733 100644 --- a/demos/turtlebot3_integration/CMakeLists.txt +++ b/demos/turtlebot3_integration/CMakeLists.txt @@ -13,4 +13,10 @@ install(DIRECTORY config/ DESTINATION share/${PROJECT_NAME}/config ) +# Install scripts +install(PROGRAMS + scripts/anomaly_detector.py + DESTINATION lib/${PROJECT_NAME} +) + ament_package() diff --git a/demos/turtlebot3_integration/Dockerfile b/demos/turtlebot3_integration/Dockerfile index 8464769..52437ba 100644 --- a/demos/turtlebot3_integration/Dockerfile +++ b/demos/turtlebot3_integration/Dockerfile @@ -59,6 +59,7 @@ RUN git clone --depth 1 --recurse-submodules https://github.com/selfpatch/ros2_m COPY package.xml CMakeLists.txt ${COLCON_WS}/src/turtlebot3_medkit_demo/ COPY config/ ${COLCON_WS}/src/turtlebot3_medkit_demo/config/ COPY launch/ ${COLCON_WS}/src/turtlebot3_medkit_demo/launch/ +COPY scripts/ ${COLCON_WS}/src/turtlebot3_medkit_demo/scripts/ # Build ros2_medkit and demo package WORKDIR ${COLCON_WS} diff --git a/demos/turtlebot3_integration/README.md b/demos/turtlebot3_integration/README.md index 227d95b..605e30b 100644 --- a/demos/turtlebot3_integration/README.md +++ b/demos/turtlebot3_integration/README.md @@ -271,45 +271,67 @@ TurtleBot3 Demo (manifest-based discovery) │ ├── planner-server → component: nav2-stack │ ├── medkit-gateway → component: gateway │ ├── medkit-fault-manager → component: fault-manager -│ └── diagnostic-bridge → component: diagnostic-bridge-unit +│ ├── diagnostic-bridge → component: diagnostic-bridge-unit +│ └── anomaly-detector → component: diagnostic-bridge-unit └── Functions (high-level capabilities) ├── autonomous-navigation → hosted by: amcl, bt-navigator, ... ├── robot-control → hosted by: turtlebot3-node, velocity-smoother - └── fault-management → hosted by: gateway, fault-manager, bridge + └── fault-management → hosted by: gateway, fault-manager, bridge, anomaly-detector ``` ### Fault Reporting -This demo uses the **legacy fault reporting path** via diagnostic_bridge: +This demo uses two fault reporting paths: + +1. **Direct Fault Reporting** via `anomaly_detector`: + - Monitors navigation goal status, AMCL covariance, and robot progress + - Reports faults directly to FaultManager via `/fault_manager/report_fault` service + +2. **Legacy Path** via `diagnostic_bridge`: + - Subscribes to `/diagnostics` topic (DiagnosticArray) + - Converts diagnostics to fault reports ``` -Nav2/TurtleBot3 nodes → /diagnostics topic (DiagnosticArray) - ↓ - diagnostic_bridge subscribes and converts - ↓ - FaultManager receives via ReportFault service - ↓ - Gateway exposes via GET /api/v1/faults +┌─────────────────────────────────────────────────────────────────────┐ +│ Fault Reporting Paths │ +├─────────────────────────────────────────────────────────────────────┤ +│ │ +│ anomaly_detector ─────┬──── /fault_manager/report_fault ──────┐ │ +│ (monitors: │ (direct service call) │ │ +│ - nav goal status) │ │ │ +│ - AMCL covariance) │ ▼ │ +│ - robot progress) │ ┌──────────────┐ +│ │ │ FaultManager │ +│ │ └──────┬───────┘ +│ Nav2/TurtleBot3 ──────┼──── /diagnostics topic ──────┐ │ │ +│ (DiagnosticArray) │ ▼ │ │ +│ │ diagnostic_bridge │ │ +│ └───────────────────────────────┘ │ │ +│ ▼ │ +│ GET /api/v1/faults │ +└─────────────────────────────────────────────────────────────────────┘ ``` | Source | Fault Reporter | Example Faults | |--------|----------------|----------------| +| Navigation Goals | anomaly_detector | `NAVIGATION_GOAL_ABORTED`, `NAVIGATION_GOAL_CANCELED` | +| Localization | anomaly_detector | `LOCALIZATION_UNCERTAINTY` | +| Robot Progress | anomaly_detector | `NAVIGATION_NO_PROGRESS` | | AMCL | diagnostic_bridge | Localization degraded | | Nav2 Controller | diagnostic_bridge | Path following errors | | TurtleBot3 | diagnostic_bridge | Motor/sensor issues | ## Fault Injection Scenarios -This demo includes scripts to inject various fault conditions for testing fault management: +This demo includes scripts to inject various fault conditions for testing fault management. +Faults are detected by `anomaly_detector` and reported directly to FaultManager. ### Available Fault Scenarios | Script | Fault Type | Description | Expected Faults | |--------|-----------|-------------|-----------------| -| `inject-nav-failure.sh` | Navigation | Send goal to unreachable location | BT_NAVIGATOR, PLANNER_SERVER | -| `inject-localization-failure.sh` | Localization | Reset AMCL with high uncertainty | AMCL | -| `inject-controller-failure.sh` | Controller | Set very restrictive velocity limits | VELOCITY_SMOOTHER, CONTROLLER_SERVER | -| `inject-collision.sh` | Collision | Navigate toward obstacles | COLLISION_MONITOR | +| `inject-nav-failure.sh` | Navigation | Send goal to unreachable location | `NAVIGATION_GOAL_ABORTED` | +| `inject-localization-failure.sh` | Localization | Reset AMCL with high uncertainty | `LOCALIZATION_UNCERTAINTY` | | `restore-normal.sh` | Recovery | Restore defaults and clear faults | - | ### Fault Injection Examples @@ -321,7 +343,7 @@ This demo includes scripts to inject various fault conditions for testing fault ./inject-nav-failure.sh # Check resulting faults -curl http://localhost:8080/api/v1/faults | jq '.items[] | {code, severity, message}' +curl http://localhost:8080/api/v1/faults | jq '.items[] | {fault_code, severity_label, description}' ``` #### 2. Localization Failure @@ -330,27 +352,7 @@ curl http://localhost:8080/api/v1/faults | jq '.items[] | {code, severity, messa # Reinitialize AMCL global localization (causes high uncertainty) ./inject-localization-failure.sh -# Watch localization recover -curl http://localhost:8080/api/v1/apps/amcl/data/particlecloud | jq '.poses | length' -``` - -#### 3. Controller Restriction - -```bash -# Set very low velocity limits (robot moves extremely slowly) -./inject-controller-failure.sh - -# Try sending a navigation goal - robot will struggle to follow path -./send-nav-goal.sh 2.0 0.5 -``` - -#### 4. Collision Scenario - -```bash -# Trigger collision avoidance -./inject-collision.sh - -# Monitor collision warnings +# Watch for LOCALIZATION_UNCERTAINTY fault curl http://localhost:8080/api/v1/faults | jq ``` @@ -429,16 +431,16 @@ demos/turtlebot3_integration/ ├── check-faults.sh # View active faults ├── inject-nav-failure.sh # Inject navigation failure scenario ├── inject-localization-failure.sh # Inject AMCL localization issues -├── inject-controller-failure.sh # Inject controller velocity limits -├── inject-collision.sh # Inject collision warning scenario ├── restore-normal.sh # Restore normal operation ├── config/ │ ├── medkit_params.yaml # ros2_medkit gateway config │ ├── turtlebot3_manifest.yaml # SOVD manifest (entity hierarchy) │ ├── nav2_params.yaml # Nav2 navigation parameters │ └── turtlebot3_world.yaml # Map configuration -└── launch/ - └── demo.launch.py # ROS 2 launch file +├── launch/ +│ └── demo.launch.py # ROS 2 launch file +└── scripts/ + └── anomaly_detector.py # Navigation anomaly detector node ``` ## Scripts @@ -452,8 +454,6 @@ demos/turtlebot3_integration/ | `check-faults.sh` | View active faults from gateway | | `inject-nav-failure.sh` | Inject navigation failure (unreachable goal) | | `inject-localization-failure.sh` | Inject localization failure (AMCL reset) | -| `inject-controller-failure.sh` | Inject controller failure (velocity limits) | -| `inject-collision.sh` | Inject collision warning scenario | | `restore-normal.sh` | Restore normal operation and clear faults | ## Manual Setup (Alternative) diff --git a/demos/turtlebot3_integration/config/turtlebot3_manifest.yaml b/demos/turtlebot3_integration/config/turtlebot3_manifest.yaml index d931bf1..073d3fa 100644 --- a/demos/turtlebot3_integration/config/turtlebot3_manifest.yaml +++ b/demos/turtlebot3_integration/config/turtlebot3_manifest.yaml @@ -183,6 +183,18 @@ apps: node_name: diagnostic_bridge namespace: /bridge + - id: anomaly-detector + name: "Anomaly Detector" + category: "diagnostics" + is_located_on: diagnostic-bridge-unit + description: "Monitors navigation metrics and reports faults directly to FaultManager" + depends_on: + - amcl + - bt-navigator + ros_binding: + node_name: anomaly_detector + namespace: /bridge + # ============================================================================= # FUNCTIONS - High-level capabilities # ============================================================================= @@ -213,3 +225,4 @@ functions: - medkit-gateway - medkit-fault-manager - diagnostic-bridge + - anomaly-detector diff --git a/demos/turtlebot3_integration/inject-collision.sh b/demos/turtlebot3_integration/inject-collision.sh deleted file mode 100755 index 23fcb6c..0000000 --- a/demos/turtlebot3_integration/inject-collision.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/bash -# Inject Collision Warning - Send robot toward obstacle -# This will trigger collision avoidance and potential path replanning - -GATEWAY_URL="${GATEWAY_URL:-http://localhost:8080}" -API_BASE="${GATEWAY_URL}/api/v1" - -echo "💥 Injecting COLLISION WARNING fault..." -echo " Sending robot toward known obstacle location in turtlebot3_world" -echo "" - -# Check for jq dependency -if ! command -v jq >/dev/null 2>&1; then - echo "❌ 'jq' is required but not installed." - echo " Please install jq (e.g., 'sudo apt-get install jq') and retry." - exit 1 -fi - -# Check gateway -if ! curl -sf "${API_BASE}/health" > /dev/null 2>&1; then - echo "❌ Gateway not available at ${GATEWAY_URL}" - exit 1 -fi - -# In turtlebot3_world, there are obstacles around the center -# Send goal that requires navigating close to obstacles -echo "Sending navigation goal to obstacle-dense area..." -RESPONSE=$(curl -s -X POST "${API_BASE}/apps/bt-navigator/operations/navigate_to_pose/executions" \ - -H "Content-Type: application/json" \ - -d '{ - "request": { - "pose": { - "header": {"frame_id": "map"}, - "pose": { - "position": {"x": 0.0, "y": 0.0, "z": 0.0}, - "orientation": {"w": 1.0} - } - } - } - }') - -echo "$RESPONSE" | jq '.' 2>/dev/null || echo "$RESPONSE" - -echo "" -echo "✓ Collision scenario triggered!" -echo "" -echo "Expected faults (via diagnostic_bridge):" -echo " - COLLISION_MONITOR: Obstacle detected" -echo " - CONTROLLER_SERVER: Path blocked" -echo "" -echo "The collision monitor should stop the robot if it gets too close." -echo "Check faults with: curl ${API_BASE}/faults | jq" diff --git a/demos/turtlebot3_integration/inject-controller-failure.sh b/demos/turtlebot3_integration/inject-controller-failure.sh deleted file mode 100755 index cb7ff98..0000000 --- a/demos/turtlebot3_integration/inject-controller-failure.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash -# Inject Controller Failure - Set very restrictive velocity limits -# This will cause the controller to struggle following paths - -GATEWAY_URL="${GATEWAY_URL:-http://localhost:8080}" -API_BASE="${GATEWAY_URL}/api/v1" - -echo "🎮 Injecting CONTROLLER FAILURE fault..." -echo " Setting very restrictive velocity limits (robot will move very slowly)" -echo "" - -# Check gateway -if ! curl -sf "${API_BASE}/health" > /dev/null 2>&1; then - echo "❌ Gateway not available at ${GATEWAY_URL}" - exit 1 -fi - -# Set very low max velocity on velocity_smoother -echo "Setting velocity_smoother max_velocity to 0.05 m/s (very slow)..." -curl -s -X PUT "${API_BASE}/apps/velocity-smoother/configurations/max_velocity" \ - -H "Content-Type: application/json" \ - -d '{"value": [0.05, 0.0, 0.3]}' - -echo "" - -# Also reduce controller's max velocity -echo "Setting controller_server FollowPath.max_vel_x to 0.05 m/s..." -curl -s -X PUT "${API_BASE}/apps/controller-server/configurations/FollowPath.max_vel_x" \ - -H "Content-Type: application/json" \ - -d '{"value": 0.05}' - -echo "" -echo "✓ Controller restriction injected!" -echo "" -echo "The robot will now move extremely slowly." -echo "Expected faults (via diagnostic_bridge):" -echo " - VELOCITY_SMOOTHER: Velocity limited" -echo " - CONTROLLER_SERVER: Path following degraded" -echo "" -echo "Restore with: ./restore-normal.sh" -echo "Check faults with: curl ${API_BASE}/faults | jq" diff --git a/demos/turtlebot3_integration/inject-localization-failure.sh b/demos/turtlebot3_integration/inject-localization-failure.sh index fbe66e8..a94b56c 100755 --- a/demos/turtlebot3_integration/inject-localization-failure.sh +++ b/demos/turtlebot3_integration/inject-localization-failure.sh @@ -22,14 +22,30 @@ curl -s -X POST "${API_BASE}/apps/amcl/operations/reinitialize_global_localizati -d '{}' echo "" -echo "✓ Localization failure injected!" +echo "Waiting for particles to scatter..." +sleep 2 + +# Now try to navigate - with scattered particles, localization uncertainty is high +echo "Sending navigation goal (with high localization uncertainty)..." +curl -s -X POST "${API_BASE}/apps/bt-navigator/operations/navigate_to_pose/executions" \ + -H "Content-Type: application/json" \ + -d '{ + "goal": { + "pose": { + "header": {"frame_id": "map"}, + "pose": { + "position": {"x": 2.0, "y": 0.0, "z": 0.0}, + "orientation": {"w": 1.0} + } + } + } + }' | jq '.' 2>/dev/null || true + echo "" -echo "AMCL will now have high uncertainty until it re-localizes." -echo "Expected faults (via diagnostic_bridge):" -echo " - AMCL: Localization confidence low" -echo " - BT_NAVIGATOR: Goal may fail due to uncertain pose" +echo "✓ Localization failure injected!" echo "" -echo "Watch localization recover with:" -echo " curl ${API_BASE}/apps/amcl/data/particlecloud | jq '.poses | length'" +echo "AMCL has been reinitialized - localization uncertainty is high." +echo "The anomaly_detector monitors AMCL covariance and will report:" +echo " - LOCALIZATION_UNCERTAINTY: High AMCL covariance (uncertainty)" echo "" echo "Check faults with: curl ${API_BASE}/faults | jq" diff --git a/demos/turtlebot3_integration/inject-nav-failure.sh b/demos/turtlebot3_integration/inject-nav-failure.sh index 22a7cac..c104216 100755 --- a/demos/turtlebot3_integration/inject-nav-failure.sh +++ b/demos/turtlebot3_integration/inject-nav-failure.sh @@ -27,7 +27,7 @@ echo "Sending navigation goal to (100.0, 100.0) - far outside map..." RESPONSE=$(curl -s -X POST "${API_BASE}/apps/bt-navigator/operations/navigate_to_pose/executions" \ -H "Content-Type: application/json" \ -d '{ - "request": { + "goal": { "pose": { "header": {"frame_id": "map"}, "pose": { @@ -40,11 +40,26 @@ RESPONSE=$(curl -s -X POST "${API_BASE}/apps/bt-navigator/operations/navigate_to echo "$RESPONSE" | jq '.' 2>/dev/null || echo "$RESPONSE" +# Extract execution ID +EXEC_ID=$(echo "$RESPONSE" | jq -r '.id' 2>/dev/null) + +if [ -n "$EXEC_ID" ] && [ "$EXEC_ID" != "null" ]; then + echo "" + echo "Waiting for navigation to fail (checking status)..." + for i in {1..10}; do + sleep 2 + STATUS=$(curl -s "${API_BASE}/apps/bt-navigator/operations/navigate_to_pose/executions/${EXEC_ID}" | jq -r '.status' 2>/dev/null) + echo " Status: $STATUS" + if [ "$STATUS" = "failed" ] || [ "$STATUS" = "completed" ]; then + break + fi + done +fi + echo "" echo "✓ Navigation failure injected!" echo "" -echo "Expected faults (via diagnostic_bridge):" -echo " - BT_NAVIGATOR: Goal rejected or path planning failed" -echo " - PLANNER_SERVER: No valid path to goal" +echo "Expected faults (via anomaly_detector → FaultManager):" +echo " - NAVIGATION_GOAL_ABORTED: Navigation goal ABORTED" echo "" echo "Check faults with: curl ${API_BASE}/faults | jq" diff --git a/demos/turtlebot3_integration/launch/demo.launch.py b/demos/turtlebot3_integration/launch/demo.launch.py index a7e17c5..c1fe52d 100644 --- a/demos/turtlebot3_integration/launch/demo.launch.py +++ b/demos/turtlebot3_integration/launch/demo.launch.py @@ -165,5 +165,19 @@ def generate_launch_description(): }, ], ), + # Launch anomaly detector to monitor navigation and publish diagnostics + Node( + package="turtlebot3_medkit_demo", + executable="anomaly_detector.py", + name="anomaly_detector", + namespace="bridge", + output="screen", + parameters=[ + {"use_sim_time": use_sim_time}, + {"covariance_warn_threshold": 0.3}, + {"covariance_error_threshold": 1.0}, + {"no_progress_timeout_sec": 20.0}, + ], + ), ] ) diff --git a/demos/turtlebot3_integration/restore-normal.sh b/demos/turtlebot3_integration/restore-normal.sh index 5c2c2dd..d61db86 100755 --- a/demos/turtlebot3_integration/restore-normal.sh +++ b/demos/turtlebot3_integration/restore-normal.sh @@ -34,20 +34,18 @@ if echo "$EXECUTIONS" | jq -e '.items[]' > /dev/null 2>&1; then done fi -# Restore velocity smoother defaults -echo "Restoring velocity_smoother defaults..." +# Restore velocity smoother defaults (in case controller-failure was run before removal) +echo "" +echo "Restoring velocity parameters to defaults..." curl -s -X PUT "${API_BASE}/apps/velocity-smoother/configurations/max_velocity" \ -H "Content-Type: application/json" \ - -d '{"value": [0.26, 0.0, 1.0]}' > /dev/null + -d '{"value": [0.26, 0.0, 1.0]}' > /dev/null 2>&1 -# Restore controller defaults -echo "Restoring controller_server defaults..." curl -s -X PUT "${API_BASE}/apps/controller-server/configurations/FollowPath.max_vel_x" \ -H "Content-Type: application/json" \ - -d '{"value": 0.26}' > /dev/null + -d '{"value": 0.26}' > /dev/null 2>&1 # Clear all faults -echo "" echo "Clearing all faults from FaultManager..." curl -s -X DELETE "${API_BASE}/faults" > /dev/null diff --git a/demos/turtlebot3_integration/scripts/anomaly_detector.py b/demos/turtlebot3_integration/scripts/anomaly_detector.py new file mode 100755 index 0000000..33045c6 --- /dev/null +++ b/demos/turtlebot3_integration/scripts/anomaly_detector.py @@ -0,0 +1,308 @@ +#!/usr/bin/env python3 +# Copyright 2026 selfpatch +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Anomaly Detector Node for TurtleBot3 Demo + +Monitors navigation metrics and reports faults directly to FaultManager: +- Navigation goal ABORTED/CANCELED -> ERROR fault +- High AMCL localization uncertainty -> WARN/ERROR fault +- No robot progress when goal active -> WARN fault + +Reports faults directly via /fault_manager/report_fault service. +""" + +import math +import time +from typing import Optional + +import rclpy +from rclpy.node import Node +from rclpy.qos import QoSProfile, ReliabilityPolicy, DurabilityPolicy + +from action_msgs.msg import GoalStatusArray, GoalStatus +from geometry_msgs.msg import PoseWithCovarianceStamped +from nav_msgs.msg import Odometry +from ros2_medkit_msgs.srv import ReportFault + + +# Severity levels (from ReportFault.srv) +SEVERITY_INFO = 0 +SEVERITY_WARN = 1 +SEVERITY_ERROR = 2 +SEVERITY_CRITICAL = 3 + +# Event types +EVENT_FAILED = 0 +EVENT_PASSED = 1 + + +class AnomalyDetectorNode(Node): + """Monitors navigation metrics and reports faults to FaultManager.""" + + def __init__(self): + super().__init__('anomaly_detector') + + # Declare parameters + self.declare_parameter('covariance_warn_threshold', 0.5) + self.declare_parameter('covariance_error_threshold', 2.0) + self.declare_parameter('no_progress_timeout_sec', 15.0) + self.declare_parameter('min_progress_distance', 0.1) + self.declare_parameter('check_interval_sec', 1.0) + + # Get parameters + self.covariance_warn_threshold = self.get_parameter('covariance_warn_threshold').value + self.covariance_error_threshold = self.get_parameter('covariance_error_threshold').value + self.no_progress_timeout_sec = self.get_parameter('no_progress_timeout_sec').value + self.min_progress_distance = self.get_parameter('min_progress_distance').value + check_interval = self.get_parameter('check_interval_sec').value + + # QoS for action status (transient local for late subscribers) + status_qos = QoSProfile( + reliability=ReliabilityPolicy.RELIABLE, + durability=DurabilityPolicy.TRANSIENT_LOCAL, + depth=10 + ) + + # Subscribers + self.goal_status_sub = self.create_subscription( + GoalStatusArray, + '/navigate_to_pose/_action/status', + self.goal_status_callback, + status_qos + ) + + self.amcl_pose_sub = self.create_subscription( + PoseWithCovarianceStamped, + '/amcl_pose', + self.amcl_pose_callback, + 10 + ) + + self.odom_sub = self.create_subscription( + Odometry, + '/odom', + self.odom_callback, + 10 + ) + + # Service client for fault reporting + self.fault_client = self.create_client(ReportFault, '/fault_manager/report_fault') + self.get_logger().info('Waiting for /fault_manager/report_fault service...') + while not self.fault_client.wait_for_service(timeout_sec=5.0): + self.get_logger().warn('Service not available, waiting...') + self.get_logger().info('Connected to fault_manager service') + + # Timer for periodic checks + self.check_timer = self.create_timer(check_interval, self.check_timer_callback) + + # State tracking + self.last_goal_status: dict = {} # goal_id -> last_status + self.has_active_goal = False + self.last_position: Optional[tuple] = None + self.last_progress_time = time.time() + self.current_covariance = 0.0 + + # Throttling + self.last_covariance_report_time = 0.0 + self.last_no_progress_report_time = 0.0 + self.report_throttle_sec = 5.0 + + # Track active faults for PASSED events + self.active_faults: set = set() + + self.get_logger().info( + f'AnomalyDetector started (cov_warn={self.covariance_warn_threshold}, ' + f'cov_error={self.covariance_error_threshold}, ' + f'no_progress_timeout={self.no_progress_timeout_sec}s)' + ) + + def goal_status_callback(self, msg: GoalStatusArray): + """Monitor navigation goal status for failures.""" + for status in msg.status_list: + goal_id = ''.join(f'{b:02x}' for b in status.goal_info.goal_id.uuid) + last_status = self.last_goal_status.get(goal_id) + + # Track active goals + if status.status in [GoalStatus.STATUS_ACCEPTED, GoalStatus.STATUS_EXECUTING]: + self.has_active_goal = True + self.last_progress_time = time.time() + + # Detect status changes to terminal failure states + if last_status != status.status: + self.last_goal_status[goal_id] = status.status + + if status.status == GoalStatus.STATUS_ABORTED: + self.report_fault( + fault_code='NAVIGATION_GOAL_ABORTED', + severity=SEVERITY_ERROR, + description=f'Navigation goal ABORTED - path planning or execution failed (goal: {goal_id[:8]})', + event_type=EVENT_FAILED + ) + self.has_active_goal = False + self.get_logger().warn(f'Navigation goal {goal_id[:8]} ABORTED') + + elif status.status == GoalStatus.STATUS_CANCELED: + self.report_fault( + fault_code='NAVIGATION_GOAL_CANCELED', + severity=SEVERITY_WARN, + description=f'Navigation goal CANCELED (goal: {goal_id[:8]})', + event_type=EVENT_FAILED + ) + self.has_active_goal = False + self.get_logger().info(f'Navigation goal {goal_id[:8]} CANCELED') + + elif status.status == GoalStatus.STATUS_SUCCEEDED: + # Clear navigation faults + self.report_fault( + fault_code='NAVIGATION_GOAL_ABORTED', + severity=SEVERITY_INFO, + description='Navigation goal succeeded', + event_type=EVENT_PASSED + ) + self.report_fault( + fault_code='NAVIGATION_GOAL_CANCELED', + severity=SEVERITY_INFO, + description='Navigation goal succeeded', + event_type=EVENT_PASSED + ) + self.has_active_goal = False + + def amcl_pose_callback(self, msg: PoseWithCovarianceStamped): + """Monitor AMCL localization covariance.""" + # Calculate covariance magnitude from position covariance (indices 0, 7 for x, y variance) + cov = msg.pose.covariance + self.current_covariance = math.sqrt(cov[0]**2 + cov[7]**2) + + now = time.time() + if now - self.last_covariance_report_time < self.report_throttle_sec: + return + + if self.current_covariance > self.covariance_error_threshold: + self.report_fault( + fault_code='LOCALIZATION_UNCERTAINTY', + severity=SEVERITY_ERROR, + description=f'AMCL localization uncertainty very high: {self.current_covariance:.3f}', + event_type=EVENT_FAILED + ) + self.last_covariance_report_time = now + self.get_logger().warn(f'High localization uncertainty: {self.current_covariance:.3f}') + + elif self.current_covariance > self.covariance_warn_threshold: + self.report_fault( + fault_code='LOCALIZATION_UNCERTAINTY', + severity=SEVERITY_WARN, + description=f'AMCL localization uncertainty elevated: {self.current_covariance:.3f}', + event_type=EVENT_FAILED + ) + self.last_covariance_report_time = now + else: + # Send PASSED to clear previous warnings + if 'LOCALIZATION_UNCERTAINTY' in self.active_faults: + self.report_fault( + fault_code='LOCALIZATION_UNCERTAINTY', + severity=SEVERITY_INFO, + description='AMCL localization normal', + event_type=EVENT_PASSED + ) + self.last_covariance_report_time = now + + def odom_callback(self, msg: Odometry): + """Track robot position for progress monitoring.""" + x = msg.pose.pose.position.x + y = msg.pose.pose.position.y + + if self.last_position is not None: + last_x, last_y = self.last_position + distance = math.sqrt((x - last_x)**2 + (y - last_y)**2) + + if distance > self.min_progress_distance: + self.last_progress_time = time.time() + # Clear no-progress fault if robot is moving + if 'NAVIGATION_NO_PROGRESS' in self.active_faults: + self.report_fault( + fault_code='NAVIGATION_NO_PROGRESS', + severity=SEVERITY_INFO, + description='Robot making progress', + event_type=EVENT_PASSED + ) + + self.last_position = (x, y) + + def check_timer_callback(self): + """Periodic check for no-progress condition.""" + if not self.has_active_goal: + return + + now = time.time() + time_since_progress = now - self.last_progress_time + + if time_since_progress > self.no_progress_timeout_sec: + if now - self.last_no_progress_report_time > self.report_throttle_sec: + self.report_fault( + fault_code='NAVIGATION_NO_PROGRESS', + severity=SEVERITY_WARN, + description=f'No navigation progress for {time_since_progress:.1f}s', + event_type=EVENT_FAILED + ) + self.last_no_progress_report_time = now + self.get_logger().warn(f'No navigation progress for {time_since_progress:.1f}s') + + def report_fault(self, fault_code: str, severity: int, description: str, event_type: int): + """Report a fault to FaultManager via service call.""" + request = ReportFault.Request() + request.fault_code = fault_code + request.event_type = event_type + request.severity = severity + request.description = description + request.source_id = self.get_fully_qualified_name() + + # Track active faults + if event_type == EVENT_FAILED: + self.active_faults.add(fault_code) + else: + self.active_faults.discard(fault_code) + + # Async service call + future = self.fault_client.call_async(request) + future.add_done_callback( + lambda f: self._handle_fault_response(f, fault_code, event_type) + ) + + def _handle_fault_response(self, future, fault_code: str, event_type: int): + """Handle response from fault_manager service.""" + try: + response = future.result() + if not response.accepted: + self.get_logger().warn(f'Fault report rejected: {fault_code}') + except Exception as e: + self.get_logger().error(f'Failed to report fault {fault_code}: {e}') + + +def main(args=None): + rclpy.init(args=args) + node = AnomalyDetectorNode() + + try: + rclpy.spin(node) + except KeyboardInterrupt: + pass + finally: + node.destroy_node() + rclpy.shutdown() + + +if __name__ == '__main__': + main() diff --git a/demos/turtlebot3_integration/stop-demo.sh b/demos/turtlebot3_integration/stop-demo.sh index fab9365..18e4324 100755 --- a/demos/turtlebot3_integration/stop-demo.sh +++ b/demos/turtlebot3_integration/stop-demo.sh @@ -60,10 +60,10 @@ done echo "Stopping containers..." if docker compose version &> /dev/null; then # shellcheck disable=SC2086 - docker compose down ${REMOVE_VOLUMES} ${REMOVE_IMAGES} + docker compose --profile cpu --profile nvidia down ${REMOVE_VOLUMES} ${REMOVE_IMAGES} else # shellcheck disable=SC2086 - docker-compose down ${REMOVE_VOLUMES} ${REMOVE_IMAGES} + docker-compose --profile cpu --profile nvidia down ${REMOVE_VOLUMES} ${REMOVE_IMAGES} fi # Cleanup X11 From baf598cd218ffbf5cad87ffb514e48ea724a69ca Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Sat, 31 Jan 2026 17:14:10 +0000 Subject: [PATCH 2/3] fix: shellcheck warning - use _ for unused loop variable --- demos/turtlebot3_integration/inject-nav-failure.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demos/turtlebot3_integration/inject-nav-failure.sh b/demos/turtlebot3_integration/inject-nav-failure.sh index c104216..4140611 100755 --- a/demos/turtlebot3_integration/inject-nav-failure.sh +++ b/demos/turtlebot3_integration/inject-nav-failure.sh @@ -46,7 +46,7 @@ EXEC_ID=$(echo "$RESPONSE" | jq -r '.id' 2>/dev/null) if [ -n "$EXEC_ID" ] && [ "$EXEC_ID" != "null" ]; then echo "" echo "Waiting for navigation to fail (checking status)..." - for i in {1..10}; do + for _ in {1..10}; do sleep 2 STATUS=$(curl -s "${API_BASE}/apps/bt-navigator/operations/navigate_to_pose/executions/${EXEC_ID}" | jq -r '.status' 2>/dev/null) echo " Status: $STATUS" From b0a39a780326c99a51f669df42f04880abf0f475 Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Sat, 31 Jan 2026 18:52:19 +0000 Subject: [PATCH 3/3] fix(turtlebot3-demo): address PR review comments - Fix covariance calculation (sqrt of sum, not sum of squares) - Fix has_active_goal race condition with multiple goals - Replace time.time() with ROS time for simulation compatibility - Fix docstring to match actual severity levels (ABORTED=ERROR, CANCELED=WARN) - Update launch comment to accurately describe fault reporting - Standardize navigation payload to use 'goal' instead of 'request' - Add execution_id fallback in inject-nav-failure.sh - Log KeyboardInterrupt on shutdown Addresses review comments from Copilot and mfaferek93. --- .../inject-nav-failure.sh | 4 +- .../launch/demo.launch.py | 2 +- .../scripts/anomaly_detector.py | 60 ++++++++++++------- demos/turtlebot3_integration/send-nav-goal.sh | 2 +- 4 files changed, 41 insertions(+), 27 deletions(-) diff --git a/demos/turtlebot3_integration/inject-nav-failure.sh b/demos/turtlebot3_integration/inject-nav-failure.sh index 4140611..07529e3 100755 --- a/demos/turtlebot3_integration/inject-nav-failure.sh +++ b/demos/turtlebot3_integration/inject-nav-failure.sh @@ -40,8 +40,8 @@ RESPONSE=$(curl -s -X POST "${API_BASE}/apps/bt-navigator/operations/navigate_to echo "$RESPONSE" | jq '.' 2>/dev/null || echo "$RESPONSE" -# Extract execution ID -EXEC_ID=$(echo "$RESPONSE" | jq -r '.id' 2>/dev/null) +# Extract execution ID (support both .execution_id and .id) +EXEC_ID=$(echo "$RESPONSE" | jq -r '.execution_id // .id // empty' 2>/dev/null) if [ -n "$EXEC_ID" ] && [ "$EXEC_ID" != "null" ]; then echo "" diff --git a/demos/turtlebot3_integration/launch/demo.launch.py b/demos/turtlebot3_integration/launch/demo.launch.py index c1fe52d..254320b 100644 --- a/demos/turtlebot3_integration/launch/demo.launch.py +++ b/demos/turtlebot3_integration/launch/demo.launch.py @@ -165,7 +165,7 @@ def generate_launch_description(): }, ], ), - # Launch anomaly detector to monitor navigation and publish diagnostics + # Launch anomaly detector to monitor navigation and report faults via fault manager Node( package="turtlebot3_medkit_demo", executable="anomaly_detector.py", diff --git a/demos/turtlebot3_integration/scripts/anomaly_detector.py b/demos/turtlebot3_integration/scripts/anomaly_detector.py index 33045c6..13f848d 100755 --- a/demos/turtlebot3_integration/scripts/anomaly_detector.py +++ b/demos/turtlebot3_integration/scripts/anomaly_detector.py @@ -17,7 +17,8 @@ Anomaly Detector Node for TurtleBot3 Demo Monitors navigation metrics and reports faults directly to FaultManager: -- Navigation goal ABORTED/CANCELED -> ERROR fault +- Navigation goal ABORTED -> ERROR fault +- Navigation goal CANCELED -> WARN fault - High AMCL localization uncertainty -> WARN/ERROR fault - No robot progress when goal active -> WARN fault @@ -25,10 +26,10 @@ """ import math -import time from typing import Optional import rclpy +from rclpy.time import Time from rclpy.node import Node from rclpy.qos import QoSProfile, ReliabilityPolicy, DurabilityPolicy @@ -112,12 +113,12 @@ def __init__(self): self.last_goal_status: dict = {} # goal_id -> last_status self.has_active_goal = False self.last_position: Optional[tuple] = None - self.last_progress_time = time.time() + self.last_progress_time: Optional[Time] = None self.current_covariance = 0.0 - # Throttling - self.last_covariance_report_time = 0.0 - self.last_no_progress_report_time = 0.0 + # Throttling (using ROS time for simulation compatibility) + self.last_covariance_report_time: Optional[Time] = None + self.last_no_progress_report_time: Optional[Time] = None self.report_throttle_sec = 5.0 # Track active faults for PASSED events @@ -131,14 +132,19 @@ def __init__(self): def goal_status_callback(self, msg: GoalStatusArray): """Monitor navigation goal status for failures.""" + # Compute has_active_goal from entire message to avoid iteration-order issues + active_statuses = {GoalStatus.STATUS_ACCEPTED, GoalStatus.STATUS_EXECUTING} + self.has_active_goal = any( + s.status in active_statuses for s in msg.status_list + ) + for status in msg.status_list: goal_id = ''.join(f'{b:02x}' for b in status.goal_info.goal_id.uuid) last_status = self.last_goal_status.get(goal_id) - # Track active goals - if status.status in [GoalStatus.STATUS_ACCEPTED, GoalStatus.STATUS_EXECUTING]: - self.has_active_goal = True - self.last_progress_time = time.time() + # Track active goals - update progress time when goal is active + if status.status in active_statuses: + self.last_progress_time = self.get_clock().now() # Detect status changes to terminal failure states if last_status != status.status: @@ -151,7 +157,6 @@ def goal_status_callback(self, msg: GoalStatusArray): description=f'Navigation goal ABORTED - path planning or execution failed (goal: {goal_id[:8]})', event_type=EVENT_FAILED ) - self.has_active_goal = False self.get_logger().warn(f'Navigation goal {goal_id[:8]} ABORTED') elif status.status == GoalStatus.STATUS_CANCELED: @@ -161,7 +166,6 @@ def goal_status_callback(self, msg: GoalStatusArray): description=f'Navigation goal CANCELED (goal: {goal_id[:8]})', event_type=EVENT_FAILED ) - self.has_active_goal = False self.get_logger().info(f'Navigation goal {goal_id[:8]} CANCELED') elif status.status == GoalStatus.STATUS_SUCCEEDED: @@ -178,17 +182,19 @@ def goal_status_callback(self, msg: GoalStatusArray): description='Navigation goal succeeded', event_type=EVENT_PASSED ) - self.has_active_goal = False def amcl_pose_callback(self, msg: PoseWithCovarianceStamped): """Monitor AMCL localization covariance.""" # Calculate covariance magnitude from position covariance (indices 0, 7 for x, y variance) + # cov[0] and cov[7] are already variances; use their sum to compute a position stddev magnitude cov = msg.pose.covariance - self.current_covariance = math.sqrt(cov[0]**2 + cov[7]**2) + self.current_covariance = math.sqrt(cov[0] + cov[7]) - now = time.time() - if now - self.last_covariance_report_time < self.report_throttle_sec: - return + now = self.get_clock().now() + if self.last_covariance_report_time is not None: + elapsed = (now - self.last_covariance_report_time).nanoseconds / 1e9 + if elapsed < self.report_throttle_sec: + return if self.current_covariance > self.covariance_error_threshold: self.report_fault( @@ -217,7 +223,7 @@ def amcl_pose_callback(self, msg: PoseWithCovarianceStamped): description='AMCL localization normal', event_type=EVENT_PASSED ) - self.last_covariance_report_time = now + self.last_covariance_report_time = now def odom_callback(self, msg: Odometry): """Track robot position for progress monitoring.""" @@ -229,7 +235,7 @@ def odom_callback(self, msg: Odometry): distance = math.sqrt((x - last_x)**2 + (y - last_y)**2) if distance > self.min_progress_distance: - self.last_progress_time = time.time() + self.last_progress_time = self.get_clock().now() # Clear no-progress fault if robot is moving if 'NAVIGATION_NO_PROGRESS' in self.active_faults: self.report_fault( @@ -246,11 +252,19 @@ def check_timer_callback(self): if not self.has_active_goal: return - now = time.time() - time_since_progress = now - self.last_progress_time + if self.last_progress_time is None: + return + + now = self.get_clock().now() + time_since_progress = (now - self.last_progress_time).nanoseconds / 1e9 if time_since_progress > self.no_progress_timeout_sec: - if now - self.last_no_progress_report_time > self.report_throttle_sec: + can_report = True + if self.last_no_progress_report_time is not None: + elapsed = (now - self.last_no_progress_report_time).nanoseconds / 1e9 + can_report = elapsed > self.report_throttle_sec + + if can_report: self.report_fault( fault_code='NAVIGATION_NO_PROGRESS', severity=SEVERITY_WARN, @@ -298,7 +312,7 @@ def main(args=None): try: rclpy.spin(node) except KeyboardInterrupt: - pass + node.get_logger().info('AnomalyDetectorNode interrupted by user (KeyboardInterrupt), shutting down.') finally: node.destroy_node() rclpy.shutdown() diff --git a/demos/turtlebot3_integration/send-nav-goal.sh b/demos/turtlebot3_integration/send-nav-goal.sh index 4335e5a..5e8206a 100755 --- a/demos/turtlebot3_integration/send-nav-goal.sh +++ b/demos/turtlebot3_integration/send-nav-goal.sh @@ -65,7 +65,7 @@ echo "" RESPONSE=$(curl -s -X POST "${API_BASE}/apps/bt-navigator/operations/navigate_to_pose/executions" \ -H "Content-Type: application/json" \ -d "{ - \"request\": { + \"goal\": { \"pose\": { \"header\": {\"frame_id\": \"map\"}, \"pose\": {