diff --git a/dimos/agents/skills/osm.py b/dimos/agents/skills/osm.py index a89e86044f..feb61844b9 100644 --- a/dimos/agents/skills/osm.py +++ b/dimos/agents/skills/osm.py @@ -13,6 +13,8 @@ # limitations under the License. +from typing import Any + from dimos.agents.annotation import skill from dimos.core.module import Module from dimos.core.stream import In @@ -31,8 +33,8 @@ class OsmSkill(Module): gps_location: In[LatLon] - def __init__(self) -> None: - super().__init__() + def __init__(self, **kwargs: Any) -> None: + super().__init__(**kwargs) self._latest_location = None self._current_location_map = CurrentLocationMap(QwenVlModel()) diff --git a/dimos/robot/all_blueprints.py b/dimos/robot/all_blueprints.py index 03a1f47f2a..f4cef25000 100644 --- a/dimos/robot/all_blueprints.py +++ b/dimos/robot/all_blueprints.py @@ -47,7 +47,11 @@ "demo-osm": "dimos.mapping.osm.demo_osm:demo_osm", "demo-skill": "dimos.agents.skills.demo_skill:demo_skill", "drone-agentic": "dimos.robot.drone.blueprints.agentic.drone_agentic:drone_agentic", + "drone-agentic-gazebo": "dimos.robot.drone.blueprints.agentic.drone_agentic:drone_agentic_gazebo", + "drone-agentic-gazebo-spatial": "dimos.robot.drone.blueprints.agentic.drone_agentic:drone_agentic_gazebo_spatial", "drone-basic": "dimos.robot.drone.blueprints.basic.drone_basic:drone_basic", + "drone-basic-gazebo": "dimos.robot.drone.blueprints.basic.drone_basic:drone_basic_gazebo", + "drone-basic-gazebo-spatial": "dimos.robot.drone.blueprints.basic.drone_basic:drone_basic_gazebo_spatial", "dual-xarm6-planner": "dimos.manipulation.blueprints:dual_xarm6_planner", "keyboard-teleop-piper": "dimos.robot.manipulators.piper.blueprints:keyboard_teleop_piper", "keyboard-teleop-xarm6": "dimos.robot.manipulators.xarm.blueprints:keyboard_teleop_xarm6", diff --git a/dimos/robot/drone/README.md b/dimos/robot/drone/README.md index a158586940..bf655129b3 100644 --- a/dimos/robot/drone/README.md +++ b/dimos/robot/drone/README.md @@ -19,6 +19,12 @@ dimos run 
drone-basic --set outdoor=true # Agentic with LLM control dimos run drone-agentic + +# Gazebo + ArduPilot SITL +dimos run drone-basic-gazebo +dimos run drone-agentic-gazebo +dimos run drone-basic-gazebo-spatial +dimos run drone-agentic-gazebo-spatial # + spatial memory + navigate ``` To interact with the agent, run `dimos humancli` in a separate terminal. @@ -48,6 +54,27 @@ Composes on top of `drone-basic`, adding autonomous capabilities: | `Agent` | LLM agent (default: GPT-4o) | | `WebInput` | Web/CLI interface for human commands | +### `drone-basic-gazebo` / `drone-agentic-gazebo` / `drone-basic-gazebo-spatial` / `drone-agentic-gazebo-spatial` +Same as the non-Gazebo blueprints but with video from Gazebo (RTP/H264 on UDP 5600). Odometry from MAVLink; when SITL sends `LOCAL_POSITION_NED` it is used for position. Spatial variants add `SpatialMemory`. **`drone-agentic-gazebo-spatial`** also adds `DroneSpatialNavSkill` with **`navigate_to_where_i_saw(description)`**: text-search remembered views and send a local NED position target to the drone. + +## Running with Gazebo + ArduPilot + +1. **Install ArduPilot SITL** and a GCS (e.g. MAVProxy). See the [ArduPilot development wiki](https://ardupilot.org/dev/). +2. **Install Gazebo and the ArduPilot Gazebo plugin** (Iris quad, camera streaming, etc.): [Using SITL with Gazebo](https://ardupilot.org/dev/docs/sitl-with-gazebo.html). Set `GZ_SIM_SYSTEM_PLUGIN_PATH` and `GZ_SIM_RESOURCE_PATH` as described there. +3. **Start Gazebo** with the Iris world (camera streams to UDP 5600): + ```bash + gz sim -v4 -r iris_runway.sdf + ``` + Enable camera streaming if needed: + ```bash + gz topic -t /world/iris_runway/model/iris_with_gimbal/model/gimbal/link/pitch_link/sensor/camera/image/enable_streaming -m gz.msgs.Boolean -p "data: 1" + ``` +4. **Start SITL** in another terminal: + ```bash + sim_vehicle.py -v ArduCopter -f gazebo-iris --model JSON --map --console + ``` +5. **Run DimOS** (e.g. `dimos run drone-agentic-gazebo`). 
MAVLink: default UDP 14550. Video: port 5600. + ## Installation ### Python (included with DimOS) diff --git a/dimos/robot/drone/blueprints/agentic/drone_agentic.py b/dimos/robot/drone/blueprints/agentic/drone_agentic.py index f94af1ac5c..47f5bdcb86 100644 --- a/dimos/robot/drone/blueprints/agentic/drone_agentic.py +++ b/dimos/robot/drone/blueprints/agentic/drone_agentic.py @@ -25,7 +25,12 @@ from dimos.agents.skills.osm import OsmSkill from dimos.agents.web_human_input import WebInput from dimos.core.blueprints import autoconnect -from dimos.robot.drone.blueprints.basic.drone_basic import drone_basic +from dimos.robot.drone.blueprints.basic.drone_basic import ( + drone_basic, + drone_basic_gazebo, + drone_basic_gazebo_spatial, +) +from dimos.robot.drone.drone_spatial_nav_skill import DroneSpatialNavSkill from dimos.robot.drone.drone_tracking_module import DroneTrackingModule DRONE_SYSTEM_PROMPT = """\ @@ -33,6 +38,12 @@ You have access to drone control skills you are already flying so only run move_twist, set_mode, and fly_to. When the user gives commands, use the appropriate skills to control the drone. Always confirm actions and report results. Send fly_to commands only at above 200 meters altitude to be safe. + +Motion skills: move(x, y, z, duration) and move_forward(distance, speed) use velocity commands. +For direct position use go_to_position(x, y, z, vx_ff, vy_ff, vz_ff): position in local NED (m; z negative = up) with optional velocity feedforward. +Yaw control: use rotate_to(heading_deg) to turn to a heading (0–360°, 0=North, 90=East). Remember you have this for "turn left/right", "face North", etc. +Spatial memory (when available): navigate_to_where_i_saw(description) searches remembered camera views by text and flies toward the best matching stored position. 
+ Here are some GPS locations to remember 6th and Natoma intersection: 37.78019978319006, -122.40770815020853, 454 Natoma (Office): 37.780967465525244, -122.40688342010769 @@ -53,7 +64,38 @@ ] ) +drone_agentic_gazebo = autoconnect( + drone_basic_gazebo, + DroneTrackingModule.blueprint(outdoor=False), + GoogleMapsSkillContainer.blueprint(), + OsmSkill.blueprint(), + Agent.blueprint(system_prompt=DRONE_SYSTEM_PROMPT, model="gpt-4o-mini"), + WebInput.blueprint(), +).remappings( + [ + (DroneTrackingModule, "video_input", "video"), + (DroneTrackingModule, "cmd_vel", "movecmd_twist"), + ] +) + +drone_agentic_gazebo_spatial = autoconnect( + drone_basic_gazebo_spatial, + DroneTrackingModule.blueprint(outdoor=False), + GoogleMapsSkillContainer.blueprint(), + OsmSkill.blueprint(), + DroneSpatialNavSkill.blueprint(), + Agent.blueprint(system_prompt=DRONE_SYSTEM_PROMPT, model="gpt-4o-mini"), + WebInput.blueprint(), +).remappings( + [ + (DroneTrackingModule, "video_input", "video"), + (DroneTrackingModule, "cmd_vel", "movecmd_twist"), + ] +) + __all__ = [ "DRONE_SYSTEM_PROMPT", "drone_agentic", + "drone_agentic_gazebo", + "drone_agentic_gazebo_spatial", ] diff --git a/dimos/robot/drone/blueprints/basic/drone_basic.py b/dimos/robot/drone/blueprints/basic/drone_basic.py index fbe6621ae1..7ba582e757 100644 --- a/dimos/robot/drone/blueprints/basic/drone_basic.py +++ b/dimos/robot/drone/blueprints/basic/drone_basic.py @@ -20,6 +20,7 @@ from dimos.core.blueprints import autoconnect from dimos.core.global_config import global_config +from dimos.perception.spatial_perception import spatial_memory from dimos.protocol.pubsub.impl.lcmpubsub import LCM from dimos.robot.drone.camera_module import DroneCameraModule from dimos.robot.drone.connection_module import DroneConnectionModule @@ -44,7 +45,7 @@ def _drone_rerun_blueprint() -> Any: return rrb.Blueprint( rrb.Horizontal( - rrb.Spatial2DView(origin="world/video", name="Camera"), + rrb.Spatial2DView(origin="world/color_image", name="Camera"), 
rrb.Spatial3DView( origin="world", name="3D", @@ -95,6 +96,25 @@ def _drone_rerun_blueprint() -> Any: WebsocketVisModule.blueprint(), ) +drone_basic_gazebo = autoconnect( + _vis, + DroneConnectionModule.blueprint( + connection_string=connection_string, + video_port=video_port, + video_source="gazebo", + outdoor=False, + ), + DroneCameraModule.blueprint(camera_intrinsics=[1000.0, 1000.0, 960.0, 540.0]), + WebsocketVisModule.blueprint(), +) + +drone_basic_gazebo_spatial = autoconnect( + drone_basic_gazebo, + spatial_memory(), +) + __all__ = [ "drone_basic", + "drone_basic_gazebo", + "drone_basic_gazebo_spatial", ] diff --git a/dimos/robot/drone/connection_module.py b/dimos/robot/drone/connection_module.py index 863f719bad..e0cf33d32e 100644 --- a/dimos/robot/drone/connection_module.py +++ b/dimos/robot/drone/connection_module.py @@ -15,7 +15,6 @@ """DimOS module wrapper for drone connection.""" -from collections.abc import Generator import json import threading import time @@ -36,6 +35,7 @@ from dimos.msgs.geometry_msgs.Vector3 import Vector3 from dimos.msgs.sensor_msgs.Image import Image from dimos.robot.drone.dji_video_stream import DJIDroneVideoStream +from dimos.robot.drone.gazebo_video_stream import GazeboVideoStream from dimos.robot.drone.mavlink_connection import MavlinkConnection from dimos.utils.logging_config import setup_logger @@ -52,6 +52,7 @@ def _add_disposable(composite: CompositeDisposable, item: Disposable | Any) -> N class Config(ModuleConfig): connection_string: str = "udp:0.0.0.0:14550" video_port: int = 5600 + video_source: str = "dji" outdoor: bool = False @@ -92,13 +93,14 @@ def __init__(self, **kwargs: Any) -> None: """ super().__init__(**kwargs) self.connection: MavlinkConnection | None = None - self.video_stream: DJIDroneVideoStream | None = None + self.video_stream: DJIDroneVideoStream | GazeboVideoStream | None = None self._latest_video_frame = None self._latest_telemetry = None self._latest_status = None self._latest_status_lock = threading.RLock() 
self._running = False self._telemetry_thread: threading.Thread | None = None + self._last_twist_log_time: float = 0.0 @rpc def start(self) -> None: @@ -116,7 +118,10 @@ def start(self) -> None: ) self.connection.connect() - self.video_stream = DJIDroneVideoStream(port=self.config.video_port) + if self.config.video_source == "gazebo": + self.video_stream = GazeboVideoStream(port=self.config.video_port) + else: + self.video_stream = DJIDroneVideoStream(port=self.config.video_port) if not self.connection.connected: logger.error("Failed to connect to drone") @@ -282,6 +287,56 @@ def move(self, x: float = 0.0, y: float = 0.0, z: float = 0.0, duration: float = if self.connection: self.connection.move(Vector3(x, y, z), duration) + @skill + def move_forward(self, distance: float, speed: float = 0.3) -> None: + """Move forward by a given distance in meters. + + Args: + distance: Distance to move in meters (positive = forward). + speed: Forward speed in m/s. Default 0.3. + """ + if self.connection and abs(distance) >= 0.01: + duration = abs(distance) / max(0.01, speed) + vx = speed if distance >= 0 else -speed + self.connection.move(Vector3(vx, 0.0, 0.0), duration) + + @skill + def go_to_position( + self, x: float, y: float, z: float, + vx_ff: float = 0.0, vy_ff: float = 0.0, vz_ff: float = 0.0, + ) -> bool: + """Send position target in local NED with optional velocity feedforward. + + Args: + x: North position in meters (local NED). + y: East position in meters (local NED). + z: Down position in meters (negative = up). + vx_ff: Velocity feedforward North (m/s). Optional. + vy_ff: Velocity feedforward East (m/s). Optional. + vz_ff: Velocity feedforward Down (m/s). Optional. 
+ + Returns: + True if position target sent successfully + """ + if self.connection: + return self.connection.set_position_target(x, y, z, vx_ff, vy_ff, vz_ff) + return False + + @skill + def rotate_to(self, heading_deg: float, timeout: float = 60.0) -> bool: + """Rotate drone to a specific heading (yaw control). + + Args: + heading_deg: Target heading in degrees (0–360, 0=North, 90=East). + timeout: Max time to rotate in seconds. + + Returns: + True if rotation completed successfully + """ + if self.connection: + return self.connection.rotate_to(heading_deg, timeout) + return False + @skill def takeoff(self, altitude: float = 3.0) -> bool: """Takeoff to specified altitude. @@ -387,49 +442,44 @@ def fly_to(self, lat: float, lon: float, alt: float) -> str: @skill def follow_object( - self, object_description: str, duration: float = 120.0 - ) -> Generator[str, None, None]: + self, object_description: str, duration: float = 0.0 + ) -> str: """Follow an object with visual servoing. Example: - follow_object(object_description="red car", duration=120) + follow_object(object_description="red car", duration=0) # 0 = track until stop_follow Args: object_description (str): A short and clear description of the object. - duration (float, optional): How long to track for. Defaults to 120.0. + duration (float, optional): Max tracking time in seconds; 0 = until stop_follow. Defaults to 0. """ msg = {"object_description": object_description, "duration": duration} self.follow_object_cmd.publish(String(json.dumps(msg))) - yield "Started trying to track. First, trying to find the object." 
- + # Poll for initial tracking result (found / not found / timeout) start_time = time.time() + timeout = 30.0 if duration <= 0 else min(duration, 30.0) - started_tracking = False - - while time.time() - start_time < duration: - time.sleep(0.01) + while time.time() - start_time < timeout: + time.sleep(0.1) with self._latest_status_lock: if not self._latest_status: continue match self._latest_status.get("status"): case "not_found" | "failed": - yield f"The '{object_description}' object has not been found. Stopped tracking." - break + return f"The '{object_description}' object has not been found. Stopped tracking." case "tracking": - # Only return tracking once. - if not started_tracking: - started_tracking = True - yield f"The '{object_description}' object is now being followed." + return ( + f"Now following '{object_description}'." + + (" Run stop_follow to stop." if duration <= 0 else f" For up to {duration}s.") + ) case "lost": - yield f"The '{object_description}' object has been lost. Stopped tracking." - break + return f"The '{object_description}' object was lost. Stopped tracking." case "stopped": - yield f"Tracking '{object_description}' has stopped." - break - else: - yield f"Stopped tracking '{object_description}'" + return f"Tracking '{object_description}' has stopped." + + return f"Started tracking command for '{object_description}'. No status update yet." def _on_move_twist(self, msg: Twist) -> None: """Handle Twist movement commands from tracking/navigation. 
@@ -438,8 +488,14 @@ def _on_move_twist(self, msg: Twist) -> None: msg: Twist message with linear and angular velocities """ if self.connection: - # Use move_twist to properly handle Twist messages - self.connection.move_twist(msg, duration=0, lock_altitude=True) + now = time.time() + if now - self._last_twist_log_time >= 0.5: + self._last_twist_log_time = now + logger.info( + "follow→ardupilot: linear x=%.3f y=%.3f z=%.3f yaw_rate=%.3f (m/s, rad/s)", + msg.linear.x, msg.linear.y, msg.linear.z, msg.angular.z, + ) + self.connection.move_twist(msg, duration=0, lock_altitude=False) def _on_gps_goal(self, cmd: LatLon) -> None: if self._latest_telemetry is None or self.connection is None: diff --git a/dimos/robot/drone/drone_spatial_nav_skill.py b/dimos/robot/drone/drone_spatial_nav_skill.py new file mode 100644 index 0000000000..14edea6666 --- /dev/null +++ b/dimos/robot/drone/drone_spatial_nav_skill.py @@ -0,0 +1,115 @@ +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Agent skill: text query spatial memory to navigate with position target.""" + +from typing import Any + +from dimos.agents.annotation import skill +from dimos.core.core import rpc +from dimos.core.module import Module +from dimos.robot.drone.specs import DroneGoToPositionSpec, SpatialMemoryQuerySpec + + +def _world_pose_to_local_ned(pos_x: float, pos_y: float, pos_z: float) -> tuple[float, float, float]: + """Map world pose (same as mavlink integrated pose / TF) to MAV_FRAME_LOCAL_NED. + + Matches dimos/robot/drone/mavlink_connection.py: internal x=North, y=-East, z=up. + """ + ned_x = pos_x + ned_y = -pos_y + ned_z = -pos_z + return ned_x, ned_y, ned_z + + +def _metadata_dict(raw: Any) -> dict[str, Any] | None: + if raw is None: + return None + if isinstance(raw, list) and raw: + raw = raw[0] + if isinstance(raw, dict): + return raw + return None + + +def _extract_xyz(metadata: dict[str, Any]) -> tuple[float, float, float] | None: + if "pos_x" in metadata and "pos_y" in metadata and "pos_z" in metadata: + return ( + float(metadata["pos_x"]), + float(metadata["pos_y"]), + float(metadata["pos_z"]), + ) + return None + + +class DroneSpatialNavSkill(Module): + """Bridges SpatialMemory text search and drone position commands.""" + + _memory: SpatialMemoryQuerySpec + _drone: DroneGoToPositionSpec + + @rpc + def start(self) -> None: + super().start() + + @skill + def navigate_to_where_i_saw( + self, description: str, match_index: int = 0, limit: int = 5 + ) -> str: + """Fly toward a place remembered from vision using a text description. + + Queries spatial memory (CLIP) for frames matching the description, takes the + stored robot pose for the chosen match, converts it to local NED, and sends a + position target to the drone. + + Args: + description: What to look for in stored views (e.g. "red building", "runway"). + match_index: Which result to use (0 = best match, 1 = second, etc.). + limit: How many spatial-memory candidates to retrieve. 
+ + Returns: + Result message for the agent. + """ + results = self._memory.query_by_text(description.strip(), limit=limit) + if not results: + return f"No spatial memory matches for: {description!r}" + + if match_index < 0 or match_index >= len(results): + return ( + f"match_index {match_index} out of range; query returned {len(results)} result(s) (0..{len(results) - 1})." + ) + + entry = results[match_index] + meta_raw = entry.get("metadata", {}) + meta = _metadata_dict(meta_raw) + if meta is None: + return "Spatial memory entry has no usable metadata." + + xyz = _extract_xyz(meta) + if xyz is None: + return ( + "Could not read pos_x/pos_y/pos_z from spatial memory metadata for this match." + ) + + ned = _world_pose_to_local_ned(*xyz) + ok = self._drone.go_to_position(ned[0], ned[1], ned[2], 0.0, 0.0, 0.0) + if ok: + return ( + f"Sent position target toward spatial memory match {match_index} for {description!r}: " + f"NED ({ned[0]:.2f}, {ned[1]:.2f}, {ned[2]:.2f}) m." + ) + return "Failed to send position target (no connection or FC rejected)." + + +__all__ = ["DroneSpatialNavSkill"] diff --git a/dimos/robot/drone/drone_tracking_module.py b/dimos/robot/drone/drone_tracking_module.py index 5798db374b..fe790b5804 100644 --- a/dimos/robot/drone/drone_tracking_module.py +++ b/dimos/robot/drone/drone_tracking_module.py @@ -63,6 +63,7 @@ def __init__( x_pid_params: PIDParams | None = None, y_pid_params: PIDParams | None = None, z_pid_params: PIDParams | None = None, + **kwargs: Any, ) -> None: """Initialize the drone tracking module. @@ -75,7 +76,7 @@ def __init__( If None, uses preset based on outdoor flag. z_pid_params: Optional PID parameters for altitude control. 
""" - super().__init__() + super().__init__(**kwargs) default_params = OUTDOOR_PID_PARAMS if outdoor else INDOOR_PID_PARAMS x_pid_params = x_pid_params if x_pid_params is not None else default_params @@ -84,8 +85,12 @@ def __init__( self._outdoor = outdoor self._max_velocity = None if outdoor else INDOOR_MAX_VELOCITY + # Gazebo and typical sim use front camera; keep forward_camera=True self.servoing_controller = DroneVisualServoingController( - x_pid_params=x_pid_params, y_pid_params=y_pid_params, z_pid_params=z_pid_params + x_pid_params=x_pid_params, + y_pid_params=y_pid_params, + z_pid_params=z_pid_params, + forward_camera=True, ) # Tracking state @@ -135,13 +140,16 @@ def stop(self) -> None: self._stop_tracking() super().stop() + # Forward velocity (vx) is sent only for this many seconds after tracking start; then vx=0, tracking continues. + FORWARD_VELOCITY_DURATION = 10.0 + @rpc - def track_object(self, object_name: str | None = None, duration: float = 120.0) -> str: + def track_object(self, object_name: str | None = None, duration: float = 0.0) -> str: """Track and follow an object using visual servoing. Args: object_name: Name of object to track, or None for most prominent - duration: Maximum tracking duration in seconds + duration: Maximum tracking duration in seconds; 0 = run until stopped (e.g. 
stop_follow) Returns: String status message @@ -206,17 +214,17 @@ def _visual_servoing_loop(self, tracker: Any, duration: float) -> None: Args: tracker: OpenCV tracker instance - duration: Maximum duration in seconds + duration: Maximum duration in seconds; <=0 means run until _tracking_active is False """ start_time = time.time() frame_count = 0 lost_track_count = 0 max_lost_frames = 100 - logger.info("Starting visual servoing loop") + logger.info("Starting visual servoing loop (forward vx for %.0fs, then vx=0)", self.FORWARD_VELOCITY_DURATION) try: - while self._tracking_active and (time.time() - start_time < duration): + while self._tracking_active and (duration <= 0 or (time.time() - start_time < duration)): # Get latest frame frame = self._get_latest_frame() if frame is None: @@ -252,27 +260,36 @@ def _visual_servoing_loop(self, tracker: Any, duration: float) -> None: center_x = frame_width / 2 center_y = frame_height / 2 - # Compute velocity commands - vx, vy, vz = self.servoing_controller.compute_velocity_control( + # Compute commands: vx (const), vz (vertical error), yaw_rate (lateral error); vy=0 + vx, vy, vz, yaw_rate = self.servoing_controller.compute_velocity_control( target_x=current_x, target_y=current_y, center_x=center_x, center_y=center_y, dt=0.033, # ~30Hz - lock_altitude=True, + lock_altitude=False, ) + # Forward only for first N seconds; then vx=0 (tracking and vz/yaw_rate continue) + if time.time() - start_time >= self.FORWARD_VELOCITY_DURATION: + vx = 0.0 - # Clamp velocity for indoor safety + # Clamp linear velocity for indoor safety if self._max_velocity is not None: vx = max(-self._max_velocity, min(self._max_velocity, vx)) vy = max(-self._max_velocity, min(self._max_velocity, vy)) + vz = max(-self._max_velocity, min(self._max_velocity, vz)) - # Publish velocity command via LCM + # Publish: vx, vz, yaw_rate (vy=0) if self.cmd_vel.transport: twist = Twist() - twist.linear = Vector3(vx, vy, 0) - twist.angular = Vector3(0, 0, 0) # No rotation 
for now + twist.linear = Vector3(vx, vy, vz) + twist.angular = Vector3(0, 0, yaw_rate) self.cmd_vel.publish(twist) + if frame_count % 30 == 0: + logger.info( + "follow_object cmd: vx=%.3f vz=%.3f yaw_rate=%.3f (m/s, rad/s)", + vx, vz, yaw_rate, + ) # Publish visualization if transport is set if self.tracking_overlay.transport: diff --git a/dimos/robot/drone/drone_visual_servoing_controller.py b/dimos/robot/drone/drone_visual_servoing_controller.py index 72e47331f7..859b87cecb 100644 --- a/dimos/robot/drone/drone_visual_servoing_controller.py +++ b/dimos/robot/drone/drone_visual_servoing_controller.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Minimal visual servoing controller for drone with downward-facing camera.""" +"""Minimal visual servoing controller for drone camera (forward- or downward-facing).""" from typing import TypeAlias @@ -23,25 +23,52 @@ class DroneVisualServoingController: - """Minimal visual servoing for downward-facing drone camera using velocity-only control.""" + """Visual servoing for drone camera using velocity-only control.""" + + # Constant forward speed (m/s) while following; forward from image will be added later. + DEFAULT_FORWARD_SPEED = 0.2 + # Gain: vertical pixel error -> vz (m/s). NED: positive = down. Object below center -> descend. + DEFAULT_VERTICAL_ERROR_GAIN = 0.0012 + MAX_VZ = 0.45 # m/s + # Gain: lateral pixel error -> yaw rate (rad/s). Object right of center -> positive yaw_rate (turn right). + DEFAULT_LATERAL_ERROR_TO_YAW_RATE = 0.001 + MAX_YAW_RATE = 0.5 # rad/s def __init__( self, x_pid_params: PIDParams, y_pid_params: PIDParams, z_pid_params: PIDParams | None = None, + forward_camera: bool = True, + forward_speed: float | None = None, + vertical_error_gain: float | None = None, + lateral_error_to_yaw_rate: float | None = None, ) -> None: """ Initialize drone visual servoing controller. 
Args: - x_pid_params: (kp, ki, kd, output_limits, integral_limit, deadband) for forward/back - y_pid_params: (kp, ki, kd, output_limits, integral_limit, deadband) for left/right - z_pid_params: Optional params for altitude control + x_pid_params: Reserved for forward from image later. + y_pid_params: Reserved (lateral error drives yaw rate, not strafe). + z_pid_params: Optional; unused when using vertical_error_gain. + forward_camera: Reserved for later. + forward_speed: Constant vx (m/s). Default 0.2. + vertical_error_gain: Image vertical error (px) -> vz. Default 0.0008. + lateral_error_to_yaw_rate: Image lateral error (px) -> yaw_rate (rad/s). Default 0.001. """ self.x_pid = PIDController(*x_pid_params) self.y_pid = PIDController(*y_pid_params) self.z_pid = PIDController(*z_pid_params) if z_pid_params else None + self.forward_camera = forward_camera + self.forward_speed = forward_speed if forward_speed is not None else self.DEFAULT_FORWARD_SPEED + self.vertical_error_gain = ( + vertical_error_gain if vertical_error_gain is not None else self.DEFAULT_VERTICAL_ERROR_GAIN + ) + self.lateral_error_to_yaw_rate = ( + lateral_error_to_yaw_rate + if lateral_error_to_yaw_rate is not None + else self.DEFAULT_LATERAL_ERROR_TO_YAW_RATE + ) def compute_velocity_control( self, @@ -53,44 +80,44 @@ def compute_velocity_control( desired_z: float | None = None, # Optional altitude control dt: float = 0.1, lock_altitude: bool = True, - ) -> tuple[float, float, float]: + ) -> tuple[float, float, float, float]: """ - Compute velocity commands to center target in camera view. + Compute velocity and yaw-rate commands to center target in camera view. - For downward camera: - - Image X error -> Drone Y velocity (left/right strafe) - - Image Y error -> Drone X velocity (forward/backward) + - vx: constant forward speed (no lateral velocity vy). + - Image X error -> yaw rate (rad/s): object right of center -> turn right. 
+ - Image Y error -> vz (altitude): object below center -> descend (NED). Args: target_x: Target X position in image target_y: Target Y position in image center_x: Desired X position (default 0) center_y: Desired Y position (default 0) - target_z: Current altitude (optional) - desired_z: Desired altitude (optional) - dt: Time step + target_z: Unused + desired_z: Unused + dt: Time step (unused for proportional gains) lock_altitude: If True, vz will always be 0 Returns: - tuple: (vx, vy, vz) velocities in m/s + tuple: (vx, vy, vz, yaw_rate) — vy is always 0; yaw_rate in rad/s. """ - # Compute errors (positive = target is to the right/below center) - error_x = target_x - center_x # Lateral error in image - error_y = target_y - center_y # Forward error in image - - # PID control (swap axes for downward camera) - # For downward camera: object below center (positive error_y) = object is behind drone - # Need to negate: positive error_y should give negative vx (move backward) - vy = self.y_pid.update(error_x, dt) # type: ignore[no-untyped-call] # Image X -> Drone Y (strafe) - vx = -self.x_pid.update(error_y, dt) # type: ignore[no-untyped-call] # Image Y -> Drone X (NEGATED) - - # Optional altitude control - vz = 0.0 - if not lock_altitude and self.z_pid and target_z is not None and desired_z is not None: - error_z = target_z - desired_z - vz = self.z_pid.update(error_z, dt) # type: ignore[no-untyped-call] - - return vx, vy, vz + error_x = target_x - center_x # Lateral: positive = target right of center + error_y = target_y - center_y # Vertical: positive = target below center + + vx = self.forward_speed + vy = 0.0 + + # Lateral error -> yaw rate (turn toward target). Right of center -> positive yaw_rate. 
+ yaw_rate = self.lateral_error_to_yaw_rate * error_x + yaw_rate = max(-self.MAX_YAW_RATE, min(self.MAX_YAW_RATE, yaw_rate)) + + if lock_altitude: + vz = 0.0 + else: + vz = self.vertical_error_gain * error_y + vz = max(-self.MAX_VZ, min(self.MAX_VZ, vz)) + + return vx, vy, vz, yaw_rate def reset(self) -> None: """Reset all PID controllers.""" diff --git a/dimos/robot/drone/gazebo_video_stream.py b/dimos/robot/drone/gazebo_video_stream.py new file mode 100644 index 0000000000..4318b5c5cf --- /dev/null +++ b/dimos/robot/drone/gazebo_video_stream.py @@ -0,0 +1,128 @@ +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Gazebo RTP/H264 video stream via GStreamer (e.g. 
ArduPilot SITL).""" + +import subprocess +import threading +import time + +import numpy as np +from reactivex import Subject + +from dimos.msgs.sensor_msgs.Image import Image, ImageFormat +from dimos.utils.logging_config import setup_logger + +logger = setup_logger() + +GAZEBO_RTP_CAPS = "application/x-rtp,media=(string)video,clock-rate=(int)90000,encoding-name=(string)H264" + + +class GazeboVideoStream: + def __init__(self, port: int = 5600, width: int = 640, height: int = 360) -> None: + self.port = port + self.width = width + self.height = height + self._video_subject: Subject[Image] = Subject() + self._process: subprocess.Popen[bytes] | None = None + self._stop_event = threading.Event() + + def start(self) -> bool: + try: + cmd = [ + "gst-launch-1.0", + "-q", + "udpsrc", + f"port={self.port}", + "!", + GAZEBO_RTP_CAPS, + "!", + "rtph264depay", + "!", + "h264parse", + "!", + "avdec_h264", + "!", + "videoscale", + "!", + f"video/x-raw,width={self.width},height={self.height}", + "!", + "videoconvert", + "!", + "video/x-raw,format=RGB", + "!", + "filesink", + "location=/dev/stdout", + "buffer-mode=2", + ] + self._process = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=0 + ) + self._stop_event.clear() + threading.Thread(target=self._capture_loop, daemon=True).start() + threading.Thread(target=self._error_monitor, daemon=True).start() + logger.info("Gazebo video started port %s", self.port) + return True + except Exception as e: + logger.error("Gazebo video failed: %s", e) + return False + + def _capture_loop(self) -> None: + channels = 3 + frame_size = self.width * self.height * channels + while not self._stop_event.is_set(): + try: + frame_data = b"" + bytes_needed = frame_size + while bytes_needed > 0 and not self._stop_event.is_set(): + if self._process is None or self._process.stdout is None: + break + chunk = self._process.stdout.read(bytes_needed) + if not chunk: + time.sleep(0.1) + break + frame_data += chunk + bytes_needed 
-= len(chunk) + if len(frame_data) == frame_size: + frame = np.frombuffer(frame_data, dtype=np.uint8).reshape( + (self.height, self.width, channels) + ) + self._video_subject.on_next(Image.from_numpy(frame, format=ImageFormat.RGB)) + except Exception as e: + if not self._stop_event.is_set(): + logger.error("Gazebo capture error: %s", e) + time.sleep(0.1) + + def _error_monitor(self) -> None: + while not self._stop_event.is_set() and self._process and self._process.stderr: + line = self._process.stderr.readline() + if not line: + continue + s = line.decode("utf-8", errors="replace").strip() + if "ERROR" in s or "WARNING" in s: + logger.warning("GStreamer: %s", s) + + def stop(self) -> None: + self._stop_event.set() + if self._process: + self._process.terminate() + try: + self._process.wait(timeout=2) + except subprocess.TimeoutExpired: + self._process.kill() + self._process = None + logger.info("Gazebo video stopped") + + def get_stream(self) -> Subject[Image]: + return self._video_subject diff --git a/dimos/robot/drone/mavlink_connection.py b/dimos/robot/drone/mavlink_connection.py index 076d9cd369..46366ca0f4 100644 --- a/dimos/robot/drone/mavlink_connection.py +++ b/dimos/robot/drone/mavlink_connection.py @@ -67,6 +67,7 @@ def __init__( # Flag to prevent concurrent fly_to commands self.flying_to_target = False + self._last_move_twist_log_time: float = 0.0 def connect(self) -> bool: """Connect to drone via MAVLink.""" @@ -142,6 +143,9 @@ def update_telemetry(self, timeout: float = 0.1) -> None: elif msg_type == "ATTITUDE": self._publish_odom() + elif msg_type == "LOCAL_POSITION_NED": + self._publish_odom() + self.telemetry[msg_type] = msg_dict self._publish_telemetry() @@ -178,11 +182,17 @@ def _publish_odom(self) -> None: current_time = time.time() dt = current_time - self._last_update - # Get position data from GLOBAL_POSITION_INT + local_ned = self.telemetry.get("LOCAL_POSITION_NED", {}) pos_data = self.telemetry.get("GLOBAL_POSITION_INT", {}) + if not 
self.outdoor and local_ned: + # NED → world: x North, -y East→West, -z Down→Up + self._position["x"] = local_ned.get("x", self._position["x"]) + self._position["y"] = -local_ned.get("y", -self._position["y"]) + self._position["z"] = -local_ned.get("z", -self._position["z"]) + # Outdoor mode: Use GPS coordinates - if self.outdoor and pos_data: + elif self.outdoor and pos_data: lat = pos_data.get("lat", 0) # Already in degrees from update_telemetry lon = pos_data.get("lon", 0) # Already in degrees from update_telemetry @@ -212,13 +222,13 @@ def _publish_odom(self) -> None: self._position["x"] += vx * dt # North → X (forward) self._position["y"] += -vy * dt # East → -Y (right in ROS, Y points left/West) - # Altitude handling (same for both modes) - if "ALTITUDE" in self.telemetry: - self._position["z"] = self.telemetry["ALTITUDE"].get("altitude_relative", 0) - elif pos_data: - self._position["z"] = pos_data.get( - "relative_alt", 0 - ) # Already in m from update_telemetry + if not (not self.outdoor and local_ned): + if "ALTITUDE" in self.telemetry: + self._position["z"] = self.telemetry["ALTITUDE"].get("altitude_relative", 0) + elif pos_data: + self._position["z"] = pos_data.get( + "relative_alt", 0 + ) # Already in m from update_telemetry self._last_update = current_time @@ -339,8 +349,11 @@ def move(self, velocity: Vector3, duration: float = 0.0) -> bool: def move_twist(self, twist: Twist, duration: float = 0.0, lock_altitude: bool = True) -> bool: """Move using ROS-style Twist commands. + Sent in body frame (MAV_FRAME_BODY_NED): x forward, y right, z down; yaw_rate + positive = turn right. Used by follow and other velocity-based control. 
+ Args: - twist: Twist message with linear velocities (angular.z ignored for now) + twist: Twist message with linear velocities and angular.z (yaw_rate, rad/s) duration: How long to move (0 = single command) lock_altitude: If True, ignore Z velocity and maintain current altitude @@ -353,56 +366,98 @@ def move_twist(self, twist: Twist, duration: float = 0.0, lock_altitude: bool = # Extract velocities forward = twist.linear.x # m/s forward (body frame) right = twist.linear.y # m/s right (body frame) - down = 0.0 if lock_altitude else -twist.linear.z # Lock altitude by default + # Body NED: positive z = down. Controller vz positive = descend. + down = 0.0 if lock_altitude else twist.linear.z + yaw_rate = twist.angular.z # rad/s + + # type_mask: use vx,vy,vz and yaw_rate; ignore position, accel, force, yaw + mask_vel = 0b0000111111000111 + mask_use_yaw_rate = mask_vel & ~mavutil.mavlink.POSITION_TARGET_TYPEMASK_YAW_RATE_IGNORE + + now = time.time() + if now - self._last_move_twist_log_time >= 0.5: + self._last_move_twist_log_time = now + logger.info( + "ardupilot SET_POSITION_TARGET_LOCAL_NED: vx=%.3f vy=%.3f vz=%.3f (body m/s) yaw_rate=%.3f (rad/s)", + forward, right, down, yaw_rate, + ) if duration > 0: - # Send velocity for duration end_time = time.time() + duration while time.time() < end_time: self.mavlink.mav.set_position_target_local_ned_send( - 0, # time_boot_ms + 0, self.mavlink.target_system, self.mavlink.target_component, - mavutil.mavlink.MAV_FRAME_BODY_NED, # Body frame for strafing - 0b0000111111000111, # type_mask - velocities only, no rotation - 0, - 0, - 0, # positions (ignored) - forward, - right, - down, # velocities in m/s - 0, - 0, - 0, # accelerations (ignored) + mavutil.mavlink.MAV_FRAME_BODY_NED, + mask_use_yaw_rate, + 0, 0, 0, + forward, right, down, + 0, 0, 0, 0, - 0, # yaw, yaw_rate (ignored) + yaw_rate, ) - time.sleep(0.05) # 20Hz - # Send stop command + time.sleep(0.05) self.stop() else: - # Send single command for continuous movement 
self.mavlink.mav.set_position_target_local_ned_send( - 0, # time_boot_ms + 0, self.mavlink.target_system, self.mavlink.target_component, - mavutil.mavlink.MAV_FRAME_BODY_NED, # Body frame for strafing - 0b0000111111000111, # type_mask - velocities only, no rotation - 0, - 0, - 0, # positions (ignored) - forward, - right, - down, # velocities in m/s - 0, - 0, - 0, # accelerations (ignored) + mavutil.mavlink.MAV_FRAME_BODY_NED, + mask_use_yaw_rate, + 0, 0, 0, + forward, right, down, + 0, 0, 0, 0, - 0, # yaw, yaw_rate (ignored) + yaw_rate, ) return True + def set_position_target( + self, + x: float, + y: float, + z: float, + vx_ff: float = 0.0, + vy_ff: float = 0.0, + vz_ff: float = 0.0, + ) -> bool: + """Send position target in local NED with optional velocity feedforward. + + Uses SET_POSITION_TARGET_LOCAL_NED with position + velocity (type_mask + ignores accel, force, yaw). Frame: MAV_FRAME_LOCAL_NED (x North, y East, z Down). + """ + if not self.connected: + return False + mask = ( + mavutil.mavlink.POSITION_TARGET_TYPEMASK_AX_IGNORE + | mavutil.mavlink.POSITION_TARGET_TYPEMASK_AY_IGNORE + | mavutil.mavlink.POSITION_TARGET_TYPEMASK_AZ_IGNORE + | mavutil.mavlink.POSITION_TARGET_TYPEMASK_YAW_IGNORE + | mavutil.mavlink.POSITION_TARGET_TYPEMASK_YAW_RATE_IGNORE + ) + self.mavlink.mav.set_position_target_local_ned_send( + 0, + self.mavlink.target_system, + self.mavlink.target_component, + mavutil.mavlink.MAV_FRAME_LOCAL_NED, + mask, + x, + y, + z, + vx_ff, + vy_ff, + vz_ff, + 0, + 0, + 0, + 0, + 0, + ) + return True + def stop(self) -> bool: """Stop all movement.""" if not self.connected: @@ -452,42 +507,22 @@ def rotate_to(self, target_heading_deg: float, timeout: float = 60.0) -> bool: while time.time() - start_time < timeout: loop_count += 1 - # Don't call update_telemetry - let background thread handle it - # Just read the current telemetry which should be continuously updated - - if "GLOBAL_POSITION_INT" not in self.telemetry: - logger.warning("No 
GLOBAL_POSITION_INT in telemetry dict") + # Prefer ATTITUDE.yaw (radians) - available in SITL/Gazebo. Fallback: GLOBAL_POSITION_INT.hdg (degrees). + current_heading_deg = None + if "ATTITUDE" in self.telemetry: + yaw_rad = self.telemetry["ATTITUDE"].get("yaw", 0) + current_heading_deg = math.degrees(yaw_rad) + if current_heading_deg is None and "GLOBAL_POSITION_INT" in self.telemetry: + raw_hdg = self.telemetry["GLOBAL_POSITION_INT"].get("hdg", 0) + current_heading_deg = raw_hdg / 100.0 if raw_hdg > 360 else raw_hdg + + if current_heading_deg is None: + logger.warning("No ATTITUDE or GLOBAL_POSITION_INT for heading in rotate_to") time.sleep(0.1) continue - # Debug: Log what's in telemetry - gps_telem = self.telemetry["GLOBAL_POSITION_INT"] - - # Get current heading - check if already converted or still in centidegrees - raw_hdg = gps_telem.get("hdg", 0) - - # Debug logging to figure out the issue - if loop_count % 5 == 0: # Log every 5th iteration - logger.info(f"DEBUG TELEMETRY: raw hdg={raw_hdg}, type={type(raw_hdg)}") - logger.info(f"DEBUG TELEMETRY keys: {list(gps_telem.keys())[:5]}") # First 5 keys - - # Check if hdg is already converted (should be < 360 if in degrees, > 360 if in centidegrees) - if raw_hdg > 360: - logger.info(f"HDG appears to be in centidegrees: {raw_hdg}") - current_heading_deg = raw_hdg / 100.0 - else: - logger.info(f"HDG appears to be in degrees already: {raw_hdg}") - current_heading_deg = raw_hdg - else: - # Normal conversion - if raw_hdg > 360: - current_heading_deg = raw_hdg / 100.0 - else: - current_heading_deg = raw_hdg - # Normalize to 0-360 - if current_heading_deg > 360: - current_heading_deg = current_heading_deg % 360 + current_heading_deg = current_heading_deg % 360 # Calculate heading error (shortest angular distance) heading_error = target_heading_deg - current_heading_deg diff --git a/dimos/robot/drone/specs.py b/dimos/robot/drone/specs.py new file mode 100644 index 0000000000..85afabb8ae --- /dev/null +++ 
b/dimos/robot/drone/specs.py @@ -0,0 +1,45 @@ +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Protocol specs for drone spatial memory wiring.""" + +from typing import Any, Protocol + +from dimos.spec.utils import Spec + + +class SpatialMemoryQuerySpec(Spec, Protocol): + """Spatial memory text search (CLIP).""" + + def query_by_text(self, text: str, limit: int = 5) -> list[dict[str, Any]]: + """Return ranked matches with metadata (pos_x, pos_y, pos_z).""" + ... + + +class DroneGoToPositionSpec(Spec, Protocol): + """Drone local NED position target.""" + + def go_to_position( + self, + x: float, + y: float, + z: float, + vx_ff: float = 0.0, + vy_ff: float = 0.0, + vz_ff: float = 0.0, + ) -> bool: + ... 
+ + +__all__ = ["SpatialMemoryQuerySpec", "DroneGoToPositionSpec"] diff --git a/dimos/robot/drone/test_drone.py b/dimos/robot/drone/test_drone.py index 0b30c22c35..e884a14c70 100644 --- a/dimos/robot/drone/test_drone.py +++ b/dimos/robot/drone/test_drone.py @@ -954,56 +954,53 @@ def test_output_clamping(self) -> None: y_pid_params=(1.0, 0.0, 0.0, (-max_vel, max_vel), None, 0), ) - # Large error should be clamped - vx, vy, _vz = controller.compute_velocity_control( + # vx constant; vy always 0; yaw_rate and vz clamped + vx, vy, vz, yaw_rate = controller.compute_velocity_control( target_x=1000, target_y=1000, center_x=0, center_y=0, dt=0.1 ) - self.assertLessEqual(abs(vx), max_vel) - self.assertLessEqual(abs(vy), max_vel) + self.assertAlmostEqual(vx, controller.forward_speed) + self.assertAlmostEqual(vy, 0.0) + self.assertLessEqual(abs(yaw_rate), controller.MAX_YAW_RATE) - def test_deadband_prevents_integral_windup(self) -> None: - """Deadband prevents integral accumulation for small errors.""" + def test_lateral_error_to_yaw_rate(self) -> None: + """Lateral image error produces proportional yaw_rate; no strafe (vy=0).""" from dimos.robot.drone.drone_visual_servoing_controller import ( DroneVisualServoingController, ) - deadband = 10 # pixels controller = DroneVisualServoingController( - x_pid_params=(0.0, 1.0, 0.0, (-2.0, 2.0), None, deadband), # integral only - y_pid_params=(0.0, 1.0, 0.0, (-2.0, 2.0), None, deadband), + x_pid_params=(0.0, 0.0, 0.0, (-2.0, 2.0), None, 0), + y_pid_params=(0.0, 0.0, 0.0, (-2.0, 2.0), None, 0), ) - # With error inside deadband, integral should stay at zero - for _ in range(10): - controller.compute_velocity_control( - target_x=5, target_y=5, center_x=0, center_y=0, dt=0.1 - ) - - # Integral should be zero since error < deadband - self.assertEqual(controller.x_pid.integral, 0.0) - self.assertEqual(controller.y_pid.integral, 0.0) + # No lateral error -> yaw_rate ~0, vy=0 + _, vy, _, yaw_rate = controller.compute_velocity_control( + 
target_x=320, target_y=240, center_x=320, center_y=240, dt=0.1 + ) + self.assertAlmostEqual(vy, 0.0) + self.assertAlmostEqual(yaw_rate, 0.0, places=5) + # Object right of center -> positive yaw_rate + _, vy2, _, yaw_rate2 = controller.compute_velocity_control( + target_x=400, target_y=240, center_x=320, center_y=240, dt=0.1 + ) + self.assertAlmostEqual(vy2, 0.0) + self.assertGreater(yaw_rate2, 0) def test_reset_clears_integral(self) -> None: - """reset() clears accumulated integral to prevent windup.""" + """reset() clears PID state (PIDs reserved for future use).""" from dimos.robot.drone.drone_visual_servoing_controller import ( DroneVisualServoingController, ) controller = DroneVisualServoingController( - x_pid_params=(0.0, 1.0, 0.0, (-10.0, 10.0), None, 0), # Only integral + x_pid_params=(0.0, 1.0, 0.0, (-10.0, 10.0), None, 0), y_pid_params=(0.0, 1.0, 0.0, (-10.0, 10.0), None, 0), ) + # Wind up y_pid integral manually (control output no longer uses it) + for _ in range(5): + controller.y_pid.update(100.0, 0.1) # type: ignore[no-untyped-call] + self.assertNotEqual(controller.y_pid.integral, 0.0) - # Accumulate integral by calling multiple times with error - for _ in range(10): - controller.compute_velocity_control( - target_x=100, target_y=100, center_x=0, center_y=0, dt=0.1 - ) - - # Integral should be non-zero - self.assertNotEqual(controller.x_pid.integral, 0.0) - - # Reset should clear it controller.reset() self.assertEqual(controller.x_pid.integral, 0.0) self.assertEqual(controller.y_pid.integral, 0.0) @@ -1027,7 +1024,7 @@ def test_velocity_from_bbox_center_error(self) -> None: frame_center = (320, 180) bbox_center = (400, 180) - vx, vy, _vz = controller.compute_velocity_control( + vx, vy, vz, yaw_rate = controller.compute_velocity_control( target_x=bbox_center[0], target_y=bbox_center[1], center_x=frame_center[0], @@ -1035,7 +1032,8 @@ def test_velocity_from_bbox_center_error(self) -> None: dt=0.1, ) - # Object to the right -> drone should strafe right 
(positive vy) - self.assertGreater(vy, 0) - # No vertical offset -> vx should be ~0 - self.assertAlmostEqual(vx, 0, places=1) + # Object to the right -> positive yaw_rate (turn right); no strafe + self.assertGreater(yaw_rate, 0) + self.assertAlmostEqual(vy, 0.0) + self.assertAlmostEqual(vx, controller.forward_speed) + self.assertAlmostEqual(vz, 0, places=4) diff --git a/pyproject.toml b/pyproject.toml index 7e2f38546e..5f56378e41 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,7 +67,7 @@ dependencies = [ "structlog>=25.5.0,<26", "colorlog==6.9.0", # Core Msgs - "opencv-python", + "opencv-contrib-python==4.10.0.84", "open3d-unofficial-arm; platform_system == 'Linux' and platform_machine == 'aarch64'", "open3d>=0.18.0; platform_system != 'Linux' or platform_machine != 'aarch64'", # CLI @@ -322,7 +322,7 @@ docker = [ "pydantic-settings>=2.11.0,<3", "typer>=0.19.2,<1", "requests>=2.28", - "opencv-python-headless", + "opencv-contrib-python==4.10.0.84", "lcm", "sortedcontainers", "PyTurboJPEG", diff --git a/uv.lock b/uv.lock index 529842294b..ea227daf13 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.10" resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'darwin'", @@ -1692,7 +1692,7 @@ dependencies = [ { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "open3d", marker = "platform_machine != 'aarch64' or sys_platform != 'linux'" }, { name = "open3d-unofficial-arm", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, - { name = "opencv-python" }, + { name = "opencv-contrib-python" }, { name = "pin" }, { name = "plotext" }, { name = "plum-dispatch" }, @@ -1864,7 +1864,7 @@ docker = [ { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "open3d", marker = "platform_machine != 
'aarch64' or sys_platform != 'linux'" }, { name = "open3d-unofficial-arm", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, - { name = "opencv-python-headless" }, + { name = "opencv-contrib-python" }, { name = "plum-dispatch" }, { name = "pydantic" }, { name = "pydantic-settings" }, @@ -2052,9 +2052,9 @@ requires-dist = [ { name = "open3d-unofficial-arm", marker = "platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'docker'" }, { name = "openai", marker = "extra == 'agents'" }, { name = "openai-whisper", marker = "extra == 'agents'" }, + { name = "opencv-contrib-python", specifier = "==4.10.0.84" }, + { name = "opencv-contrib-python", marker = "extra == 'docker'", specifier = "==4.10.0.84" }, { name = "opencv-contrib-python", marker = "extra == 'misc'", specifier = "==4.10.0.84" }, - { name = "opencv-python" }, - { name = "opencv-python-headless", marker = "extra == 'docker'" }, { name = "pandas-stubs", marker = "extra == 'dev'", specifier = ">=2.3.2.250926,<3" }, { name = "pillow", marker = "extra == 'perception'" }, { name = "pin", specifier = ">=3.3.0" }, @@ -5620,6 +5620,7 @@ resolution-markers = [ ] wheels = [ { url = "https://files.pythonhosted.org/packages/82/6c/90d3f532f608a03a13c1d6c16c266ffa3828e8011b1549d3b61db2ad59f5/nvidia_cublas_cu12-12.9.1.4-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:7a950dae01add3b415a5a5cdc4ec818fb5858263e9cca59004bb99fdbbd3a5d6", size = 575006342, upload-time = "2025-06-05T20:04:16.902Z" }, + { url = "https://files.pythonhosted.org/packages/77/3c/aa88abe01f3be3d1f8f787d1d33dc83e76fec05945f9a28fbb41cfb99cd5/nvidia_cublas_cu12-12.9.1.4-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:453611eb21a7c1f2c2156ed9f3a45b691deda0440ec550860290dc901af5b4c2", size = 581242350, upload-time = "2025-06-05T20:04:51.979Z" }, { url = "https://files.pythonhosted.org/packages/45/a1/a17fade6567c57452cfc8f967a40d1035bb9301db52f27808167fbb2be2f/nvidia_cublas_cu12-12.9.1.4-py3-none-win_amd64.whl", 
hash = "sha256:1e5fee10662e6e52bd71dec533fbbd4971bb70a5f24f3bc3793e5c2e9dc640bf", size = 553153899, upload-time = "2025-06-05T20:13:35.556Z" }, ] @@ -5678,6 +5679,7 @@ resolution-markers = [ ] wheels = [ { url = "https://files.pythonhosted.org/packages/bc/e0/0279bd94539fda525e0c8538db29b72a5a8495b0c12173113471d28bce78/nvidia_cuda_runtime_cu12-12.9.79-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:83469a846206f2a733db0c42e223589ab62fd2fabac4432d2f8802de4bded0a4", size = 3515012, upload-time = "2025-06-05T20:00:35.519Z" }, + { url = "https://files.pythonhosted.org/packages/bc/46/a92db19b8309581092a3add7e6fceb4c301a3fd233969856a8cbf042cd3c/nvidia_cuda_runtime_cu12-12.9.79-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:25bba2dfb01d48a9b59ca474a1ac43c6ebf7011f1b0b8cc44f54eb6ac48a96c3", size = 3493179, upload-time = "2025-06-05T20:00:53.735Z" }, { url = "https://files.pythonhosted.org/packages/59/df/e7c3a360be4f7b93cee39271b792669baeb3846c58a4df6dfcf187a7ffab/nvidia_cuda_runtime_cu12-12.9.79-py3-none-win_amd64.whl", hash = "sha256:8e018af8fa02363876860388bd10ccb89eb9ab8fb0aa749aaf58430a9f7c4891", size = 3591604, upload-time = "2025-06-05T20:11:17.036Z" }, ] @@ -6114,25 +6116,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e9/a5/1be1516390333ff9be3a9cb648c9f33df79d5096e5884b5df71a588af463/opencv_python-4.13.0.92-cp37-abi3-win_amd64.whl", hash = "sha256:423d934c9fafb91aad38edf26efb46da91ffbc05f3f59c4b0c72e699720706f5", size = 40212062, upload-time = "2026-02-05T07:02:12.724Z" }, ] -[[package]] -name = "opencv-python-headless" -version = "4.13.0.92" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, -] -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/79/42/2310883be3b8826ac58c3f2787b9358a2d46923d61f88fedf930bc59c60c/opencv_python_headless-4.13.0.92-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:1a7d040ac656c11b8c38677cc8cccdc149f98535089dbe5b081e80a4e5903209", size = 46247192, upload-time = "2026-02-05T07:01:35.187Z" }, - { url = "https://files.pythonhosted.org/packages/2d/1e/6f9e38005a6f7f22af785df42a43139d0e20f169eb5787ce8be37ee7fcc9/opencv_python_headless-4.13.0.92-cp37-abi3-macosx_14_0_x86_64.whl", hash = "sha256:3e0a6f0a37994ec6ce5f59e936be21d5d6384a4556f2d2da9c2f9c5dc948394c", size = 32568914, upload-time = "2026-02-05T07:01:51.989Z" }, - { url = "https://files.pythonhosted.org/packages/21/76/9417a6aef9def70e467a5bf560579f816148a4c658b7d525581b356eda9e/opencv_python_headless-4.13.0.92-cp37-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5c8cfc8e87ed452b5cecb9419473ee5560a989859fe1d10d1ce11ae87b09a2cb", size = 33703709, upload-time = "2026-02-05T10:24:46.469Z" }, - { url = "https://files.pythonhosted.org/packages/92/ce/bd17ff5772938267fd49716e94ca24f616ff4cb1ff4c6be13085108037be/opencv_python_headless-4.13.0.92-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0525a3d2c0b46c611e2130b5fdebc94cf404845d8fa64d2f3a3b679572a5bd22", size = 56016764, upload-time = "2026-02-05T10:26:48.904Z" }, - { url = "https://files.pythonhosted.org/packages/8f/b4/b7bcbf7c874665825a8c8e1097e93ea25d1f1d210a3e20d4451d01da30aa/opencv_python_headless-4.13.0.92-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:eb60e36b237b1ebd40a912da5384b348df8ed534f6f644d8e0b4f103e272ba7d", size = 35010236, upload-time = "2026-02-05T10:28:11.031Z" }, - { url = "https://files.pythonhosted.org/packages/4b/33/b5db29a6c00eb8f50708110d8d453747ca125c8b805bc437b289dbdcc057/opencv_python_headless-4.13.0.92-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:0bd48544f77c68b2941392fcdf9bcd2b9cdf00e98cb8c29b2455d194763cf99e", size = 60391106, upload-time = 
"2026-02-05T10:30:14.236Z" }, - { url = "https://files.pythonhosted.org/packages/fb/c3/52cfea47cd33e53e8c0fbd6e7c800b457245c1fda7d61660b4ffe9596a7f/opencv_python_headless-4.13.0.92-cp37-abi3-win32.whl", hash = "sha256:a7cf08e5b191f4ebb530791acc0825a7986e0d0dee2a3c491184bd8599848a4b", size = 30812232, upload-time = "2026-02-05T07:02:29.594Z" }, - { url = "https://files.pythonhosted.org/packages/4a/90/b338326131ccb2aaa3c2c85d00f41822c0050139a4bfe723cfd95455bd2d/opencv_python_headless-4.13.0.92-cp37-abi3-win_amd64.whl", hash = "sha256:77a82fe35ddcec0f62c15f2ba8a12ecc2ed4207c17b0902c7a3151ae29f37fb6", size = 40070414, upload-time = "2026-02-05T07:02:26.448Z" }, -] - [[package]] name = "opentelemetry-api" version = "1.39.1" @@ -9910,12 +9893,19 @@ name = "triton" version = "3.6.0" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/44/ba/b1b04f4b291a3205d95ebd24465de0e5bf010a2df27a4e58a9b5f039d8f2/triton-3.6.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6c723cfb12f6842a0ae94ac307dba7e7a44741d720a40cf0e270ed4a4e3be781", size = 175972180, upload-time = "2026-01-20T16:15:53.664Z" }, { url = "https://files.pythonhosted.org/packages/8c/f7/f1c9d3424ab199ac53c2da567b859bcddbb9c9e7154805119f8bd95ec36f/triton-3.6.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a6550fae429e0667e397e5de64b332d1e5695b73650ee75a6146e2e902770bea", size = 188105201, upload-time = "2026-01-20T16:00:29.272Z" }, + { url = "https://files.pythonhosted.org/packages/0f/2c/96f92f3c60387e14cc45aed49487f3486f89ea27106c1b1376913c62abe4/triton-3.6.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:49df5ef37379c0c2b5c0012286f80174fcf0e073e5ade1ca9a86c36814553651", size = 176081190, upload-time = "2026-01-20T16:16:00.523Z" }, { url = 
"https://files.pythonhosted.org/packages/e0/12/b05ba554d2c623bffa59922b94b0775673de251f468a9609bc9e45de95e9/triton-3.6.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e8e323d608e3a9bfcc2d9efcc90ceefb764a82b99dea12a86d643c72539ad5d3", size = 188214640, upload-time = "2026-01-20T16:00:35.869Z" }, + { url = "https://files.pythonhosted.org/packages/17/5d/08201db32823bdf77a0e2b9039540080b2e5c23a20706ddba942924ebcd6/triton-3.6.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:374f52c11a711fd062b4bfbb201fd9ac0a5febd28a96fb41b4a0f51dde3157f4", size = 176128243, upload-time = "2026-01-20T16:16:07.857Z" }, { url = "https://files.pythonhosted.org/packages/ab/a8/cdf8b3e4c98132f965f88c2313a4b493266832ad47fb52f23d14d4f86bb5/triton-3.6.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74caf5e34b66d9f3a429af689c1c7128daba1d8208df60e81106b115c00d6fca", size = 188266850, upload-time = "2026-01-20T16:00:43.041Z" }, + { url = "https://files.pythonhosted.org/packages/3c/12/34d71b350e89a204c2c7777a9bba0dcf2f19a5bfdd70b57c4dbc5ffd7154/triton-3.6.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:448e02fe6dc898e9e5aa89cf0ee5c371e99df5aa5e8ad976a80b93334f3494fd", size = 176133521, upload-time = "2026-01-20T16:16:13.321Z" }, { url = "https://files.pythonhosted.org/packages/f9/0b/37d991d8c130ce81a8728ae3c25b6e60935838e9be1b58791f5997b24a54/triton-3.6.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:10c7f76c6e72d2ef08df639e3d0d30729112f47a56b0c81672edc05ee5116ac9", size = 188289450, upload-time = "2026-01-20T16:00:49.136Z" }, + { url = "https://files.pythonhosted.org/packages/ce/4e/41b0c8033b503fd3cfcd12392cdd256945026a91ff02452bef40ec34bee7/triton-3.6.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1722e172d34e32abc3eb7711d0025bb69d7959ebea84e3b7f7a341cd7ed694d6", size = 176276087, upload-time = 
"2026-01-20T16:16:18.989Z" }, { url = "https://files.pythonhosted.org/packages/35/f8/9c66bfc55361ec6d0e4040a0337fb5924ceb23de4648b8a81ae9d33b2b38/triton-3.6.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d002e07d7180fd65e622134fbd980c9a3d4211fb85224b56a0a0efbd422ab72f", size = 188400296, upload-time = "2026-01-20T16:00:56.042Z" }, + { url = "https://files.pythonhosted.org/packages/49/55/5ecf0dcaa0f2fbbd4420f7ef227ee3cb172e91e5fede9d0ecaddc43363b4/triton-3.6.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef5523241e7d1abca00f1d240949eebdd7c673b005edbbce0aca95b8191f1d43", size = 176138577, upload-time = "2026-01-20T16:16:25.426Z" }, { url = "https://files.pythonhosted.org/packages/df/3d/9e7eee57b37c80cec63322c0231bb6da3cfe535a91d7a4d64896fcb89357/triton-3.6.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a17a5d5985f0ac494ed8a8e54568f092f7057ef60e1b0fa09d3fd1512064e803", size = 188273063, upload-time = "2026-01-20T16:01:07.278Z" }, + { url = "https://files.pythonhosted.org/packages/48/db/56ee649cab5eaff4757541325aca81f52d02d4a7cd3506776cad2451e060/triton-3.6.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b3a97e8ed304dfa9bd23bb41ca04cdf6b2e617d5e782a8653d616037a5d537d", size = 176274804, upload-time = "2026-01-20T16:16:31.528Z" }, { url = "https://files.pythonhosted.org/packages/f6/56/6113c23ff46c00aae423333eb58b3e60bdfe9179d542781955a5e1514cb3/triton-3.6.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:46bd1c1af4b6704e554cad2eeb3b0a6513a980d470ccfa63189737340c7746a7", size = 188397994, upload-time = "2026-01-20T16:01:14.236Z" }, ]