Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions dimos/agents/skills/osm.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
# limitations under the License.


from typing import Any

from dimos.agents.annotation import skill
from dimos.core.module import Module
from dimos.core.stream import In
Expand All @@ -31,8 +33,8 @@ class OsmSkill(Module):

gps_location: In[LatLon]

def __init__(self) -> None:
super().__init__()
def __init__(self, **kwargs: Any) -> None:
super().__init__(**kwargs)
self._latest_location = None
self._current_location_map = CurrentLocationMap(QwenVlModel())

Expand Down
4 changes: 4 additions & 0 deletions dimos/robot/all_blueprints.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,11 @@
"demo-osm": "dimos.mapping.osm.demo_osm:demo_osm",
"demo-skill": "dimos.agents.skills.demo_skill:demo_skill",
"drone-agentic": "dimos.robot.drone.blueprints.agentic.drone_agentic:drone_agentic",
"drone-agentic-gazebo": "dimos.robot.drone.blueprints.agentic.drone_agentic:drone_agentic_gazebo",
"drone-agentic-gazebo-spatial": "dimos.robot.drone.blueprints.agentic.drone_agentic:drone_agentic_gazebo_spatial",
"drone-basic": "dimos.robot.drone.blueprints.basic.drone_basic:drone_basic",
"drone-basic-gazebo": "dimos.robot.drone.blueprints.basic.drone_basic:drone_basic_gazebo",
"drone-basic-gazebo-spatial": "dimos.robot.drone.blueprints.basic.drone_basic:drone_basic_gazebo_spatial",
"dual-xarm6-planner": "dimos.manipulation.blueprints:dual_xarm6_planner",
"keyboard-teleop-piper": "dimos.robot.manipulators.piper.blueprints:keyboard_teleop_piper",
"keyboard-teleop-xarm6": "dimos.robot.manipulators.xarm.blueprints:keyboard_teleop_xarm6",
Expand Down
27 changes: 27 additions & 0 deletions dimos/robot/drone/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ dimos run drone-basic --set outdoor=true

# Agentic with LLM control
dimos run drone-agentic

# Gazebo + ArduPilot SITL
dimos run drone-basic-gazebo
dimos run drone-agentic-gazebo
dimos run drone-basic-gazebo-spatial
dimos run drone-agentic-gazebo-spatial # + spatial memory + navigate
```

To interact with the agent, run `dimos humancli` in a separate terminal.
Expand Down Expand Up @@ -48,6 +54,27 @@ Composes on top of `drone-basic`, adding autonomous capabilities:
| `Agent` | LLM agent (default: GPT-4o) |
| `WebInput` | Web/CLI interface for human commands |

### `drone-basic-gazebo` / `drone-agentic-gazebo` / `drone-basic-gazebo-spatial` / `drone-agentic-gazebo-spatial`
Same as the non-Gazebo blueprints but with video from Gazebo (RTP/H264 on UDP 5600). Odometry from MAVLink; when SITL sends `LOCAL_POSITION_NED` it is used for position. Spatial variants add `SpatialMemory`. **`drone-agentic-gazebo-spatial`** also adds `DroneSpatialNavSkill` with **`navigate_to_where_i_saw(description)`**: text-search remembered views and send a local NED position target to the drone.

## Running with Gazebo + ArduPilot

1. **Install ArduPilot SITL** and a GCS (e.g. MAVProxy). See the [ArduPilot development wiki](https://ardupilot.org/dev/).
2. **Install Gazebo and the ArduPilot Gazebo plugin** (Iris quad, camera streaming, etc.): [Using SITL with Gazebo](https://ardupilot.org/dev/docs/sitl-with-gazebo.html). Set `GZ_SIM_SYSTEM_PLUGIN_PATH` and `GZ_SIM_RESOURCE_PATH` as described there.
3. **Start Gazebo** with the Iris world (camera streams to UDP 5600):
```bash
gz sim -v4 -r iris_runway.sdf
```
Enable camera streaming if needed:
```bash
gz topic -t /world/iris_runway/model/iris_with_gimbal/model/gimbal/link/pitch_link/sensor/camera/image/enable_streaming -m gz.msgs.Boolean -p "data: 1"
```
4. **Start SITL** in another terminal:
```bash
sim_vehicle.py -v ArduCopter -f gazebo-iris --model JSON --map --console
```
5. **Run DimOS** (e.g. `dimos run drone-agentic-gazebo`). MAVLink: default UDP 14550. Video: port 5600.

## Installation

### Python (included with DimOS)
Expand Down
44 changes: 43 additions & 1 deletion dimos/robot/drone/blueprints/agentic/drone_agentic.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,25 @@
from dimos.agents.skills.osm import OsmSkill
from dimos.agents.web_human_input import WebInput
from dimos.core.blueprints import autoconnect
from dimos.robot.drone.blueprints.basic.drone_basic import drone_basic
from dimos.robot.drone.blueprints.basic.drone_basic import (
drone_basic,
drone_basic_gazebo,
drone_basic_gazebo_spatial,
)
from dimos.robot.drone.drone_spatial_nav_skill import DroneSpatialNavSkill
from dimos.robot.drone.drone_tracking_module import DroneTrackingModule

DRONE_SYSTEM_PROMPT = """\
You are controlling a DJI drone with MAVLink interface.
You have access to drone control skills you are already flying so only run move_twist, set_mode, and fly_to.
When the user gives commands, use the appropriate skills to control the drone.
Always confirm actions and report results. Send fly_to commands only at above 200 meters altitude to be safe.

Motion skills: move(x, y, z, duration) and move_forward(distance, speed) use velocity commands.
For direct position use go_to_position(x, y, z, vx_ff, vy_ff, vz_ff): position in local NED (m; z negative = up) with optional velocity feedforward.
Yaw control: use rotate_to(heading_deg) to turn to a heading (0–360°, 0=North, 90=East). Remember you have this for "turn left/right", "face North", etc.
Spatial memory (when available): navigate_to_where_i_saw(description) searches remembered camera views by text and flies toward the best matching stored position.

Here are some GPS locations to remember
6th and Natoma intersection: 37.78019978319006, -122.40770815020853,
454 Natoma (Office): 37.780967465525244, -122.40688342010769
Expand All @@ -53,7 +64,38 @@
]
)

drone_agentic_gazebo = autoconnect(
drone_basic_gazebo,
DroneTrackingModule.blueprint(outdoor=False),
GoogleMapsSkillContainer.blueprint(),
OsmSkill.blueprint(),
Agent.blueprint(system_prompt=DRONE_SYSTEM_PROMPT, model="gpt-4o-mini"),
WebInput.blueprint(),
).remappings(
[
(DroneTrackingModule, "video_input", "video"),
(DroneTrackingModule, "cmd_vel", "movecmd_twist"),
]
)

drone_agentic_gazebo_spatial = autoconnect(
drone_basic_gazebo_spatial,
DroneTrackingModule.blueprint(outdoor=False),
GoogleMapsSkillContainer.blueprint(),
OsmSkill.blueprint(),
DroneSpatialNavSkill.blueprint(),
agent(system_prompt=DRONE_SYSTEM_PROMPT, model="gpt-4o-mini"),
web_input(),
).remappings(
[
(DroneTrackingModule, "video_input", "video"),
(DroneTrackingModule, "cmd_vel", "movecmd_twist"),
]
)

__all__ = [
"DRONE_SYSTEM_PROMPT",
"drone_agentic",
"drone_agentic_gazebo",
"drone_agentic_gazebo_spatial",
]
22 changes: 21 additions & 1 deletion dimos/robot/drone/blueprints/basic/drone_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from dimos.core.blueprints import autoconnect
from dimos.core.global_config import global_config
from dimos.perception.spatial_perception import spatial_memory
from dimos.protocol.pubsub.impl.lcmpubsub import LCM
from dimos.robot.drone.camera_module import DroneCameraModule
from dimos.robot.drone.connection_module import DroneConnectionModule
Expand All @@ -44,7 +45,7 @@ def _drone_rerun_blueprint() -> Any:

return rrb.Blueprint(
rrb.Horizontal(
rrb.Spatial2DView(origin="world/video", name="Camera"),
rrb.Spatial2DView(origin="world/color_image", name="Camera"),
rrb.Spatial3DView(
origin="world",
name="3D",
Expand Down Expand Up @@ -95,6 +96,25 @@ def _drone_rerun_blueprint() -> Any:
WebsocketVisModule.blueprint(),
)

drone_basic_gazebo = autoconnect(
_vis,
DroneConnectionModule.blueprint(
connection_string=connection_string,
video_port=video_port,
video_source="gazebo",
outdoor=False,
),
DroneCameraModule.blueprint(camera_intrinsics=[1000.0, 1000.0, 960.0, 540.0]),
websocket_vis(),
)

drone_basic_gazebo_spatial = autoconnect(
drone_basic_gazebo,
spatial_memory(),
)

__all__ = [
"drone_basic",
"drone_basic_gazebo",
"drone_basic_gazebo_spatial",
]
110 changes: 83 additions & 27 deletions dimos/robot/drone/connection_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

"""DimOS module wrapper for drone connection."""

from collections.abc import Generator
import json
import threading
import time
Expand All @@ -36,6 +35,7 @@
from dimos.msgs.geometry_msgs.Vector3 import Vector3
from dimos.msgs.sensor_msgs.Image import Image
from dimos.robot.drone.dji_video_stream import DJIDroneVideoStream
from dimos.robot.drone.gazebo_video_stream import GazeboVideoStream
from dimos.robot.drone.mavlink_connection import MavlinkConnection
from dimos.utils.logging_config import setup_logger

Expand All @@ -52,6 +52,7 @@ def _add_disposable(composite: CompositeDisposable, item: Disposable | Any) -> N
class Config(ModuleConfig):
connection_string: str = "udp:0.0.0.0:14550"
video_port: int = 5600
video_source: str = "dji"
outdoor: bool = False


Expand Down Expand Up @@ -92,13 +93,14 @@ def __init__(self, **kwargs: Any) -> None:
"""
super().__init__(**kwargs)
self.connection: MavlinkConnection | None = None
self.video_stream: DJIDroneVideoStream | None = None
self.video_stream: DJIDroneVideoStream | GazeboVideoStream | None = None
self._latest_video_frame = None
self._latest_telemetry = None
self._latest_status = None
self._latest_status_lock = threading.RLock()
self._running = False
self._telemetry_thread: threading.Thread | None = None
self._last_twist_log_time: float = 0.0

@rpc
def start(self) -> None:
Expand All @@ -116,7 +118,10 @@ def start(self) -> None:
)
self.connection.connect()

self.video_stream = DJIDroneVideoStream(port=self.config.video_port)
if self.config.video_source == "gazebo":
self.video_stream = GazeboVideoStream(port=self.config.video_port)
else:
self.video_stream = DJIDroneVideoStream(port=self.config.video_port)

if not self.connection.connected:
logger.error("Failed to connect to drone")
Expand Down Expand Up @@ -282,6 +287,56 @@ def move(self, x: float = 0.0, y: float = 0.0, z: float = 0.0, duration: float =
if self.connection:
self.connection.move(Vector3(x, y, z), duration)

@skill
def move_forward(self, distance: float, speed: float = 0.3) -> None:
"""Move forward by a given distance in meters.

Args:
distance: Distance to move in meters (positive = forward).
speed: Forward speed in m/s. Default 0.3.
"""
if self.connection and abs(distance) >= 0.01:
duration = abs(distance) / max(0.01, speed)
vx = speed if distance >= 0 else -speed
self.connection.move(Vector3(vx, 0.0, 0.0), duration)

@skill
def go_to_position(
self, x: float, y: float, z: float,
vx_ff: float = 0.0, vy_ff: float = 0.0, vz_ff: float = 0.0,
) -> bool:
"""Send position target in local NED with optional velocity feedforward.

Args:
x: North position in meters (local NED).
y: East position in meters (local NED).
z: Down position in meters (negative = up).
vx_ff: Velocity feedforward North (m/s). Optional.
vy_ff: Velocity feedforward East (m/s). Optional.
vz_ff: Velocity feedforward Down (m/s). Optional.

Returns:
True if position target sent successfully
"""
if self.connection:
return self.connection.set_position_target(x, y, z, vx_ff, vy_ff, vz_ff)
return False

@skill
def rotate_to(self, heading_deg: float, timeout: float = 60.0) -> bool:
"""Rotate drone to a specific heading (yaw control).

Args:
heading_deg: Target heading in degrees (0–360, 0=North, 90=East).
timeout: Max time to rotate in seconds.

Returns:
True if rotation completed successfully
"""
if self.connection:
return self.connection.rotate_to(heading_deg, timeout)
return False

@skill
def takeoff(self, altitude: float = 3.0) -> bool:
"""Takeoff to specified altitude.
Expand Down Expand Up @@ -387,49 +442,44 @@ def fly_to(self, lat: float, lon: float, alt: float) -> str:

@skill
def follow_object(
self, object_description: str, duration: float = 120.0
) -> Generator[str, None, None]:
self, object_description: str, duration: float = 0.0
) -> str:
"""Follow an object with visual servoing.

Example:

follow_object(object_description="red car", duration=120)
follow_object(object_description="red car", duration=0) # 0 = track until stop_follow

Args:
object_description (str): A short and clear description of the object.
duration (float, optional): How long to track for. Defaults to 120.0.
duration (float, optional): Max tracking time in seconds; 0 = until stop_follow. Defaults to 0.
"""
msg = {"object_description": object_description, "duration": duration}
self.follow_object_cmd.publish(String(json.dumps(msg)))

yield "Started trying to track. First, trying to find the object."

# Poll for initial tracking result (found / not found / timeout)
start_time = time.time()
timeout = 30.0 if duration <= 0 else min(duration, 30.0)

started_tracking = False

while time.time() - start_time < duration:
time.sleep(0.01)
while time.time() - start_time < timeout:
time.sleep(0.1)
with self._latest_status_lock:
if not self._latest_status:
continue
match self._latest_status.get("status"):
case "not_found" | "failed":
yield f"The '{object_description}' object has not been found. Stopped tracking."
break
return f"The '{object_description}' object has not been found. Stopped tracking."
case "tracking":
# Only return tracking once.
if not started_tracking:
started_tracking = True
yield f"The '{object_description}' object is now being followed."
return (
f"Now following '{object_description}'."
+ (" Run stop_follow to stop." if duration <= 0 else f" For up to {duration}s.")
)
case "lost":
yield f"The '{object_description}' object has been lost. Stopped tracking."
break
return f"The '{object_description}' object was lost. Stopped tracking."
case "stopped":
yield f"Tracking '{object_description}' has stopped."
break
else:
yield f"Stopped tracking '{object_description}'"
return f"Tracking '{object_description}' has stopped."

return f"Started tracking command for '{object_description}'. No status update yet."

def _on_move_twist(self, msg: Twist) -> None:
"""Handle Twist movement commands from tracking/navigation.
Expand All @@ -438,8 +488,14 @@ def _on_move_twist(self, msg: Twist) -> None:
msg: Twist message with linear and angular velocities
"""
if self.connection:
# Use move_twist to properly handle Twist messages
self.connection.move_twist(msg, duration=0, lock_altitude=True)
now = time.time()
if now - self._last_twist_log_time >= 0.5:
self._last_twist_log_time = now
logger.info(
"follow→ardupilot: linear x=%.3f y=%.3f z=%.3f yaw_rate=%.3f (m/s, rad/s)",
msg.linear.x, msg.linear.y, msg.linear.z, msg.angular.z,
)
self.connection.move_twist(msg, duration=0, lock_altitude=False)

def _on_gps_goal(self, cmd: LatLon) -> None:
if self._latest_telemetry is None or self.connection is None:
Expand Down
Loading