From 20f49c1ac25b48b1db8c8215a78e2c04207a8383 Mon Sep 17 00:00:00 2001 From: Swastika Yadav Date: Wed, 6 May 2026 18:39:33 +0530 Subject: [PATCH 1/5] added docs.json for mintlify config --- docs/docs.json | 178 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) create mode 100644 docs/docs.json diff --git a/docs/docs.json b/docs/docs.json new file mode 100644 index 0000000000..6de9e407b5 --- /dev/null +++ b/docs/docs.json @@ -0,0 +1,178 @@ +{ + "theme": "mint", + "name": "Dimensional", + "description": "Official documentation for Dimensional (DimOS) — the modern operating system for generalist robotics: Python-first, agent-native, and hardware-agnostic.", + "colors": { + "primary": "#1682a3", + "light": "#1682a3", + "dark": "#1682a3" + }, + "favicon": "/favicon.svg", + "seo": { + "metatags": { + "og:site_name": "Dimensional", + "apple-mobile-web-app-title": "Dimensional" + } + }, + "navigation": { + "groups": [ + { + "group": "Getting Started", + "pages": [ + "requirements", + { + "group": "Installation", + "pages": [ + "installation/ubuntu", + "installation/osx", + "installation/nix" + ] + } + ] + }, + { + "group": "Usage", + "pages": [ + "usage/concepts", + "usage/blueprints", + "usage/modules", + "usage/native_modules", + "usage/configuration", + "usage/cli", + "usage/python-api", + "usage/lcm", + "usage/transforms", + "usage/tool_streams", + "usage/visualization", + { + "group": "Data Streams", + "pages": [ + "usage/data_streams/overview", + "usage/data_streams/reactivex", + "usage/data_streams/advanced_streams", + "usage/data_streams/quality_filter", + "usage/data_streams/temporal_alignment", + "usage/data_streams/storage_replay" + ] + }, + { + "group": "Sensor Streams", + "pages": [ + "usage/sensor_streams/overview", + "usage/sensor_streams/reactivex", + "usage/sensor_streams/advanced_streams", + "usage/sensor_streams/quality_filter", + "usage/sensor_streams/temporal_alignment", + "usage/sensor_streams/storage_replay" + ] + }, 
+ { + "group": "Transports", + "pages": [ + "usage/transports/overview", + "usage/transports/dds" + ] + } + ] + }, + { + "group": "Capabilities", + "pages": [ + "capabilities/agents", + { + "group": "Navigation", + "pages": [ + "capabilities/navigation/overview", + "capabilities/navigation/native-go2" + ] + }, + { + "group": "Manipulation", + "pages": [ + "capabilities/manipulation/overview", + "capabilities/manipulation/adding_a_custom_arm", + "capabilities/manipulation/openarm_integration" + ] + }, + { + "group": "Memory", + "pages": [ + "capabilities/memory/overview", + "capabilities/memory/plot", + "capabilities/memory/algo_comparison" + ] + }, + "capabilities/perception" + ] + }, + { + "group": "Platforms", + "pages": [ +"platforms/quadruped-go2", + "platforms/humanoid-g1" + ] + }, + { + "group": "Development", + "pages": [ + "development/docker", + "development/testing", + "development/grid_testing", + "development/large_file_management", + "development/profiling_dimos", + "development/writing_docs" + ] + }, + { + "group": "For Agents", + "pages": [ + "agents/overview", + "agents/style", + "agents/testing", + { + "group": "Writing Docs", + "pages": [ + "agents/docs/overview", + "agents/docs/codeblocks", + "agents/docs/doclinks" + ] + } + ] + } + ] + }, + "logo": { + "light": "/logo/light.svg", + "dark": "/logo/dark.svg" + }, + "navbar": { + "links": [ + { + "label": "Dimensional", + "href": "/" + } + ], + "primary": { + "type": "button", + "label": "Get started", + "href": "https://github.com/dimensionalOS/dimos" + } + }, + "contextual": { + "options": [ + "copy", + "view", + "chatgpt", + "claude", + "perplexity", + "mcp", + "cursor", + "vscode" + ] + }, + "footer": { + "socials": { + "github": "https://github.com/dimensionalOS/dimos" + } + } +} From 0a8f905092917c46e5392fc7f99630b17cd523fc Mon Sep 17 00:00:00 2001 From: Swastika Yadav Date: Wed, 6 May 2026 19:14:50 +0530 Subject: [PATCH 2/5] update mdx syntax, rename files, and flatten nested folder for 
mintlify parsing --- .../docs/{codeblocks.md => codeblocks.mdx} | 36 ++-- .../agents/docs/{doclinks.md => doclinks.mdx} | 4 +- docs/agents/docs/{index.md => overview.mdx} | 23 +-- docs/agents/{index.md => overview.mdx} | 2 - docs/agents/{style.md => style.mdx} | 2 - docs/agents/{testing.md => testing.mdx} | 2 - .../{agents/readme.md => agents.mdx} | 2 - ..._custom_arm.md => adding_a_custom_arm.mdx} | 2 - ...integration.md => openarm_integration.mdx} | 2 - .../manipulation/{readme.md => overview.mdx} | 2 - ...algo_comparison.md => algo_comparison.mdx} | 4 +- .../memory/{index.md => overview.mdx} | 24 ++- .../capabilities/memory/{plot.md => plot.mdx} | 32 +-- .../navigation/assets/1-lidar.png | 3 + .../navigation/assets/2-globalmap.png | 3 + .../navigation/assets/3-globalcostmap.png | 3 + .../navigation/assets/4-navcostmap.png | 3 + docs/capabilities/navigation/assets/5-all.png | 3 + .../navigation/assets/coverage.png | 3 + .../navigation/assets/frontier.png | 3 + .../navigation/assets/go2_blueprint.svg | 188 ++++++++++++++++++ .../navigation/assets/go2nav_dataflow.svg | 22 ++ .../navigation/assets/noros_nav.gif | 3 + .../navigation/assets/patrol_path.png | 3 + .../capabilities/navigation/assets/random.png | 3 + .../{native/index.md => native-go2.mdx} | 56 +++--- docs/capabilities/navigation/overview.mdx | 7 + docs/capabilities/navigation/readme.md | 11 - .../{perception/readme.md => perception.mdx} | 2 - docs/development/{docker.md => docker.mdx} | 10 +- .../{grid_testing.md => grid_testing.mdx} | 2 - ...anagement.md => large_file_management.mdx} | 26 +-- ...profiling_dimos.md => profiling_dimos.mdx} | 2 - docs/development/{testing.md => testing.mdx} | 2 - .../{writing_docs.md => writing_docs.mdx} | 2 - docs/installation/{nix.md => nix.mdx} | 2 - docs/installation/{osx.md => osx.mdx} | 12 +- docs/installation/{ubuntu.md => ubuntu.mdx} | 12 +- docs/platforms/humanoid-g1.mdx | 165 +++++++++++++++ docs/platforms/humanoid/g1/index.md | 132 ------------ .../go2/index.md 
=> quadruped-go2.mdx} | 4 +- docs/{requirements.md => requirements.mdx} | 2 - docs/usage/{blueprints.md => blueprints.mdx} | 2 - docs/usage/{cli.md => cli.mdx} | 2 - docs/usage/{README.md => concepts.mdx} | 2 - .../{configuration.md => configuration.mdx} | 6 +- ...vanced_streams.md => advanced_streams.mdx} | 29 +-- .../data_streams/{README.md => overview.mdx} | 2 - .../{quality_filter.md => quality_filter.mdx} | 30 ++- .../{reactivex.md => reactivex.mdx} | 40 ++-- .../{storage_replay.md => storage_replay.mdx} | 2 - ...al_alignment.md => temporal_alignment.mdx} | 35 ++-- docs/usage/{lcm.md => lcm.mdx} | 10 +- docs/usage/{modules.md => modules.mdx} | 138 +++++-------- .../{native_modules.md => native_modules.mdx} | 4 +- docs/usage/{python-api.md => python-api.mdx} | 2 - ...vanced_streams.md => advanced_streams.mdx} | 29 +-- .../{README.md => overview.mdx} | 2 - .../{quality_filter.md => quality_filter.mdx} | 30 ++- .../{reactivex.md => reactivex.mdx} | 40 ++-- .../{storage_replay.md => storage_replay.mdx} | 2 - ...al_alignment.md => temporal_alignment.mdx} | 35 ++-- .../{tool_streams.md => tool_streams.mdx} | 2 - docs/usage/{transforms.md => transforms.mdx} | 24 +-- docs/usage/transports/{dds.md => dds.mdx} | 2 - .../transports/{index.md => overview.mdx} | 121 ++++------- .../{visualization.md => visualization.mdx} | 83 ++------ 67 files changed, 796 insertions(+), 704 deletions(-) rename docs/agents/docs/{codeblocks.md => codeblocks.mdx} (93%) rename docs/agents/docs/{doclinks.md => doclinks.mdx} (91%) rename docs/agents/docs/{index.md => overview.mdx} (92%) rename docs/agents/{index.md => overview.mdx} (93%) rename docs/agents/{style.md => style.mdx} (98%) rename docs/agents/{testing.md => testing.mdx} (99%) rename docs/capabilities/{agents/readme.md => agents.mdx} (99%) rename docs/capabilities/manipulation/{adding_a_custom_arm.md => adding_a_custom_arm.mdx} (99%) rename docs/capabilities/manipulation/{openarm_integration.md => openarm_integration.mdx} (99%) 
rename docs/capabilities/manipulation/{readme.md => overview.mdx} (99%) rename docs/capabilities/memory/{algo_comparison.md => algo_comparison.mdx} (97%) rename docs/capabilities/memory/{index.md => overview.mdx} (93%) rename docs/capabilities/memory/{plot.md => plot.mdx} (93%) create mode 100644 docs/capabilities/navigation/assets/1-lidar.png create mode 100644 docs/capabilities/navigation/assets/2-globalmap.png create mode 100644 docs/capabilities/navigation/assets/3-globalcostmap.png create mode 100644 docs/capabilities/navigation/assets/4-navcostmap.png create mode 100644 docs/capabilities/navigation/assets/5-all.png create mode 100644 docs/capabilities/navigation/assets/coverage.png create mode 100644 docs/capabilities/navigation/assets/frontier.png create mode 100644 docs/capabilities/navigation/assets/go2_blueprint.svg create mode 100644 docs/capabilities/navigation/assets/go2nav_dataflow.svg create mode 100644 docs/capabilities/navigation/assets/noros_nav.gif create mode 100644 docs/capabilities/navigation/assets/patrol_path.png create mode 100644 docs/capabilities/navigation/assets/random.png rename docs/capabilities/navigation/{native/index.md => native-go2.mdx} (83%) create mode 100644 docs/capabilities/navigation/overview.mdx delete mode 100644 docs/capabilities/navigation/readme.md rename docs/capabilities/{perception/readme.md => perception.mdx} (50%) rename docs/development/{docker.md => docker.mdx} (97%) rename docs/development/{grid_testing.md => grid_testing.mdx} (99%) rename docs/development/{large_file_management.md => large_file_management.mdx} (95%) rename docs/development/{profiling_dimos.md => profiling_dimos.mdx} (95%) rename docs/development/{testing.md => testing.mdx} (99%) rename docs/development/{writing_docs.md => writing_docs.mdx} (97%) rename docs/installation/{nix.md => nix.mdx} (97%) rename docs/installation/{osx.md => osx.mdx} (86%) rename docs/installation/{ubuntu.md => ubuntu.mdx} (82%) create mode 100644 
docs/platforms/humanoid-g1.mdx delete mode 100644 docs/platforms/humanoid/g1/index.md rename docs/platforms/{quadruped/go2/index.md => quadruped-go2.mdx} (97%) rename docs/{requirements.md => requirements.mdx} (99%) rename docs/usage/{blueprints.md => blueprints.mdx} (99%) rename docs/usage/{cli.md => cli.mdx} (99%) rename docs/usage/{README.md => concepts.mdx} (98%) rename docs/usage/{configuration.md => configuration.mdx} (98%) rename docs/usage/data_streams/{advanced_streams.md => advanced_streams.mdx} (96%) rename docs/usage/data_streams/{README.md => overview.mdx} (99%) rename docs/usage/data_streams/{quality_filter.md => quality_filter.mdx} (95%) rename docs/usage/data_streams/{reactivex.md => reactivex.mdx} (97%) rename docs/usage/data_streams/{storage_replay.md => storage_replay.mdx} (99%) rename docs/usage/data_streams/{temporal_alignment.md => temporal_alignment.mdx} (95%) rename docs/usage/{lcm.md => lcm.mdx} (98%) rename docs/usage/{modules.md => modules.mdx} (83%) rename docs/usage/{native_modules.md => native_modules.mdx} (99%) rename docs/usage/{python-api.md => python-api.mdx} (99%) rename docs/usage/sensor_streams/{advanced_streams.md => advanced_streams.mdx} (96%) rename docs/usage/sensor_streams/{README.md => overview.mdx} (99%) rename docs/usage/sensor_streams/{quality_filter.md => quality_filter.mdx} (95%) rename docs/usage/sensor_streams/{reactivex.md => reactivex.mdx} (97%) rename docs/usage/sensor_streams/{storage_replay.md => storage_replay.mdx} (99%) rename docs/usage/sensor_streams/{temporal_alignment.md => temporal_alignment.mdx} (95%) rename docs/usage/{tool_streams.md => tool_streams.mdx} (99%) rename docs/usage/{transforms.md => transforms.mdx} (98%) rename docs/usage/transports/{dds.md => dds.mdx} (97%) rename docs/usage/transports/{index.md => overview.mdx} (75%) rename docs/usage/{visualization.md => visualization.mdx} (51%) diff --git a/docs/agents/docs/codeblocks.md b/docs/agents/docs/codeblocks.mdx similarity index 93% rename 
from docs/agents/docs/codeblocks.md rename to docs/agents/docs/codeblocks.mdx index 958c14a7b1..ec352b0066 100644 --- a/docs/agents/docs/codeblocks.md +++ b/docs/agents/docs/codeblocks.mdx @@ -1,5 +1,3 @@ -# Executable Code Blocks - We use [md-babel-py](https://github.com/leshy/md-babel-py/) to execute code blocks in markdown and insert results. ## Golden Rule @@ -38,7 +36,7 @@ Use `skip` when a block would pull in **CUDA / GPU-only** stacks (for example pe Execute code blocks in markdown files and insert the results. -![Demo](assets/screencast.gif) +Demo **Use cases:** - Keep documentation examples up-to-date automatically @@ -54,7 +52,7 @@ Execute code blocks in markdown files and insert the results. echo "cwd: $(pwd)" ``` - +{/* Result: */} ``` cwd: /work ``` @@ -66,7 +64,7 @@ a = "hello world" print(a) ``` - +{/* Result: */} ``` hello world ``` @@ -77,7 +75,7 @@ Sessions preserve state between code blocks: print(a, "again") ``` - +{/* Result: */} ``` hello world again ``` @@ -89,7 +87,7 @@ console.log("Hello from Node.js"); console.log(`Node version: ${process.version}`); ``` - +{/* Result: */} ``` Hello from Node.js Node version: v22.21.1 @@ -112,8 +110,8 @@ plt.grid(alpha=0.3) plt.savefig('{output}', transparent=True) ``` - -![output](assets/matplotlib-demo.svg) +{/* Result: */} +output ### Pikchr @@ -155,8 +153,8 @@ arrow from X to Out.w - -![output](assets/pikchr-demo.svg) +{/* Result: */} +output ### Asymptote @@ -183,8 +181,8 @@ xaxis("$x$",BottomTop,LeftTicks,p=white); yaxis("$dP/dx$",LeftRight,RightTicks(trailingzero),p=white); ``` - -![output](assets/histogram.svg) +{/* Result: */} +output ### Graphviz @@ -193,8 +191,8 @@ A -> B -> C A -> C ``` - -![output](assets/graph.svg) +{/* Result: */} +output ### OpenSCAD @@ -203,8 +201,8 @@ cube([10, 10, 10]); sphere(r=7); ``` - -![output](assets/cube-sphere.png) +{/* Result: */} +output ### Diagon @@ -214,7 +212,7 @@ ASCII art diagrams: 1 + 1/2 + sum(i,0,10) ``` - +{/* Result: */} ``` 10 ___ @@ -229,7 +227,7 
@@ A -> B -> C A -> C ``` - +{/* Result: */} ``` ┌───┐ │A │ diff --git a/docs/agents/docs/doclinks.md b/docs/agents/docs/doclinks.mdx similarity index 91% rename from docs/agents/docs/doclinks.md rename to docs/agents/docs/doclinks.mdx index d5533c5983..c6590836b1 100644 --- a/docs/agents/docs/doclinks.md +++ b/docs/agents/docs/doclinks.mdx @@ -4,13 +4,13 @@ Full documentation if needed: [`utils/docs/doclinks.md`](/dimos/utils/docs/docli ## Syntax - +{/* doclinks-ignore-start */} | Pattern | Example | |-------------|-----------------------------------------------------| | Code file | `[`service/spec.py`]()` → resolves path | | With symbol | `Configurable` in `[`spec.py`]()` → adds `#L` | | Doc link | `[Configuration](.md)` → resolves to doc | - +{/* doclinks-ignore-end */} ## Usage diff --git a/docs/agents/docs/index.md b/docs/agents/docs/overview.mdx similarity index 92% rename from docs/agents/docs/index.md rename to docs/agents/docs/overview.mdx index 09dabad7ee..ada49ef3aa 100644 --- a/docs/agents/docs/index.md +++ b/docs/agents/docs/overview.mdx @@ -1,6 +1,3 @@ - -# Code Blocks - **All code blocks must be executable.** Never write illustrative/pseudo code blocks. If you're showing an API usage pattern, create a minimal working example that actually runs. This ensures documentation stays correct as the codebase evolves. @@ -75,8 +72,8 @@ C: box "Step 3" rad 5px fit wid 170% ht 170% - -![output](assets/pikchr_basic.svg) +{/* Result: */} +output ## Box sizing @@ -99,8 +96,8 @@ C: box "two lines" "of text" rad 5px fit wid 170% ht 170% - -![output](assets/pikchr_sizing.svg) +{/* Result: */} +output The pattern `fit wid 170% ht 170%` means: auto-size to text, then scale width by 170% and height by 170%. 
@@ -120,8 +117,8 @@ B: box "Step 2" rad 5px fit wid 170% ht 170% - -![output](assets/pikchr_explicit.svg) +{/* Result: */} +output ## Common settings @@ -156,8 +153,8 @@ D: box "Path B" rad 5px fit wid 170% ht 170% - -![output](assets/pikchr_branch.svg) +{/* Result: */} +output **Tip:** For tree/hierarchy diagrams, prefer left-to-right layout (root on left, children branching right). This reads more naturally and avoids awkward vertical stacking. @@ -176,8 +173,8 @@ text "label below" at (A.x, A.y - 0.4in) - -![output](assets/pikchr_labels.svg) +{/* Result: */} +output ## Reference diff --git a/docs/agents/index.md b/docs/agents/overview.mdx similarity index 93% rename from docs/agents/index.md rename to docs/agents/overview.mdx index 4170a0e898..657d39d5ac 100644 --- a/docs/agents/index.md +++ b/docs/agents/overview.mdx @@ -1,5 +1,3 @@ -# For Agents - ├── testing.md (docs about writing tests) ├── docs (these are docs about writing docs) │   ├── codeblocks.md diff --git a/docs/agents/style.md b/docs/agents/style.mdx similarity index 98% rename from docs/agents/style.md rename to docs/agents/style.mdx index 3e13faae9b..317e67c326 100644 --- a/docs/agents/style.md +++ b/docs/agents/style.mdx @@ -1,5 +1,3 @@ -# Code Style Guidelines - Rules for writing code in dimos. These address recurring issues found in code review. ## No comment banners diff --git a/docs/agents/testing.md b/docs/agents/testing.mdx similarity index 99% rename from docs/agents/testing.md rename to docs/agents/testing.mdx index 4c556cfeca..fcafc47c39 100644 --- a/docs/agents/testing.md +++ b/docs/agents/testing.mdx @@ -1,5 +1,3 @@ -# Testing Guidelines - Rules for writing tests in dimos. These address recurring issues found in code review. For grid testing (spec/impl tests across multiple backends), see [Grid Testing Strategy](/docs/development/grid_testing.md). 
diff --git a/docs/capabilities/agents/readme.md b/docs/capabilities/agents.mdx similarity index 99% rename from docs/capabilities/agents/readme.md rename to docs/capabilities/agents.mdx index 7cb26b7463..fb7f15ef1a 100644 --- a/docs/capabilities/agents/readme.md +++ b/docs/capabilities/agents.mdx @@ -1,5 +1,3 @@ -# Agents - LLM agents run as native DimOS modules. They subscribe to camera, LiDAR, odometry, and spatial memory streams and they control the robot through skills. ## Architecture diff --git a/docs/capabilities/manipulation/adding_a_custom_arm.md b/docs/capabilities/manipulation/adding_a_custom_arm.mdx similarity index 99% rename from docs/capabilities/manipulation/adding_a_custom_arm.md rename to docs/capabilities/manipulation/adding_a_custom_arm.mdx index 0fd27b4e46..6c25891605 100644 --- a/docs/capabilities/manipulation/adding_a_custom_arm.md +++ b/docs/capabilities/manipulation/adding_a_custom_arm.mdx @@ -1,5 +1,3 @@ -# How to Integrate a New Manipulator Arm - This guide walks through integrating a new robot arm with DimOS, from writing the hardware adapter to creating blueprints for planning and control. ## Architecture Overview diff --git a/docs/capabilities/manipulation/openarm_integration.md b/docs/capabilities/manipulation/openarm_integration.mdx similarity index 99% rename from docs/capabilities/manipulation/openarm_integration.md rename to docs/capabilities/manipulation/openarm_integration.mdx index 6869864f5a..05c3bbb059 100644 --- a/docs/capabilities/manipulation/openarm_integration.md +++ b/docs/capabilities/manipulation/openarm_integration.mdx @@ -1,5 +1,3 @@ -# OpenArm Integration - Guide for running the **OpenArm** — an open-source bimanual 7-DOF research arm built from Damiao DM-J quasi-direct-drive motors — under the dimos manipulation + control stack. 
**If you're standing in front of the hardware and just want to run it, skip to [Quick start](#quick-start).** diff --git a/docs/capabilities/manipulation/readme.md b/docs/capabilities/manipulation/overview.mdx similarity index 99% rename from docs/capabilities/manipulation/readme.md rename to docs/capabilities/manipulation/overview.mdx index 9a489c84b7..c1ea62637e 100644 --- a/docs/capabilities/manipulation/readme.md +++ b/docs/capabilities/manipulation/overview.mdx @@ -1,5 +1,3 @@ -# Manipulation - Motion planning and teleoperation for robotic manipulators. Uses Drake for physics simulation and Meshcat for 3D visualization. ## Quick Start diff --git a/docs/capabilities/memory/algo_comparison.md b/docs/capabilities/memory/algo_comparison.mdx similarity index 97% rename from docs/capabilities/memory/algo_comparison.md rename to docs/capabilities/memory/algo_comparison.mdx index ffb1cb3dbf..f63d0f09b3 100644 --- a/docs/capabilities/memory/algo_comparison.md +++ b/docs/capabilities/memory/algo_comparison.mdx @@ -84,9 +84,9 @@ delta_plot.to_svg("assets/plot_brightness_algo_delta.svg") ``` -![output](assets/plot_brightness_algo.svg) +output -![output](assets/plot_brightness_algo_delta.svg) +output We see that new algo is strictly better. diff --git a/docs/capabilities/memory/index.md b/docs/capabilities/memory/overview.mdx similarity index 93% rename from docs/capabilities/memory/index.md rename to docs/capabilities/memory/overview.mdx index 290cdfd37d..5f600e4d25 100644 --- a/docs/capabilities/memory/index.md +++ b/docs/capabilities/memory/overview.mdx @@ -1,4 +1,5 @@ -
Python +
+Python ```python fold session=mem output=none import pickle @@ -12,6 +13,7 @@ from dimos.utils.data import get_data from dimos.memory2.vis.space.elements import Point ``` +
we init our recording, investigate available streams @@ -23,7 +25,7 @@ for name, stream in store.streams.items(): print(stream.summary()) ``` - +{/* Result: */} ``` Stream("color_image"): 4164 items, 2025-12-26 11:09:08 — 2025-12-26 11:14:00 (292.5s) Stream("color_image_embedded"): 267 items, 2025-12-26 11:09:12 — 2025-12-26 11:14:00 (288.4s) @@ -48,7 +50,7 @@ drawing.to_svg("assets/color_image.svg") our drawing system applies turbo color scheme to timestamps by default -![output](assets/color_image.svg) +output we can create new streams by querying existing streams, and we can save, further transform or draw those @@ -67,7 +69,7 @@ drawing.add( drawing.to_svg("assets/speed.svg") ``` -![output](assets/speed.svg) +output we can do all kinds of things with this, for example map out room lighting @@ -87,7 +89,7 @@ drawing.add( drawing.to_svg("assets/brightness.svg") ``` -![output](assets/brightness.svg) +output So knowing above, we can create embeddings for the full stream, @@ -134,7 +136,7 @@ drawing.add(store.streams.color_image_embedded.search(search_vector)) drawing.to_svg("assets/embedding.svg") ``` -![output](assets/embedding.svg) +output We don't really have to deal with the whole global map actually, let's get top 10 embeddings, and render only lidar around those. @@ -160,15 +162,16 @@ drawing.add(matches) drawing.to_svg("assets/embedding_focused.svg") ``` - +{/* Result: */} ``` Stream("color_image_embedded") | vector_search(k=30) 08:19:54.129 [inf][dimos/mapping/voxels.py ] VoxelGrid using device: CUDA:0 ``` -![output](assets/embedding_focused.svg) +output -
Python +
+Python ```python fold session=mem import matplotlib @@ -199,6 +202,7 @@ def plot_mosaic(frames, path, cols=5): ``` +
let's view those images @@ -207,4 +211,4 @@ let's view those images plot_mosaic(matches.map(lambda obs: obs.data).to_list(), "assets/grid.png") ``` -![output](assets/grid.png) +output diff --git a/docs/capabilities/memory/plot.md b/docs/capabilities/memory/plot.mdx similarity index 93% rename from docs/capabilities/memory/plot.md rename to docs/capabilities/memory/plot.mdx index 17420a4e5d..6f210e9093 100644 --- a/docs/capabilities/memory/plot.md +++ b/docs/capabilities/memory/plot.mdx @@ -26,7 +26,7 @@ for i in range(14): color_check.to_svg("assets/plot_colors.svg") ``` -![output](assets/plot_colors.svg) +output named colors can also be used explicitly. when you pin a series to one of the named colors, the auto-cycle excludes it for the remaining series, so @@ -48,7 +48,7 @@ p.add(HLine(y=0, style=Style.dashed, opacity=0.5, color="#ff0000")) p.to_svg("assets/plot_named.svg") ``` -![output](assets/plot_named.svg) +output ## speed plot @@ -88,7 +88,7 @@ plot.add( plot.to_svg("assets/plot_robot_data.svg") ``` -![output](assets/plot_robot_data.svg) +output ## Semantic search @@ -135,14 +135,14 @@ plot.add(plantness_similarity, plot.to_svg("assets/plot_plantness.svg") ``` - +{/* Result: */} ``` Stream("color_image_embedded") | vector_search() | order_by(ts) Stream("materialize") Stream("materialize"): 267 items, 2025-12-26 11:09:12 — 2025-12-26 11:14:00 (288.4s) ``` -![output](assets/plot_plantness.svg) +output We can be pretty sure the robot saw some plants by peaks at beginning and end of data, but this graph doesn't look great, why? @@ -170,7 +170,7 @@ plot.add(HLine(y=0.15, style=Style.dashed, color=color.red)) plot.to_svg("assets/plot_plantness_brightness.svg") ``` -![output](assets/plot_plantness_brightness.svg) +output We see that stuff isn't embedded below some minimum brightness. 
Let's now fill the gaps in our semantic graph a bit, looks super ugly above, we will tell plotter to consider unmapped values as zero and connect values that are within 7.5 seconds, smooth with 5 second time window, and normalize the data @@ -193,7 +193,7 @@ plot.to_svg("assets/plot_plantness_gap_fill.svg") ``` -![output](assets/plot_plantness_gap_fill.svg) +output Looks better, these are some very obvious peaks, I'm curious let's see what was captured then. @@ -241,7 +241,7 @@ m = mosaic(semantic_peaks.map_data(lambda obs: moondream.query_detections(obs.da m.data.save("assets/plants_auto.png") ``` - +{/* Result: */} ``` 14:59:33.042 [inf][dimos/mapping/voxels.py ] VoxelGrid using device: CUDA:0 t= 14.1s score=0.224 prominence=0.031 @@ -262,11 +262,11 @@ t= 279.6s score=0.230 prominence=0.030 ``` -![output](assets/plot_plantness_autopeaks.svg) +output -![output](assets/plants_auto.png) +output -![output](assets/plot_plantness_autopeaks_map.svg) +output ## Which peaks are significant? @@ -297,9 +297,9 @@ plot.to_svg("assets/plot_plantness_significant.svg") ``` -![output](assets/plot_plantness_significant.svg) +output -![output](assets/plants_meaningful.png) +output Rule of thumb: keep a small absolute floor on `peaks(prominence=...)` to reject shape-noise, then let `significant()` pick the statistical cutoff. 
@@ -352,8 +352,8 @@ m = mosaic(detections) m.data.save("assets/plants_peak_detections.png") ``` -![output](assets/peak_space.svg) -![output](assets/plants_peak_detections.png) +output +output ## 3D Projection @@ -407,7 +407,7 @@ drawing.to_svg("assets/peak_detections.svg") ``` -![output](assets/peak_detections.svg) +output # TODO further steps diff --git a/docs/capabilities/navigation/assets/1-lidar.png b/docs/capabilities/navigation/assets/1-lidar.png new file mode 100644 index 0000000000..6584ee90cb --- /dev/null +++ b/docs/capabilities/navigation/assets/1-lidar.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d76742ada18d20dc0e3a3be04159d3412e7df6acee8596ff37916f0f269d3e0 +size 597386 diff --git a/docs/capabilities/navigation/assets/2-globalmap.png b/docs/capabilities/navigation/assets/2-globalmap.png new file mode 100644 index 0000000000..55541a8fcb --- /dev/null +++ b/docs/capabilities/navigation/assets/2-globalmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc2f27ec2dcc4048acde6b53229c7596b3a7f6ed6afad30c4cd062cf5751bd24 +size 1104485 diff --git a/docs/capabilities/navigation/assets/3-globalcostmap.png b/docs/capabilities/navigation/assets/3-globalcostmap.png new file mode 100644 index 0000000000..907d0b0448 --- /dev/null +++ b/docs/capabilities/navigation/assets/3-globalcostmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1f9e6c142b220f1a4be7b08950f628a2d34e26caba8a1f5c100726bec6c88ef +size 793366 diff --git a/docs/capabilities/navigation/assets/4-navcostmap.png b/docs/capabilities/navigation/assets/4-navcostmap.png new file mode 100644 index 0000000000..6c40bce0e0 --- /dev/null +++ b/docs/capabilities/navigation/assets/4-navcostmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ee4332e3d92162ddf41a0137c2ab5b6a885d758aa5a27037e413cdd4d946436 +size 741912 diff --git a/docs/capabilities/navigation/assets/5-all.png 
b/docs/capabilities/navigation/assets/5-all.png new file mode 100644 index 0000000000..655be72c1c --- /dev/null +++ b/docs/capabilities/navigation/assets/5-all.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a777d315beac6f4773adcb5c27384fd983720083941b4f62060958ddf6c16d2 +size 1209867 diff --git a/docs/capabilities/navigation/assets/coverage.png b/docs/capabilities/navigation/assets/coverage.png new file mode 100644 index 0000000000..2ad2112071 --- /dev/null +++ b/docs/capabilities/navigation/assets/coverage.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c5ef9943e14c2d02fa2e19032ffeb2fc79f927c903e552e7c0db01b858f5297 +size 256502 diff --git a/docs/capabilities/navigation/assets/frontier.png b/docs/capabilities/navigation/assets/frontier.png new file mode 100644 index 0000000000..97089338f5 --- /dev/null +++ b/docs/capabilities/navigation/assets/frontier.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f2e35b3a6cc1e82667958f6bb3120a5f0bb5bba99f156df7283b774559168b5 +size 251903 diff --git a/docs/capabilities/navigation/assets/go2_blueprint.svg b/docs/capabilities/navigation/assets/go2_blueprint.svg new file mode 100644 index 0000000000..51b0e7c40f --- /dev/null +++ b/docs/capabilities/navigation/assets/go2_blueprint.svg @@ -0,0 +1,188 @@ + + + + + + +modules + +cluster_mapping + +mapping + + +cluster_navigation + +navigation + + +cluster_robot + +robot + + +cluster_visualization + +visualization + + + +CostMapper + +CostMapper + + + +chan_global_costmap_OccupancyGrid + + + +global_costmap:OccupancyGrid + + + +CostMapper->chan_global_costmap_OccupancyGrid + + + + +VoxelGridMapper + +VoxelGridMapper + + + +chan_global_map_PointCloud2 + + + +global_map:PointCloud2 + + + +VoxelGridMapper->chan_global_map_PointCloud2 + + + + +ReplanningAStarPlanner + +ReplanningAStarPlanner + + + +chan_cmd_vel_Twist + + + +cmd_vel:Twist + + + +ReplanningAStarPlanner->chan_cmd_vel_Twist + + + + 
+chan_goal_reached_Bool + + + +goal_reached:Bool + + + +ReplanningAStarPlanner->chan_goal_reached_Bool + + + + +WavefrontFrontierExplorer + +WavefrontFrontierExplorer + + + +chan_goal_request_PoseStamped + + + +goal_request:PoseStamped + + + +WavefrontFrontierExplorer->chan_goal_request_PoseStamped + + + + +GO2Connection + +GO2Connection + + + +chan_lidar_PointCloud2 + + + +lidar:PointCloud2 + + + +GO2Connection->chan_lidar_PointCloud2 + + + + +RerunBridgeModule + +RerunBridgeModule + + + +chan_cmd_vel_Twist->GO2Connection + + + + + +chan_global_costmap_OccupancyGrid->ReplanningAStarPlanner + + + + + +chan_global_costmap_OccupancyGrid->WavefrontFrontierExplorer + + + + + +chan_global_map_PointCloud2->CostMapper + + + + + +chan_goal_reached_Bool->WavefrontFrontierExplorer + + + + + +chan_goal_request_PoseStamped->ReplanningAStarPlanner + + + + + +chan_lidar_PointCloud2->VoxelGridMapper + + + + + diff --git a/docs/capabilities/navigation/assets/go2nav_dataflow.svg b/docs/capabilities/navigation/assets/go2nav_dataflow.svg new file mode 100644 index 0000000000..94bb3e39ee --- /dev/null +++ b/docs/capabilities/navigation/assets/go2nav_dataflow.svg @@ -0,0 +1,22 @@ + + +Go2 + + + +VoxelGridMapper + + + +CostMapper + + + +Navigation +PointCloud2 +PointCloud2 +OccupancyGrid + + +Twist + diff --git a/docs/capabilities/navigation/assets/noros_nav.gif b/docs/capabilities/navigation/assets/noros_nav.gif new file mode 100644 index 0000000000..ab47bb9cb5 --- /dev/null +++ b/docs/capabilities/navigation/assets/noros_nav.gif @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60f842cd2fda539338443b3c501197fbb875f5c5f3883ba3ffdd17005e9bd786 +size 612786 diff --git a/docs/capabilities/navigation/assets/patrol_path.png b/docs/capabilities/navigation/assets/patrol_path.png new file mode 100644 index 0000000000..4d53c29409 --- /dev/null +++ b/docs/capabilities/navigation/assets/patrol_path.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:0cecf773affedca3d14d781e956d20ec9b396df53b5473e41fb7a182d700bef2 +size 476239 diff --git a/docs/capabilities/navigation/assets/random.png b/docs/capabilities/navigation/assets/random.png new file mode 100644 index 0000000000..b407034eb6 --- /dev/null +++ b/docs/capabilities/navigation/assets/random.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18ab48a549d02d1cd63c8c21b4294acc9c235e8c8f704e2c6ee71d0399ca4aa0 +size 260526 diff --git a/docs/capabilities/navigation/native/index.md b/docs/capabilities/navigation/native-go2.mdx similarity index 83% rename from docs/capabilities/navigation/native/index.md rename to docs/capabilities/navigation/native-go2.mdx index fb38a95e01..8608a0b49f 100644 --- a/docs/capabilities/navigation/native/index.md +++ b/docs/capabilities/navigation/native-go2.mdx @@ -1,6 +1,4 @@ -# Go2 Non-ROS Navigation - - + The Go2 navigation stack runs entirely without ROS. It uses a **column-carving voxel map** strategy: each new LiDAR frame replaces the corresponding region of the global map entirely, ensuring the map always reflects the latest observations. @@ -9,7 +7,7 @@ The Go2 navigation stack runs entirely without ROS. It uses a **column-carving v
diagram source -```pikchr fold output=assets/go2nav_dataflow.svg +```pikchr fold output=native/assets/go2nav_dataflow.svg color = white fill = none @@ -35,17 +33,18 @@ M4: dot at 1/2 way between Go2.s and Nav.s invisible text "Twist" italic at (M4.x, Nav.s.y - 0.45in) ``` +
- -![output](assets/go2nav_dataflow.svg) +{/* Result: */} +output ## Pipeline Steps ### 1. LiDAR Frame — [`GO2Connection`](/dimos/robot/unitree/go2/connection.py) We don't connect to the LiDAR directly — instead we use Unitree's WebRTC client (via [legion's webrtc driver](https://github.com/legion1581/unitree_webrtc_connect)), which streams a heavily preprocessed 5cm voxel grid rather than raw point cloud data. This allows us to support stock, unjailbroken Go2 Air and Pro models out of the box. -![LiDAR frame](assets/1-lidar.png) +LiDAR frame ### 2. Global Voxel Map — [`VoxelGridMapper`](/dimos/mapping/voxels.py) @@ -72,7 +71,7 @@ We don't have proper loop closure and stable odometry, we trust the data go2 odo | `carve_columns` | `true` | Enable column carving (disable for append-only mapping) | | `publish_interval` | 0 | Seconds between map publishes (0 = every frame) | -![Global map](assets/2-globalmap.png) +Global map ### 3. Global Costmap — [`CostMapper`](/dimos/mapping/costmapper.py) @@ -100,7 +99,7 @@ class HeightCostConfig(OccupancyConfig): | 100 | Steep or impassable (≥15cm rise per cell in case of go2) | | -1 | Unknown (no observations) | -![Global costmap](assets/3-globalcostmap.png) +Global costmap ### 4. Navigation Costmap — [`ReplanningAStarPlanner`](/dimos/navigation/replanning_a_star/module.py) @@ -108,13 +107,13 @@ The planner will process the terrain gradient and compute it's own algo-relevant We run the planner in a constant loop so it will dynamically react to obstacles encountered. -![Navigation costmap with path](assets/4-navcostmap.png) +Navigation costmap with path ### 5. 
All Layers Combined All visualization layers shown together -![All layers](assets/5-all.png) +All layers ## Patrolling @@ -146,36 +145,35 @@ Goal candidates are filtered through a **safe mask** — the free-space region e | Coverage | Frontier | Random | |----------|----------|--------| -| ![coverage](assets/coverage.png) | ![frontier](assets/frontier.png) | ![random](assets/random.png) | +| coverage | frontier | random | ### Sample patrol trace (26 min) -![Patrol path](assets/patrol_path.png) +Patrol path ## Blueprint Composition The navigation stack is composed in the [`unitree_go2`](/dimos/robot/unitree/go2/blueprints/smart/unitree_go2.py) blueprint: -```python skip fold output=assets/go2_blueprint.svg +```python fold output=native/assets/go2_blueprint.svg from dimos.core.coordination.blueprints import autoconnect -from dimos.core.introspection.svg import to_svg -from dimos.mapping.costmapper import CostMapper -from dimos.mapping.voxels import VoxelGridMapper -from dimos.navigation.frontier_exploration.wavefront_frontier_goal_selector import ( - WavefrontFrontierExplorer, -) -from dimos.navigation.replanning_a_star.module import ReplanningAStarPlanner +from dimos.core.introspection import to_svg +from dimos.mapping.costmapper import cost_mapper +from dimos.mapping.voxels import voxel_mapper +from dimos.navigation.frontier_exploration import wavefront_frontier_explorer +from dimos.navigation.replanning_a_star.module import replanning_a_star_planner from dimos.robot.unitree.go2.blueprints.basic.unitree_go2_basic import unitree_go2_basic unitree_go2 = autoconnect( - unitree_go2_basic, - VoxelGridMapper.blueprint(), - CostMapper.blueprint(), - ReplanningAStarPlanner.blueprint(), - WavefrontFrontierExplorer.blueprint(), + unitree_go2_basic, # robot connection + visualization + voxel_mapper(voxel_size=0.05), # 3D voxel mapping + cost_mapper(), # 2D costmap generation + replanning_a_star_planner(), # path planning + wavefront_frontier_explorer(), # exploration 
).global_config(n_workers=6, robot_model="unitree_go2") -to_svg(unitree_go2, "assets/go2_blueprint.svg") +to_svg(unitree_go2, "native/assets/go2_blueprint.svg") ``` - -![output](assets/go2_blueprint.svg) + +{/* Result: */} +output diff --git a/docs/capabilities/navigation/overview.mdx b/docs/capabilities/navigation/overview.mdx new file mode 100644 index 0000000000..8c5233e7c9 --- /dev/null +++ b/docs/capabilities/navigation/overview.mdx @@ -0,0 +1,7 @@ +## Non-ROS + +- [Go2 Navigation](/docs/capabilities/navigation/native-go2.mdx) — column-carving voxel mapping + slope-based costmap + +## ROS + +See [ROS Transports](/docs/usage/transports/index.md) for bridging DimOS streams to ROS topics. diff --git a/docs/capabilities/navigation/readme.md b/docs/capabilities/navigation/readme.md deleted file mode 100644 index 66c59d3540..0000000000 --- a/docs/capabilities/navigation/readme.md +++ /dev/null @@ -1,11 +0,0 @@ -# Navigation - -Note: in the future these will be merged into one system. - -## Nav Stack - -- [Nav Stack](/docs/capabilities/navigation/nav_stack.md) — modular navigation stack with terrain analysis, local/global planning, PGO, and exploration - -## Simple Nav - -- [Simple Navigation](/docs/capabilities/navigation/native/index.md) — column-carving voxel mapping + slope-based costmap diff --git a/docs/capabilities/perception/readme.md b/docs/capabilities/perception.mdx similarity index 50% rename from docs/capabilities/perception/readme.md rename to docs/capabilities/perception.mdx index 5d6e089dbf..afefda94bb 100644 --- a/docs/capabilities/perception/readme.md +++ b/docs/capabilities/perception.mdx @@ -1,3 +1 @@ -# Perception - ## Detections diff --git a/docs/development/docker.md b/docs/development/docker.mdx similarity index 97% rename from docs/development/docker.md rename to docs/development/docker.mdx index c6622ff87f..74b2262ab3 100644 --- a/docs/development/docker.md +++ b/docs/development/docker.mdx @@ -1,10 +1,9 @@ -# Docker Images - Dimos uses
parallel Docker image hierarchies for ROS and non-ROS builds, allowing you to choose the environment that fits your use case. ## Image Hierarchy -
Pikchr +
+Pikchr ```pikchr fold output=assets/docker-hierarchy.svg color = white @@ -38,10 +37,11 @@ line dashed from D.e right 0.3in then down until even with RD then right to RD.w text "same dockerfiles" at (D.e.x + 1.2in, D.e.y + 0.4in) ``` +
- -![output](assets/docker-hierarchy.svg) +{/* Result: */} +output ## Images diff --git a/docs/development/grid_testing.md b/docs/development/grid_testing.mdx similarity index 99% rename from docs/development/grid_testing.md rename to docs/development/grid_testing.mdx index c72bda0fcb..006e509276 100644 --- a/docs/development/grid_testing.md +++ b/docs/development/grid_testing.mdx @@ -1,5 +1,3 @@ -# Grid Testing Strategy - Grid tests run the same test logic across multiple implementations or configurations using pytest's parametrize feature. ## Case Type Pattern diff --git a/docs/development/large_file_management.md b/docs/development/large_file_management.mdx similarity index 95% rename from docs/development/large_file_management.md rename to docs/development/large_file_management.mdx index 87fe6ce4be..74de997b9c 100644 --- a/docs/development/large_file_management.md +++ b/docs/development/large_file_management.mdx @@ -1,5 +1,3 @@ -# Data Loading - The [`get_data`](/dimos/utils/data.py) function provides access to test data and model files, handling Git LFS downloads automatically. ## Basic Usage @@ -13,7 +11,7 @@ print(f"Path: {data_path}") print(f"Exists: {data_path.exists()}") ``` - +{/* Result: */} ``` Path: /home/lesh/coding/dimos/data/cafe.jpg Exists: True @@ -21,7 +19,8 @@ Exists: True ## How It Works -
Pikchr +
+Pikchr ```pikchr fold output=assets/get_data_flow.svg color = white @@ -44,10 +43,11 @@ arrow right 0.3in F: box "Return path" rad 5px fit wid 170% ht 170% ``` +
- -![output](assets/get_data_flow.svg) +{/* Result: */} +output 1. Checks if `data/{name}` already exists locally 2. If missing, pulls the `.tar.gz` archive from Git LFS @@ -66,7 +66,7 @@ image = Image.from_file(get_data("cafe.jpg")) print(f"Image shape: {image.data.shape}") ``` - +{/* Result: */} ``` Image shape: (771, 1024, 3) ``` @@ -81,7 +81,7 @@ checkpoint = model_dir / "yolo11n.pt" print(f"Checkpoint: {checkpoint.name} ({checkpoint.stat().st_size // 1024}KB)") ``` - +{/* Result: */} ``` Checkpoint: yolo11n.pt (5482KB) ``` @@ -98,7 +98,7 @@ print(f"Replay {replay} loaded from: {data_dir.name}") print(replay.find_closest_seek(1)) ``` - +{/* Result: */} ``` Replay loaded from: unitree_office_walk {'type': 'msg', 'topic': 'rt/utlidar/voxel_map_compressed', 'data': {'stamp': 1751591000.0, 'frame_id': 'odom', 'resolution': 0.05, 'src_size': 77824, 'origin': [-3.625, -3.275, -0.575], 'width': [128, 128, 38], 'data': {'points': array([[ 2.725, -1.025, -0.575], @@ -120,7 +120,7 @@ pointcloud = read_pointcloud(get_data("apartment") / "sum.ply") print(f"Loaded pointcloud with {len(pointcloud.points)} points") ``` - +{/* Result: */} ``` Loaded pointcloud with 63672 points ``` @@ -129,7 +129,8 @@ Loaded pointcloud with 63672 points Data files live in `data/` at the repo root. Large files are stored in `data/.lfs/` as `.tar.gz` archives tracked by Git LFS. -
Diagram +
+Diagram ```diagon fold mode=Tree data/ @@ -141,9 +142,10 @@ data/ apartment.tar.gz ``` +
- +{/* Result: */} ``` data/ ├──cafe.jpg diff --git a/docs/development/profiling_dimos.md b/docs/development/profiling_dimos.mdx similarity index 95% rename from docs/development/profiling_dimos.md rename to docs/development/profiling_dimos.mdx index 2ff3082299..d3a35a6b12 100644 --- a/docs/development/profiling_dimos.md +++ b/docs/development/profiling_dimos.mdx @@ -1,5 +1,3 @@ -# Profiling dimos - You can use py-spy to profile a particular blueprint: ```bash diff --git a/docs/development/testing.md b/docs/development/testing.mdx similarity index 99% rename from docs/development/testing.md rename to docs/development/testing.mdx index bdce5bfe97..ce6da5c717 100644 --- a/docs/development/testing.md +++ b/docs/development/testing.mdx @@ -1,5 +1,3 @@ -# Testing - For development, you should install all dependencies so that tests have access to them. ```bash diff --git a/docs/development/writing_docs.md b/docs/development/writing_docs.mdx similarity index 97% rename from docs/development/writing_docs.md rename to docs/development/writing_docs.mdx index 8b24dc620b..92e1c1b12d 100644 --- a/docs/development/writing_docs.md +++ b/docs/development/writing_docs.mdx @@ -1,5 +1,3 @@ -# Writing Docs - 1. 
Where to put your docs: - If it only matters to people who contribute to dimos (like this doc), put them in `docs/development` - Otherwise put them in `docs/usage` diff --git a/docs/installation/nix.md b/docs/installation/nix.mdx similarity index 97% rename from docs/installation/nix.md rename to docs/installation/nix.mdx index 3d79e313f3..69c6d812ae 100644 --- a/docs/installation/nix.md +++ b/docs/installation/nix.mdx @@ -1,5 +1,3 @@ -# Nix install (required for nix managed dimos) - You need to have [nix](https://nixos.org/) installed and [flakes](https://nixos.wiki/wiki/Flakes) enabled, [official install docs](https://nixos.org/download/) recommended, but here is a quickstart: diff --git a/docs/installation/osx.md b/docs/installation/osx.mdx similarity index 86% rename from docs/installation/osx.md rename to docs/installation/osx.mdx index f2bd77e3ff..7044f00a75 100644 --- a/docs/installation/osx.md +++ b/docs/installation/osx.mdx @@ -1,6 +1,4 @@ -# macOS Install (12.6 or newer) - -```sh skip +```sh # install homebrew /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" # install dependencies @@ -12,7 +10,7 @@ curl -LsSf https://astral.sh/uv/install.sh | sh && export PATH="$HOME/.local/bin # Using DimOS as a library -```sh skip +```sh mkdir myproject && cd myproject uv venv --python 3.12 @@ -25,13 +23,13 @@ uv pip install 'dimos[misc,sim,visualization,agents,web,perception,unitree,manip # Developing on DimOS -```sh skip +```sh # this allows getting large files on-demand (and not pulling all immediately) export GIT_LFS_SKIP_SMUDGE=1 -git clone https://github.com/dimensionalOS/dimos.git +git clone -b dev https://github.com/dimensionalOS/dimos.git cd dimos -uv sync --extra all +uv sync --all-extras --no-extra dds # type check uv run mypy dimos diff --git a/docs/installation/ubuntu.md b/docs/installation/ubuntu.mdx similarity index 82% rename from docs/installation/ubuntu.md rename to docs/installation/ubuntu.mdx index 
d0880b0616..4a1beaf8e3 100644 --- a/docs/installation/ubuntu.md +++ b/docs/installation/ubuntu.mdx @@ -1,6 +1,4 @@ -# System Dependencies Install (Ubuntu 22.04 or 24.04) - -```sh skip +```sh sudo apt-get update sudo apt-get install -y curl g++ portaudio19-dev git-lfs libturbojpeg python3-dev pre-commit @@ -10,7 +8,7 @@ curl -LsSf https://astral.sh/uv/install.sh | sh && export PATH="$HOME/.local/bin # Using DimOS as a library -```sh skip +```sh mkdir myproject && cd myproject uv venv --python 3.12 @@ -23,13 +21,13 @@ uv pip install 'dimos[misc,sim,visualization,agents,web,perception,unitree,manip # Developing on DimOS -```sh skip +```sh # this allows getting large files on-demand (and not pulling all immediately) export GIT_LFS_SKIP_SMUDGE=1 -git clone https://github.com/dimensionalOS/dimos.git +git clone -b dev https://github.com/dimensionalOS/dimos.git cd dimos -uv sync --extra all +uv sync --all-extras --no-extra dds # type check uv run mypy dimos diff --git a/docs/platforms/humanoid-g1.mdx b/docs/platforms/humanoid-g1.mdx new file mode 100644 index 0000000000..aab67ce733 --- /dev/null +++ b/docs/platforms/humanoid-g1.mdx @@ -0,0 +1,165 @@ +The Unitree G1 is a humanoid robot platform with full-body locomotion, arm gesture control, and agentic capabilities — no ROS required for basic operation. + +## Requirements + +- Unitree G1 (stock firmware) +- Ubuntu 22.04/24.04 with CUDA GPU (recommended), or macOS (experimental) +- Python 3.12 +- ZED camera (mounted at chest height) for perception blueprints +- ROS 2 for navigation (the G1 navigation stack uses ROS nav) + +## Install + +First, install system dependencies for your platform: +- [Ubuntu](/docs/installation/ubuntu.mdx) +- [macOS](/docs/installation/osx.mdx) +- [Nix](/docs/installation/nix.mdx) + +Then install DimOS: + +```bash +uv venv --python "3.12" +source .venv/bin/activate +uv pip install 'dimos[base,unitree]' +``` + +## MuJoCo Simulation + +No hardware?
Start with simulation: + +```bash +uv pip install 'dimos[base,unitree,sim]' +dimos --simulation run unitree-g1-basic-sim +``` + +This runs the G1 in MuJoCo with the native A* navigation stack — same blueprint structure, simulated robot. Opens the command center at [localhost:7779](http://localhost:7779) with Rerun 3D visualization. + +## Run on Your G1 + +```bash +export ROBOT_IP= +dimos run unitree-g1-basic +``` + +DimOS connects via WebRTC, starts the ROS navigation stack, and opens the command center. + +### What's Running + +| Module | What It Does | +|--------|-------------| +| **G1Connection** | WebRTC connection to the robot — streams video, odometry | +| **Webcam** | ZED camera capture (stereo left, 15 fps) | +| **VoxelGridMapper** | Builds a 3D voxel map using column-carving (CUDA accelerated) | +| **CostMapper** | Converts 3D map → 2D costmap via terrain slope analysis | +| **WavefrontFrontierExplorer** | Autonomous exploration of unmapped areas | +| **ROSNav** | ROS 2 navigation integration for path planning | +| **RerunBridge** | 3D visualization in browser | +| **WebsocketVis** | Command center at localhost:7779 | + +### Send Goals + +From the command center ([localhost:7779](http://localhost:7779)): +- Click on the map to set navigation goals +- Toggle autonomous exploration +- Monitor robot pose, costmap, and planned path + +## Agentic Control + +Natural language control with an LLM agent that understands physical space and can command arm gestures: + +```bash +export OPENAI_API_KEY= +export ROBOT_IP= +dimos run unitree-g1-agentic +``` + +Then use the human CLI: + +```bash +humancli +> wave hello +> explore the room +> give me a high five +``` + +The agent subscribes to camera and spatial memory streams and has access to G1-specific skills including arm gestures and movement modes. 
+ +### Arm Gestures + +The G1 agent can perform expressive arm gestures: + +| Gesture | Description | +|---------|-------------| +| Handshake | Perform a handshake gesture with the right hand | +| HighFive | Give a high five with the right hand | +| Hug | Perform a hugging gesture with both arms | +| HighWave | Wave with the hand raised high | +| Clap | Clap hands together | +| FaceWave | Wave near the face level | +| LeftKiss | Blow a kiss with the left hand | +| ArmHeart | Make a heart shape with both arms overhead | +| RightHeart | Make a heart gesture with the right hand | +| HandsUp | Raise both hands up in the air | +| RightHandUp | Raise only the right hand up | +| Reject | Make a rejection or "no" gesture | +| CancelAction | Cancel any current arm action and return to neutral | + +### Movement Modes + +| Mode | Description | +|------|-------------| +| WalkMode | Normal walking | +| WalkControlWaist | Walking with waist control | +| RunMode | Running | + +## Keyboard Teleop + +Direct keyboard control via a pygame-based joystick: + +```bash +export ROBOT_IP= +dimos run unitree-g1-joystick +``` + +## Available Blueprints + +| Blueprint | Description | +|-----------|-------------| +| `unitree-g1-basic` | Connection + ROS navigation + visualization | +| `unitree-g1-basic-sim` | Simulation with A* navigation | +| `unitree-g1` | Navigation + perception + spatial memory | +| `unitree-g1-sim` | Simulation with perception + spatial memory | +| `unitree-g1-agentic` | Full stack with LLM agent and G1 skills | +| `unitree-g1-agentic-sim` | Agentic stack in simulation | +| `unitree-g1-full` | Agentic + SHM image transport + keyboard teleop | +| `unitree-g1-joystick` | Navigation + keyboard teleop | +| `unitree-g1-detection` | Navigation + YOLO person detection and tracking | +| `unitree-g1-shm` | Navigation + perception with shared memory image transport | +| `uintree-g1-primitive-no-nav` | Sensors + visualization only (no navigation, base for custom blueprints) | + +### 
Blueprint Hierarchy + +Blueprints compose incrementally: + +``` +primitive (sensors + vis) +├── basic (+ connection + navigation) +│ ├── basic-sim (sim connection + A* nav) +│ ├── joystick (+ keyboard teleop) +│ └── detection (+ YOLO person tracking) +├── perceptive (+ spatial memory + object tracking) +│ ├── sim (sim variant) +│ └── shm (+ shared memory transport) +└── agentic (+ LLM agent + G1 skills) + ├── agentic-sim (sim variant) + └── full (+ SHM + keyboard teleop) +``` + +## Deep Dive + +- [Navigation Stack](/docs/capabilities/navigation/overview.mdx) — path planning and autonomous exploration +- [Visualization](/docs/usage/visualization.md) — Rerun, Foxglove, performance tuning +- [Data Streams](/docs/usage/data_streams) — RxPY streams, backpressure, quality filtering +- [Transports](/docs/usage/transports/index.md) — LCM, SHM, DDS +- [Blueprints](/docs/usage/blueprints.mdx) — composing modules +- [Agents](/docs/capabilities/agents/readme.md) — LLM agent framework diff --git a/docs/platforms/humanoid/g1/index.md b/docs/platforms/humanoid/g1/index.md deleted file mode 100644 index 80a990fb60..0000000000 --- a/docs/platforms/humanoid/g1/index.md +++ /dev/null @@ -1,132 +0,0 @@ -# Unitree G1 - -## Requirements - -- Unitree G1 EDU (need SDK/SSH access) -- Laptop/Desktop with Ubuntu 22.04/24.04 with CUDA GPU (recommended), or macOS (experimental) - -## 1. Get SSH Access - -### Get Ethernet Working -1. Plug an Ethernet cable from the robot into your Laptop -2. Open up your Laptop's graphical network manager, manually set the IP addr of your system to `192.168.123.100` -3. Run wired ssh command: - -```bash -ssh -L 3030:localhost:3030 unitree@192.168.123.164 -# Password: 123 -``` - -### Use Ethernet to get WiFi Working - -After ssh-ing in, find additional IPs: -```bash -hostname -I -``` -The second address allows SSH after disconnecting Ethernet.
- -WiFi passwords (varies by unit): `888888888` or `00000000` - -### Network Interface Names - -Common interface names needed for SDK examples: -- `eth0` / `enp2s0` — Ethernet -- `wlan0` — WiFi - -Check with: `ip addr show` - -### Remote Network - -Recommended to setup [tailscale](https://tailscale.com/tailscale-ssh) to avoid needing to setup rounter specific configuraions for wireless control. - -## 2. Install DimOS - -SSH into the robot, then: - -```bash -# pick the "developer" setup -bash <(curl -fsSL https://pub-4767fdd15e6a41b6b2ce2558d71ec8d9.r2.dev/install.sh) -``` - -#### Notes - -DimOS handles DDS setup automatically. If you're using the Unitree SDK directly, set: -```bash -export CYCLONEDDS_HOME="$HOME/cyclonedds/install" -``` - -## 3. Get the G1 in Sport Mode - -**WARNING**: You *need* to have the G1 in a good physical position before running this. - -Get the hand-held controller for the G1. - -Note: this button combination may vary based on the model of the G1 - -1. If you have a gantry, hang the robot up where its feet are touching the floor, knees straight. - - Press **L2 + B** (no movement, color change) - - Press **L2 + Up** (should straighten out) - - Press **R2 + A** (will attempt to self-balance) -2. If don't have a gantry, there is a make-shift way to get it working. You should get a second person to help. - - Make the robot lie down flat on the ground - - Press **L2 + B** (no movement, color change) - - Press **L2 + Up** (should straighten out) - - The robot will be super stiff now. Manually pick it up into a standing position and hold it there. - - Press **R2 + A** (will attempt to self-balance) - -## 4. 
Run the Blueprint - -In the ssh terminal `ssh -L 3030:localhost:3030 unitree@192.168.123.164` - -```sh -source .venv/bin/activate -uv run dimos --rerun-host 0.0.0.0 run unitree-g1-nav-onboard -# should print out something like: -# ============================================================ -# Rerun gRPC server running (no viewer opened) -# -# Connect a viewer: -# dimos-viewer --connect rerun+http://0.0.0.0:9877/proxy --ws-url ws://0.0.0.0:3030/ws -# dimos-viewer --connect rerun+http://192.168.123.164:9877/proxy --ws-url ws://192.168.123.164:3030/ws # eth0 -# dimos-viewer --connect rerun+http://100.88.236.73:9877/proxy --ws-url ws://100.88.236.73:3030/ws # tailscale0 -# dimos-viewer --connect rerun+http://10.0.0.197:9877/proxy --ws-url ws://10.0.0.197:3030/ws # wlan0 -# dimos-viewer --connect rerun+http://172.17.0.1:9877/proxy --ws-url ws://172.17.0.1:3030/ws # docker0 -# -# hostname: ubuntu -# ============================================================ -``` - -On your laptop: - -```sh -# install uv -curl -LsSf https://astral.sh/uv/install.sh | sh -uv venv --python "3.12" -# use uv to get the dimos viewer -uvx dimos-viewer --version - -# run the connect command. NOTE: the address will be different for you -uvx dimos-viewer --connect rerun+http://100.88.236.73:9877/proxy --ws-url ws://100.88.236.73:3030/ws -``` - -The viewer should open up. It'll run in faster-than-real speed until its caught up with reality, then should show what's happening in real time. - -### Troubleshooting - -#### Keyboard Controls Not Working - -This usually means port `3030` wasn't forwarded. The `3030:localhost:3030` in the ssh command is what forwards the port. If you use VS Code with the SSH plugin, ports will be forwarded automatically. However sometimes the auto-forward will map 3030 to 3031 - thus breaking the connect command. Clear whatever is on port 3030 (on the G1 sid and the Laptop) then try again. 
- -#### Viewer Crashing - -If the viewer keeps crashing for you, there are two options for now: -1. On the G1 (ssh connection) change `vis_throttle=0.5` (inside `dimos/robot/unitree/g1/blueprints/navigation/unitree_g1_nav_onboard.py`) to a lower number, like 0.3 or 0.2 -2. Get more RAM - - - -## External Resources - -- [Unitree Developer Docs](https://support.unitree.com/home/en/developer) -- [Sport Mode Services](https://support.unitree.com/home/en/developer/sports_services) -- [Unitree SDK2 Python](https://github.com/unitreerobotics/unitree_sdk2_python) diff --git a/docs/platforms/quadruped/go2/index.md b/docs/platforms/quadruped-go2.mdx similarity index 97% rename from docs/platforms/quadruped/go2/index.md rename to docs/platforms/quadruped-go2.mdx index a0ad157be9..4ee9a366c3 100644 --- a/docs/platforms/quadruped/go2/index.md +++ b/docs/platforms/quadruped-go2.mdx @@ -1,5 +1,3 @@ -# Unitree Go2 — Getting Started - The Unitree Go2 is DimOS's primary reference platform. Full autonomous navigation, mapping, and agentic control — no ROS required. ## Requirements @@ -71,7 +69,7 @@ export ROBOT_IP= ### Pre-flight checks -1. Robot is reachable and low latency <10ms, 0% packet loss +1. Robot is reachable and low latency (<10ms, 0% packet loss) ```bash ping $ROBOT_IP ``` diff --git a/docs/requirements.md b/docs/requirements.mdx similarity index 99% rename from docs/requirements.md rename to docs/requirements.mdx index f27d2a0a47..80039d12c9 100644 --- a/docs/requirements.md +++ b/docs/requirements.mdx @@ -1,5 +1,3 @@ -# System Requirements - ## Hardware | Component | Minimum | Recommended | diff --git a/docs/usage/blueprints.md b/docs/usage/blueprints.mdx similarity index 99% rename from docs/usage/blueprints.md rename to docs/usage/blueprints.mdx index fcdae4c937..e5684ee305 100644 --- a/docs/usage/blueprints.md +++ b/docs/usage/blueprints.mdx @@ -1,5 +1,3 @@ -# Blueprints - Blueprints (`BlueprintAtom`) are instructions for how to initialize a `Module`. 
You don't typically want to run a single module, so multiple blueprints are handled together in `Blueprint`. diff --git a/docs/usage/cli.md b/docs/usage/cli.mdx similarity index 99% rename from docs/usage/cli.md rename to docs/usage/cli.mdx index be5c36e24a..87ce0bef36 100644 --- a/docs/usage/cli.md +++ b/docs/usage/cli.mdx @@ -1,5 +1,3 @@ -# CLI Reference - The `dimos` CLI manages the full lifecycle of a DimOS robot stack — start, stop, inspect, and interact. ## Global Options diff --git a/docs/usage/README.md b/docs/usage/concepts.mdx similarity index 98% rename from docs/usage/README.md rename to docs/usage/concepts.mdx index 071b6fc0b2..fbf2c98932 100644 --- a/docs/usage/README.md +++ b/docs/usage/concepts.mdx @@ -1,5 +1,3 @@ -# Concepts - This page explains general concepts. ## Table of Contents diff --git a/docs/usage/configuration.md b/docs/usage/configuration.mdx similarity index 98% rename from docs/usage/configuration.md rename to docs/usage/configuration.mdx index 8e0fc44b5a..d9d239846c 100644 --- a/docs/usage/configuration.md +++ b/docs/usage/configuration.mdx @@ -1,5 +1,3 @@ -# Configuration - Dimos provides a `Configurable` base class. See [`service/spec.py`](/dimos/protocol/service/spec.py#L22). This allows using pydantic models to specify configuration structure and default values per module. 
@@ -33,7 +31,7 @@ except (TypeError, ValidationError) as e: ``` - +{/* Result: */} ``` Config(x=3, hello='world') Config(x=3, hello='override') @@ -76,7 +74,7 @@ myModule = MyModule(frame_id="frame_id_override", device="CPU") ``` - +{/* Result: */} ``` Config( rpc_transport=, diff --git a/docs/usage/data_streams/advanced_streams.md b/docs/usage/data_streams/advanced_streams.mdx similarity index 96% rename from docs/usage/data_streams/advanced_streams.md rename to docs/usage/data_streams/advanced_streams.mdx index 68c1ed5bfa..e3f6d6f928 100644 --- a/docs/usage/data_streams/advanced_streams.md +++ b/docs/usage/data_streams/advanced_streams.mdx @@ -1,5 +1,3 @@ -# Advanced Stream Handling - > **Prerequisite:** Read [ReactiveX Fundamentals](/docs/usage/data_streams/reactivex.md) first for Observable basics. ## Backpressure and Parallel Subscribers to Hardware @@ -9,7 +7,8 @@ In robotics, we deal with hardware that produces data at its own pace - a camera **The problem:** A fast producer can overwhelm a slow consumer, causing memory buildup or dropped frames. We might have multiple subscribers to the same hardware that operate at different speeds. -
Pikchr +
+Pikchr ```pikchr fold output=assets/backpressure.svg color = white @@ -24,10 +23,11 @@ Slow: box "ML Model" "2 fps" rad 5px fit wid 130% ht 130% text "items pile up!" at (Queue.x, Queue.y - 0.45in) ``` +
- -![output](assets/backpressure.svg) +{/* Result: */} +output **The solution:** The `backpressure()` wrapper handles this by: @@ -67,7 +67,7 @@ print(f"slow got {len(slow_results)} items (skipped {len(fast_results) - len(slo scheduler.executor.shutdown(wait=True) ``` - +{/* Result: */} ``` fast got 20 items: [0, 1, 2, 3, 4]... slow got 7 items (skipped 13) @@ -76,7 +76,8 @@ slow got 7 items (skipped 13) ### How it works -
Pikchr +
+Pikchr ```pikchr fold output=assets/backpressure_solution.svg color = white @@ -94,10 +95,11 @@ arrow Slow: box "Slow Sub" rad 5px fit wid 170% ht 170% ``` +
- -![output](assets/backpressure_solution.svg) +{/* Result: */} +output The `LATEST` strategy means: when the slow subscriber finishes processing, it gets whatever the most recent value is, skipping any values that arrived while it was busy. @@ -232,10 +234,11 @@ Blk2: box dashed color 0x5c9ff0 with .nw at Cold2.nw + (-0.1in, 0.25in) wid (Dis text "blocking" italic with .n at Blk2.n + (0, -0.05in) ``` +
- -![output](assets/getter_hot_cold.svg) +{/* Result: */} +output **Prefer `getter_cold()`** when you can afford to wait and warmup isn't expensive. It's simpler (no cleanup needed) and doesn't hold resources. Only use `getter_hot()` when you need instant reads or the source is expensive to start. @@ -265,7 +268,7 @@ print("after 700ms:", get_val()) get_val.dispose() # Don't forget to clean up! ``` - +{/* Result: */} ``` first call: 0 after 350ms: 3 @@ -288,7 +291,7 @@ print("call 2:", get_val()) # subscribes again, gets 0, disposes print("call 3:", get_val()) # subscribes again, gets 0, disposes ``` - +{/* Result: */} ``` call 1: 0 call 2: 0 diff --git a/docs/usage/data_streams/README.md b/docs/usage/data_streams/overview.mdx similarity index 99% rename from docs/usage/data_streams/README.md rename to docs/usage/data_streams/overview.mdx index 014970a401..adbc8de962 100644 --- a/docs/usage/data_streams/README.md +++ b/docs/usage/data_streams/overview.mdx @@ -1,5 +1,3 @@ -# Sensor Streams - Dimos uses reactive streams (RxPY) to handle sensor data. This approach naturally fits robotics where multiple sensors emit data asynchronously at different rates, and downstream processors may be slower than the data sources. ## Guides diff --git a/docs/usage/data_streams/quality_filter.md b/docs/usage/data_streams/quality_filter.mdx similarity index 95% rename from docs/usage/data_streams/quality_filter.md rename to docs/usage/data_streams/quality_filter.mdx index c928b3ff97..fc569dda15 100644 --- a/docs/usage/data_streams/quality_filter.md +++ b/docs/usage/data_streams/quality_filter.mdx @@ -1,5 +1,3 @@ -# Quality-Based Stream Filtering - When processing sensor streams, you often want to reduce frequency while keeping the best quality data. For discrete data like images that can't be averaged or merged, instead of blindly dropping frames, `quality_barrier` selects the highest quality item within each time window. 
## The Problem @@ -40,7 +38,7 @@ print("Selected:", [r["id"] for r in result]) print("Qualities:", [r["quality"] for r in result]) ``` - +{/* Result: */} ``` Selected: [2] Qualities: [0.9] @@ -74,7 +72,7 @@ print("Sharpness scores:") show_frames(input_frames) ``` - +{/* Result: */} ``` Loaded 20 frames from Go2 camera Frame resolution: 1280x720 @@ -105,7 +103,7 @@ print(f"Output: {len(sharp_frames)} frame(s) (selected sharpest per window)") show_frames(sharp_frames) ``` - +{/* Result: */} ``` Output: 3 frame(s) (selected sharpest per window) Frame 0: 0.351 @@ -167,15 +165,15 @@ Visualizing which frames were selected (green border = selected as sharpest in w plot_mosaic(input_frames, sharp_frames, '{output}') ``` - -![output](assets/frame_mosaic.jpg) +{/* Result: */} +output ```python skip session=qb output=assets/sharpness_graph.svg plot_sharpness(input_frames, sharp_frames, '{output}') ``` - -![output](assets/sharpness_graph.svg) +{/* Result: */} +output Let's request a higher frequency. 
@@ -189,7 +187,7 @@ print(f"Output: {len(sharp_frames)} frame(s) (selected sharpest per window)") show_frames(sharp_frames) ``` - +{/* Result: */} ``` Output: 6 frame(s) (selected sharpest per window) Frame 0: 0.351 @@ -204,16 +202,16 @@ Output: 6 frame(s) (selected sharpest per window) plot_mosaic(input_frames, sharp_frames, '{output}') ``` - -![output](assets/frame_mosaic2.jpg) +{/* Result: */} +output ```python skip session=qb output=assets/sharpness_graph2.svg plot_sharpness(input_frames, sharp_frames, '{output}') ``` - -![output](assets/sharpness_graph2.svg) +{/* Result: */} +output As we can see the system is trying to strike a balance between requested frequency and quality that's available @@ -261,7 +259,7 @@ print(f"Mean gradient magnitude: {magnitude.mean():.2f}") print(f"Normalized sharpness: {img.sharpness:.3f}") ``` - +{/* Result: */} ``` Mean gradient magnitude: 230.00 Normalized sharpness: 0.332 @@ -287,7 +285,7 @@ result = rx.of(*detections).pipe( print(f"Selected: {result[0]['name']} (conf: {result[0]['confidence']})") ``` - +{/* Result: */} ``` Selected: dog (conf: 0.95) ``` diff --git a/docs/usage/data_streams/reactivex.md b/docs/usage/data_streams/reactivex.mdx similarity index 97% rename from docs/usage/data_streams/reactivex.md rename to docs/usage/data_streams/reactivex.mdx index b356714083..07c0d1cf77 100644 --- a/docs/usage/data_streams/reactivex.md +++ b/docs/usage/data_streams/reactivex.mdx @@ -1,5 +1,3 @@ -# ReactiveX (RxPY) Quick Reference - RxPY provides composable asynchronous data streams. This is a practical guide focused on common patterns in this codebase. 
## Quick Start: Using an Observable @@ -19,7 +17,7 @@ source.subscribe(lambda x: received.append(x)) print("received:", received) ``` - +{/* Result: */} ``` received: [0, 1, 2, 3, 4] ``` @@ -43,7 +41,7 @@ observable.subscribe(lambda x: result.append(x)) print("transformed:", result) ``` - +{/* Result: */} ``` transformed: [6, 8] ``` @@ -58,7 +56,7 @@ rx.of(1, 2, 3).pipe( ).subscribe(print) ``` - +{/* Result: */} ``` item_1 item_2 @@ -74,7 +72,7 @@ rx.of(1, 2, 3, 4, 5).pipe( ).subscribe(print) ``` - +{/* Result: */} ``` 2 4 @@ -89,7 +87,7 @@ rx.of(1, 2, 3, 4, 5).pipe( ).subscribe(print) ``` - +{/* Result: */} ``` 1 2 @@ -106,7 +104,7 @@ rx.of(1, 2).pipe( ).subscribe(print) ``` - +{/* Result: */} ``` 1 10 @@ -133,7 +131,7 @@ results = rx.interval(0.05).pipe( print("sample() got:", results) ``` - +{/* Result: */} ``` sample() got: [2, 6, 9] ``` @@ -151,7 +149,7 @@ results = rx.interval(0.05).pipe( print("throttle_first() got:", results) ``` - +{/* Result: */} ``` throttle_first() got: [0, 3, 6, 9] ``` @@ -169,7 +167,7 @@ print("sample: latest value at each tick") print("throttle_first: first value, then block") ``` - +{/* Result: */} ``` sample: latest value at each tick throttle_first: first value, then block @@ -228,8 +226,8 @@ Handler: box "callback" rad 5px fit wid 170% ht 170%
- -![output](assets/observable_flow.svg) +{/* Result: */} +output **Key property: Observables are lazy.** Nothing happens until you call `.subscribe()`. This means you can build up complex pipelines without any work being done, then start the flow when ready. @@ -244,7 +242,7 @@ rx.of(1, 2, 3).subscribe( ) ``` - +{/* Result: */} ``` value: 1 value: 2 @@ -268,7 +266,7 @@ subscription.dispose() # Stop receiving values, clean up resources print("disposed") ``` - +{/* Result: */} ``` disposed ``` @@ -305,7 +303,7 @@ time.sleep(0.25) module.stop() ``` - +{/* Result: */} ``` got 0 got 1 @@ -360,7 +358,7 @@ sub.dispose() print("callbacks after dispose:", len(sensor._callbacks)) ``` - +{/* Result: */} ``` received: ['reading_1', 'reading_2'] callbacks after dispose: 0 @@ -398,7 +396,7 @@ sub.dispose() print("callbacks after dispose:", len(pubsub._callbacks)) ``` - +{/* Result: */} ``` received: ['msg_1', 'msg_2'] callbacks after dispose: 0 @@ -425,7 +423,7 @@ obs.subscribe( print("results:", results) ``` - +{/* Result: */} ``` cleaned up results: ['first', 'second', 'DONE'] @@ -451,7 +449,7 @@ time.sleep(0.2) print(f"received {len(received)} items before dispose") ``` - +{/* Result: */} ``` received 2 items before dispose ``` @@ -474,7 +472,7 @@ disposables.dispose() print("after dispose:", disposables.is_disposed) ``` - +{/* Result: */} ``` subscriptions: 2 after dispose: True diff --git a/docs/usage/data_streams/storage_replay.md b/docs/usage/data_streams/storage_replay.mdx similarity index 99% rename from docs/usage/data_streams/storage_replay.md rename to docs/usage/data_streams/storage_replay.mdx index 37777ce4d9..a11190010e 100644 --- a/docs/usage/data_streams/storage_replay.md +++ b/docs/usage/data_streams/storage_replay.mdx @@ -1,5 +1,3 @@ -# Sensor Storage and Replay - Record sensor streams to disk and replay them with original timing. Useful for testing, debugging, and creating reproducible datasets. 
## Quick Start diff --git a/docs/usage/data_streams/temporal_alignment.md b/docs/usage/data_streams/temporal_alignment.mdx similarity index 95% rename from docs/usage/data_streams/temporal_alignment.md rename to docs/usage/data_streams/temporal_alignment.mdx index 18f28b2e3d..b30f6eed2e 100644 --- a/docs/usage/data_streams/temporal_alignment.md +++ b/docs/usage/data_streams/temporal_alignment.mdx @@ -1,10 +1,9 @@ -# Temporal Message Alignment - Robots have multiple sensors emitting data at different rates and latencies. A camera might run at 30fps, while lidar scans at 10Hz, and each has different processing delays. For perception tasks like projecting 2D detections into 3D pointclouds, we need to match data from these streams by timestamp. `align_timestamped` solves this by buffering messages and matching them within a time tolerance. -
Pikchr +
+Pikchr ```pikchr fold output=assets/alignment_overview.svg color = white @@ -21,10 +20,11 @@ arrow from Align.e right 0.4in Out: box "(image, pointcloud)" rad 5px fit wid 170% ht 170% ``` +
- -![output](assets/alignment_overview.svg) +{/* Result: */} +output ## Basic Usage @@ -68,6 +68,7 @@ lidar_stream = lidar_replay.stream(from_timestamp=seek_ts, duration=2.0).pipe( ``` +
Streams would normally come from an actual robot into your module via `In` inputs. [`detection/module3D.py`](/dimos/perception/detection/module3D.py#L11) is a good example of this. @@ -95,7 +96,7 @@ if aligned_pairs: print(f"\nFirst matched pair: Δ{dt*1000:.1f}ms") ``` - +{/* Result: */} ``` Video: 29 frames, Lidar: 15 scans Aligned pairs: 11 out of 29 video frames @@ -157,14 +158,15 @@ def plot_alignment_timeline(video_frames, lidar_scans, aligned_pairs, path): plt.close() ``` +
```python skip session=align output=assets/alignment_timeline.png plot_alignment_timeline(video_frames, lidar_scans, aligned_pairs, '{output}') ``` - -![output](assets/alignment_timeline.png) +{/* Result: */} +output If we loosen up our match tolerance, we might get multiple pairs matching the same lidar frame. @@ -180,7 +182,7 @@ print(f"Video: {len(video_frames)} frames, Lidar: {len(lidar_scans)} scans") print(f"Aligned pairs: {len(aligned_pairs)} out of {len(video_frames)} video frames") ``` - +{/* Result: */} ``` Video: 58 frames, Lidar: 30 scans Aligned pairs: 23 out of 58 video frames @@ -191,8 +193,8 @@ Aligned pairs: 23 out of 58 video frames plot_alignment_timeline(video_frames, lidar_scans, aligned_pairs, '{output}') ``` - -![output](assets/alignment_timeline2.png) +{/* Result: */} +output ## Combine Frame Alignment with a Quality Filter @@ -226,7 +228,7 @@ print(f"Aligned pairs: {len(aligned_pairs)} out of {len(video_frames)} video fra ``` - +{/* Result: */} ``` Video: 6 frames, Lidar: 15 scans Aligned pairs: 1 out of 6 video frames @@ -236,8 +238,8 @@ Aligned pairs: 1 out of 6 video frames plot_alignment_timeline(video_frames, lidar_scans, aligned_pairs, '{output}') ``` - -![output](assets/alignment_timeline3.png) +{/* Result: */} +output We are very picky but data is high quality. Best frame, with closest lidar match in this window. @@ -273,10 +275,11 @@ Buffer: box "Buffer" "primary" rad 5px fit wid 170% ht 170% text "waiting..." at (Buffer.w.x - 0.4in, Buffer.w.y - 0.15in) ``` +
- -![output](assets/alignment_flow.svg) +{/* Result: */} +output ## Parameters diff --git a/docs/usage/lcm.md b/docs/usage/lcm.mdx similarity index 98% rename from docs/usage/lcm.md rename to docs/usage/lcm.mdx index 38d2520822..fb70d12947 100644 --- a/docs/usage/lcm.md +++ b/docs/usage/lcm.mdx @@ -1,5 +1,3 @@ -# LCM Messages - DimOS uses [LCM (Lightweight Communications and Marshalling)](https://github.com/lcm-proj/lcm) for inter-process communication on a local machine (similar to how ROS uses DDS). LCM is a simple [UDP multicast](https://lcm-proj.github.io/lcm/content/udp-multicast-protocol.html#lcm-udp-multicast-protocol-description) pubsub protocol with a straightforward [message definition language](https://lcm-proj.github.io/lcm/content/lcm-type-ref.html#lcm-type-specification-language). The LCM project provides pubsub clients and code generators for many languages. For us the power of LCM is its message definition format, multi-language classes that encode themselves to a compact binary format. This means LCM messages can be sent over any transport (WebSocket, SSH, shared memory, etc.) between differnt programming languages. @@ -45,7 +43,7 @@ decoded = LCMVector3.lcm_decode(binary) print(f"Decoded: x={decoded.x}, y={decoded.y}, z={decoded.z}") ``` - +{/* Result: */} ``` Encoded to 32 bytes: ae7e5fba5eeca11e3ff000000000000040000000000000004008000000000000 Decoded: x=1.0, y=2.0, z=3.0 @@ -78,7 +76,7 @@ binary = v1.lcm_encode() print(f"LCM encoded: {len(binary)} bytes") ``` - +{/* Result: */} ``` v1 + v2 = (5.0, 7.0, 9.0) v1 dot v2 = 32.0 @@ -117,7 +115,7 @@ pc2 = PointCloud2.lcm_decode(binary) print(f"Decoded: {len(pc2)} points") ``` - +{/* Result: */} ``` PointCloud: 100 points, frame=camera Center: ↘ Vector Vector([0.47497518 0.49878164 0.43788878]) @@ -151,7 +149,7 @@ decoded = Vector3.lcm_decode(binary) print(f"Raw binary transport: decoded {decoded}") ``` - +{/* Result: */} ``` Memory transport: received ↘ Vector Vector([1. 2. 
3.]) Raw binary transport: decoded ↘ Vector Vector([1. 2. 3.]) diff --git a/docs/usage/modules.md b/docs/usage/modules.mdx similarity index 83% rename from docs/usage/modules.md rename to docs/usage/modules.mdx index 687db5a8b9..8580e82cda 100644 --- a/docs/usage/modules.md +++ b/docs/usage/modules.mdx @@ -1,5 +1,3 @@ -# DimOS Modules - Modules are subsystems on a robot that operate autonomously and communicate with other subsystems using standardized messages. Some examples of modules are: @@ -16,34 +14,35 @@ Below is an example of a structure for controlling a robot. Black blocks represe > brew install graphviz # macOS > ``` -```python skip output=assets/go2_nav.svg -from dimos.core.introspection.svg import to_svg -from dimos.robot.unitree.go2.blueprints.smart.unitree_go2 import unitree_go2 - -to_svg(unitree_go2, "assets/go2_nav.svg") +```python output=assets/go2_nav.svg +from dimos.core.introspection import to_svg +from dimos.robot.unitree_webrtc.unitree_go2_blueprints import nav +to_svg(nav, "assets/go2_nav.svg") ``` - -![output](assets/go2_nav.svg) + +{/* Result: */} +output ## Camera Module Let's learn how to build stuff like the above, starting with a simple camera module. -```python skip session=camera_module_demo output=assets/camera_module.svg +```python session=camera_module_demo output=assets/camera_module.svg from dimos.hardware.sensors.camera.module import CameraModule -from dimos.core.introspection.svg import to_svg +from dimos.core.introspection import to_svg to_svg(CameraModule.module_info(), "assets/camera_module.svg") ``` +{/* Result: */} +output + We can also print Module I/O quickly to the console via the `.io()` call. We will do this from now on. 
```python session=camera_module_demo ansi=false -from dimos.hardware.sensors.camera.module import CameraModule - print(CameraModule.io()) ``` - +{/* Result: */} ``` ┌┴─────────────┐ │ CameraModule │ @@ -51,13 +50,10 @@ print(CameraModule.io()) ├─ color_image: Image ├─ camera_info: CameraInfo │ - ├─ RPC build() -> None - ├─ RPC get_skills() -> list - ├─ RPC set_module_ref(name: str, module_ref: RPCClient) -> None - ├─ RPC set_transport(stream_name: str, transport: Transport) -> bool - ├─ RPC start() -> None - ├─ RPC stop() -> None - ├─ RPC take_a_picture() -> Image + ├─ RPC start() + ├─ RPC stop() + │ + ├─ Skill take_a_picture ``` We can see that the camera module outputs two streams: @@ -71,7 +67,7 @@ It also exposes an agentic [skill](/docs/usage/blueprints.md#defining-skills) ca We can start this module and explore the output of its streams in real time (this will use your webcam). -```python skip session=camera_module_demo ansi=false +```python session=camera_module_demo ansi=false import time camera = CameraModule() @@ -85,7 +81,7 @@ time.sleep(0.5) camera.stop() ``` - +{/* Result: */} ``` Out color_image[Image] @ CameraModule Image(shape=(480, 640, 3), format=RGB, dtype=uint8, dev=cpu, ts=2025-12-31 15:54:16) @@ -105,14 +101,14 @@ Image(shape=(480, 640, 3), format=RGB, dtype=uint8, dev=cpu, ts=2025-12-31 15:54 Let's load a standard 2D detector module and hook it up to a camera. 
-```python skip ansi=false session=detection_module +```python ansi=false session=detection_module from dimos.perception.detection.module2D import Detection2DModule, Config print(Detection2DModule.io()) ``` - +{/* Result: */} ``` - ├─ color_image: Image + ├─ image: Image ┌┴──────────────────┐ │ Detection2DModule │ └┬──────────────────┘ @@ -122,19 +118,16 @@ print(Detection2DModule.io()) ├─ detected_image_1: Image ├─ detected_image_2: Image │ - ├─ RPC build() -> None - ├─ RPC get_skills() -> list - ├─ RPC set_module_ref(name: str, module_ref: RPCClient) -> None ├─ RPC set_transport(stream_name: str, transport: Transport) -> bool ├─ RPC start() -> None ├─ RPC stop() -> None ``` - +{/* TODO: add easy way to print config */} Looks like the detector just needs an image input and outputs some sort of detection and annotation messages. Let's connect it to a camera. -```python skip ansi=false +```python ansi=false import time from dimos.perception.detection.module2D import Detection2DModule, Config from dimos.hardware.sensors.camera.module import CameraModule @@ -153,6 +146,14 @@ detector.stop() camera.stop() ``` +{/* Result: */} +``` +Detection(Person(1)) +Detection(Person(1)) +Detection(Person(1)) +Detection(Person(1)) +``` + ## Distributed Execution As we build module structures, we'll quickly want to utilize all cores on the machine (which Python doesn't allow as a single process) and potentially distribute modules across machines or even the internet. @@ -170,7 +171,7 @@ via `importlib.reload`, then redeploys it onto a fresh worker process while keeping its stream transports and reconnecting any other modules that held a reference to it. -```python skip +```python from dimos.core.coordination.module_coordinator import ModuleCoordinator from dimos.core.global_config import GlobalConfig from dimos.hardware.sensors.camera.module import CameraModule @@ -234,10 +235,6 @@ Each handler runs in a per-handler dispatcher task on `self._loop`. 
Handlers are - From inside the loop (another async `@rpc`, a `handle_*`, or a `process_observable` callback), it returns the coroutine so the caller can `await` it. ```python -from dimos.core.core import rpc -from dimos.core.module import Module - - class NameModule(Module): @rpc async def say_hello(self, name: str) -> str: @@ -253,12 +250,6 @@ Async and sync `@rpc` methods are interchangeable for cross-module linking. Both When the consumer types a module ref using a Spec that declares `async def`, the proxy automatically exposes those methods as awaitables: `await self._name_module.say_hello(name)`. ```python -from typing import Protocol - -from dimos.core.module import Module -from dimos.spec.utils import Spec - - class NameSpec(Spec, Protocol): async def say_hello(self, name: str) -> str: ... async def set_my_name(self, new_name: str) -> None: ... @@ -275,11 +266,6 @@ class StartModule(Module): `NameModule` is async. But if you need to call it from a sync module, you just need to create a `SyncNameSpec`: ```python -from typing import Protocol - -from dimos.spec.utils import Spec - - class SyncNameSpec(Spec, Protocol): def say_hello(self, name: str) -> str: ... def set_my_name(self, new_name: str) -> None: ... @@ -294,35 +280,28 @@ The reverse is also true: you can call a sync module from async code. When you need to start a long-running async task from `start()` (e.g., a timer loop), use `self.spawn(coro)` instead of `asyncio.run_coroutine_threadsafe(coro, self._loop)`. The helper wires up a done-callback that surfaces unhandled exceptions to the module logger. bare `run_coroutine_threadsafe` silently stores the exception on the returned Future, where it disappears unless the user remembers to read `.result()`. 
```python -import asyncio - -from dimos.core.core import rpc -from dimos.core.module import Module - - -class TimerExample(Module): - @rpc - def start(self) -> None: - super().start() - self._timer_future = self.spawn(self._timer_loop()) +@rpc +def start(self) -> None: + super().start() + self._timer_future = self.spawn(self._timer_loop()) - async def _timer_loop(self) -> None: - while True: - await asyncio.sleep(1.0) - ... +async def _timer_loop(self) -> None: + while True: + await asyncio.sleep(1.0) + ... - @rpc - def stop(self) -> None: - if self._timer_future is not None: - self._timer_future.cancel() - super().stop() +@rpc +def stop(self) -> None: + if self._timer_future is not None: + self._timer_future.cancel() + super().stop() ``` ### `process_observable`: async subscriptions to arbitrary observables Sometimes you have rxpy observables which you need to run inside `self._loop`. You can do this with `self.process_observable(observable, async_handler)` . -```python skip +```python @rpc def start(self) -> None: super().start() @@ -338,21 +317,6 @@ async def _on_fast_foo(self, v: int) -> None: When a module owns a resource that needs construction at startup *and* explicit cleanup at shutdown, define `async def main(self)` as an **async generator with exactly one `yield`**. Code before `yield` runs at `start()`, code after `yield` runs at `stop()`. ```python -from collections.abc import AsyncIterator -from typing import Any - -from dimos.core.module import Module - - -def create(name: str) -> Any: - del name - class _Model: - def stop(self) -> None: - pass - - return _Model() - - class PersonFollowSkillContainer(Module): async def main(self) -> AsyncIterator[None]: # setup @@ -372,15 +336,15 @@ A blueprint is a predefined structure of interconnected modules. You can include A basic Unitree Go2 blueprint looks like what we saw before. 
-```python skip session=blueprints output=assets/go2_agentic.svg -from dimos.core.introspection.svg import to_svg +```python session=blueprints output=assets/go2_agentic.svg +from dimos.core.introspection import to_svg from dimos.robot.unitree_webrtc.unitree_go2_blueprints import agentic to_svg(agentic, "assets/go2_agentic.svg") ``` - -![output](assets/go2_agentic.svg) +{/* Result: */} +output To see more information on how to use Blueprints, see [Blueprints](/docs/usage/blueprints.md). diff --git a/docs/usage/native_modules.md b/docs/usage/native_modules.mdx similarity index 99% rename from docs/usage/native_modules.md rename to docs/usage/native_modules.mdx index e4af928ee3..c56e246ae0 100644 --- a/docs/usage/native_modules.md +++ b/docs/usage/native_modules.mdx @@ -1,5 +1,3 @@ -# Native Modules - Prerequisite for this is to understand dimos [Modules](/docs/usage/modules.md) and [Blueprints](/docs/usage/blueprints.md). Native modules let you wrap **any executable** as a first-class DimOS module, given it speaks LCM. @@ -67,7 +65,7 @@ mylidar.imu.transport = LCMTransport("/imu", Imu) mylidar.start() ``` - +{/* Result: */} ``` 2026-02-14T11:22:12.123963Z [info ] Starting native process [dimos/core/native_module.py] cmd='./build/my_lidar --pointcloud /lidar#sensor_msgs.PointCloud2 --imu /imu#sensor_msgs.Imu --host_ip 192.168.1.5 --frequency 10.0' cwd=/home/lesh/coding/dimos/docs/usage/build ``` diff --git a/docs/usage/python-api.md b/docs/usage/python-api.mdx similarity index 99% rename from docs/usage/python-api.md rename to docs/usage/python-api.mdx index 8eb317e623..b3e9ad4872 100644 --- a/docs/usage/python-api.md +++ b/docs/usage/python-api.mdx @@ -1,5 +1,3 @@ -# Python API - The `Dimos` class is the main entry point for using DimOS from Python. There are two modes: 1. **Local** — `Dimos()` creates and runs modules in the current process. 
diff --git a/docs/usage/sensor_streams/advanced_streams.md b/docs/usage/sensor_streams/advanced_streams.mdx similarity index 96% rename from docs/usage/sensor_streams/advanced_streams.md rename to docs/usage/sensor_streams/advanced_streams.mdx index 588a7928ac..8d4245c9e4 100644 --- a/docs/usage/sensor_streams/advanced_streams.md +++ b/docs/usage/sensor_streams/advanced_streams.mdx @@ -1,5 +1,3 @@ -# Advanced Stream Handling - > **Prerequisite:** Read [ReactiveX Fundamentals](/docs/usage/sensor_streams/reactivex.md) first for Observable basics. ## Backpressure and Parallel Subscribers to Hardware @@ -9,7 +7,8 @@ In robotics, we deal with hardware that produces data at its own pace - a camera **The problem:** A fast producer can overwhelm a slow consumer, causing memory buildup or dropped frames. We might have multiple subscribers to the same hardware that operate at different speeds. -
Pikchr +
+Pikchr ```pikchr fold output=assets/backpressure.svg color = white @@ -24,10 +23,11 @@ Slow: box "ML Model" "2 fps" rad 5px fit wid 130% ht 130% text "items pile up!" at (Queue.x, Queue.y - 0.45in) ``` +
- -![output](assets/backpressure.svg) +{/* Result: */} +output **The solution:** The `backpressure()` wrapper handles this by: @@ -67,7 +67,7 @@ print(f"slow got {len(slow_results)} items (skipped {len(fast_results) - len(slo scheduler.executor.shutdown(wait=True) ``` - +{/* Result: */} ``` fast got 20 items: [0, 1, 2, 3, 4]... slow got 7 items (skipped 13) @@ -76,7 +76,8 @@ slow got 7 items (skipped 13) ### How it works -
Pikchr +
+Pikchr ```pikchr fold output=assets/backpressure_solution.svg color = white @@ -94,10 +95,11 @@ arrow Slow: box "Slow Sub" rad 5px fit wid 170% ht 170% ``` +
- -![output](assets/backpressure_solution.svg) +{/* Result: */} +output The `LATEST` strategy means: when the slow subscriber finishes processing, it gets whatever the most recent value is, skipping any values that arrived while it was busy. @@ -232,10 +234,11 @@ Blk2: box dashed color 0x5c9ff0 with .nw at Cold2.nw + (-0.1in, 0.25in) wid (Dis text "blocking" italic with .n at Blk2.n + (0, -0.05in) ``` +
- -![output](assets/getter_hot_cold.svg) +{/* Result: */} +output **Prefer `getter_cold()`** when you can afford to wait and warmup isn't expensive. It's simpler (no cleanup needed) and doesn't hold resources. Only use `getter_hot()` when you need instant reads or the source is expensive to start. @@ -265,7 +268,7 @@ print("after 700ms:", get_val()) get_val.dispose() # Don't forget to clean up! ``` - +{/* Result: */} ``` first call: 0 after 350ms: 3 @@ -288,7 +291,7 @@ print("call 2:", get_val()) # subscribes again, gets 0, disposes print("call 3:", get_val()) # subscribes again, gets 0, disposes ``` - +{/* Result: */} ``` call 1: 0 call 2: 0 diff --git a/docs/usage/sensor_streams/README.md b/docs/usage/sensor_streams/overview.mdx similarity index 99% rename from docs/usage/sensor_streams/README.md rename to docs/usage/sensor_streams/overview.mdx index 1f32897768..89a9752e19 100644 --- a/docs/usage/sensor_streams/README.md +++ b/docs/usage/sensor_streams/overview.mdx @@ -1,5 +1,3 @@ -# Sensor Streams - Dimos uses reactive streams (RxPY) to handle sensor data. This approach naturally fits robotics where multiple sensors emit data asynchronously at different rates, and downstream processors may be slower than the data sources. ## Guides diff --git a/docs/usage/sensor_streams/quality_filter.md b/docs/usage/sensor_streams/quality_filter.mdx similarity index 95% rename from docs/usage/sensor_streams/quality_filter.md rename to docs/usage/sensor_streams/quality_filter.mdx index c928b3ff97..fc569dda15 100644 --- a/docs/usage/sensor_streams/quality_filter.md +++ b/docs/usage/sensor_streams/quality_filter.mdx @@ -1,5 +1,3 @@ -# Quality-Based Stream Filtering - When processing sensor streams, you often want to reduce frequency while keeping the best quality data. For discrete data like images that can't be averaged or merged, instead of blindly dropping frames, `quality_barrier` selects the highest quality item within each time window. 
## The Problem @@ -40,7 +38,7 @@ print("Selected:", [r["id"] for r in result]) print("Qualities:", [r["quality"] for r in result]) ``` - +{/* Result: */} ``` Selected: [2] Qualities: [0.9] @@ -74,7 +72,7 @@ print("Sharpness scores:") show_frames(input_frames) ``` - +{/* Result: */} ``` Loaded 20 frames from Go2 camera Frame resolution: 1280x720 @@ -105,7 +103,7 @@ print(f"Output: {len(sharp_frames)} frame(s) (selected sharpest per window)") show_frames(sharp_frames) ``` - +{/* Result: */} ``` Output: 3 frame(s) (selected sharpest per window) Frame 0: 0.351 @@ -167,15 +165,15 @@ Visualizing which frames were selected (green border = selected as sharpest in w plot_mosaic(input_frames, sharp_frames, '{output}') ``` - -![output](assets/frame_mosaic.jpg) +{/* Result: */} +output ```python skip session=qb output=assets/sharpness_graph.svg plot_sharpness(input_frames, sharp_frames, '{output}') ``` - -![output](assets/sharpness_graph.svg) +{/* Result: */} +output Let's request a higher frequency. 
@@ -189,7 +187,7 @@ print(f"Output: {len(sharp_frames)} frame(s) (selected sharpest per window)") show_frames(sharp_frames) ``` - +{/* Result: */} ``` Output: 6 frame(s) (selected sharpest per window) Frame 0: 0.351 @@ -204,16 +202,16 @@ Output: 6 frame(s) (selected sharpest per window) plot_mosaic(input_frames, sharp_frames, '{output}') ``` - -![output](assets/frame_mosaic2.jpg) +{/* Result: */} +output ```python skip session=qb output=assets/sharpness_graph2.svg plot_sharpness(input_frames, sharp_frames, '{output}') ``` - -![output](assets/sharpness_graph2.svg) +{/* Result: */} +output As we can see the system is trying to strike a balance between requested frequency and quality that's available @@ -261,7 +259,7 @@ print(f"Mean gradient magnitude: {magnitude.mean():.2f}") print(f"Normalized sharpness: {img.sharpness:.3f}") ``` - +{/* Result: */} ``` Mean gradient magnitude: 230.00 Normalized sharpness: 0.332 @@ -287,7 +285,7 @@ result = rx.of(*detections).pipe( print(f"Selected: {result[0]['name']} (conf: {result[0]['confidence']})") ``` - +{/* Result: */} ``` Selected: dog (conf: 0.95) ``` diff --git a/docs/usage/sensor_streams/reactivex.md b/docs/usage/sensor_streams/reactivex.mdx similarity index 97% rename from docs/usage/sensor_streams/reactivex.md rename to docs/usage/sensor_streams/reactivex.mdx index 1498e46595..98e073d6f6 100644 --- a/docs/usage/sensor_streams/reactivex.md +++ b/docs/usage/sensor_streams/reactivex.mdx @@ -1,5 +1,3 @@ -# ReactiveX (RxPY) Quick Reference - RxPY provides composable asynchronous data streams. This is a practical guide focused on common patterns in this codebase. 
## Quick Start: Using an Observable @@ -19,7 +17,7 @@ source.subscribe(lambda x: received.append(x)) print("received:", received) ``` - +{/* Result: */} ``` received: [0, 1, 2, 3, 4] ``` @@ -43,7 +41,7 @@ observable.subscribe(lambda x: result.append(x)) print("transformed:", result) ``` - +{/* Result: */} ``` transformed: [6, 8] ``` @@ -58,7 +56,7 @@ rx.of(1, 2, 3).pipe( ).subscribe(print) ``` - +{/* Result: */} ``` item_1 item_2 @@ -74,7 +72,7 @@ rx.of(1, 2, 3, 4, 5).pipe( ).subscribe(print) ``` - +{/* Result: */} ``` 2 4 @@ -89,7 +87,7 @@ rx.of(1, 2, 3, 4, 5).pipe( ).subscribe(print) ``` - +{/* Result: */} ``` 1 2 @@ -106,7 +104,7 @@ rx.of(1, 2).pipe( ).subscribe(print) ``` - +{/* Result: */} ``` 1 10 @@ -133,7 +131,7 @@ results = rx.interval(0.05).pipe( print("sample() got:", results) ``` - +{/* Result: */} ``` sample() got: [2, 6, 9] ``` @@ -151,7 +149,7 @@ results = rx.interval(0.05).pipe( print("throttle_first() got:", results) ``` - +{/* Result: */} ``` throttle_first() got: [0, 3, 6, 9] ``` @@ -169,7 +167,7 @@ print("sample: latest value at each tick") print("throttle_first: first value, then block") ``` - +{/* Result: */} ``` sample: latest value at each tick throttle_first: first value, then block @@ -228,8 +226,8 @@ Handler: box "callback" rad 5px fit wid 170% ht 170%
- -![output](assets/observable_flow.svg) +{/* Result: */} +output **Key property: Observables are lazy.** Nothing happens until you call `.subscribe()`. This means you can build up complex pipelines without any work being done, then start the flow when ready. @@ -244,7 +242,7 @@ rx.of(1, 2, 3).subscribe( ) ``` - +{/* Result: */} ``` value: 1 value: 2 @@ -268,7 +266,7 @@ subscription.dispose() # Stop receiving values, clean up resources print("disposed") ``` - +{/* Result: */} ``` disposed ``` @@ -305,7 +303,7 @@ time.sleep(0.25) module.stop() ``` - +{/* Result: */} ``` got 0 got 1 @@ -361,7 +359,7 @@ sub.dispose() print("callbacks after dispose:", len(sensor._callbacks)) ``` - +{/* Result: */} ``` received: ['reading_1', 'reading_2'] callbacks after dispose: 0 @@ -399,7 +397,7 @@ sub.dispose() print("callbacks after dispose:", len(pubsub._callbacks)) ``` - +{/* Result: */} ``` received: ['msg_1', 'msg_2'] callbacks after dispose: 0 @@ -426,7 +424,7 @@ obs.subscribe( print("results:", results) ``` - +{/* Result: */} ``` cleaned up results: ['first', 'second', 'DONE'] @@ -452,7 +450,7 @@ time.sleep(0.2) print(f"received {len(received)} items before dispose") ``` - +{/* Result: */} ``` received 2 items before dispose ``` @@ -475,7 +473,7 @@ disposables.dispose() print("after dispose:", disposables.is_disposed) ``` - +{/* Result: */} ``` subscriptions: 2 after dispose: True diff --git a/docs/usage/sensor_streams/storage_replay.md b/docs/usage/sensor_streams/storage_replay.mdx similarity index 99% rename from docs/usage/sensor_streams/storage_replay.md rename to docs/usage/sensor_streams/storage_replay.mdx index 37777ce4d9..a11190010e 100644 --- a/docs/usage/sensor_streams/storage_replay.md +++ b/docs/usage/sensor_streams/storage_replay.mdx @@ -1,5 +1,3 @@ -# Sensor Storage and Replay - Record sensor streams to disk and replay them with original timing. Useful for testing, debugging, and creating reproducible datasets. 
## Quick Start diff --git a/docs/usage/sensor_streams/temporal_alignment.md b/docs/usage/sensor_streams/temporal_alignment.mdx similarity index 95% rename from docs/usage/sensor_streams/temporal_alignment.md rename to docs/usage/sensor_streams/temporal_alignment.mdx index 7a7faa24a0..5174bf45f0 100644 --- a/docs/usage/sensor_streams/temporal_alignment.md +++ b/docs/usage/sensor_streams/temporal_alignment.mdx @@ -1,10 +1,9 @@ -# Temporal Message Alignment - Robots have multiple sensors emitting data at different rates and latencies. A camera might run at 30fps, while lidar scans at 10Hz, and each has different processing delays. For perception tasks like projecting 2D detections into 3D pointclouds, we need to match data from these streams by timestamp. `align_timestamped` solves this by buffering messages and matching them within a time tolerance. -
Pikchr +
+Pikchr ```pikchr fold output=assets/alignment_overview.svg color = white @@ -21,10 +20,11 @@ arrow from Align.e right 0.4in Out: box "(image, pointcloud)" rad 5px fit wid 170% ht 170% ``` +
- -![output](assets/alignment_overview.svg) +{/* Result: */} +output ## Basic Usage @@ -68,6 +68,7 @@ lidar_stream = lidar_replay.stream(from_timestamp=seek_ts, duration=2.0).pipe( ``` +
Streams would normally come from an actual robot into your module via `In` inputs. [`detection/module3D.py`](/dimos/perception/detection/module3D.py#L11) is a good example of this. @@ -95,7 +96,7 @@ if aligned_pairs: print(f"\nFirst matched pair: Δ{dt*1000:.1f}ms") ``` - +{/* Result: */} ``` Video: 29 frames, Lidar: 15 scans Aligned pairs: 11 out of 29 video frames @@ -157,14 +158,15 @@ def plot_alignment_timeline(video_frames, lidar_scans, aligned_pairs, path): plt.close() ``` +
```python skip session=align output=assets/alignment_timeline.png plot_alignment_timeline(video_frames, lidar_scans, aligned_pairs, '{output}') ``` - -![output](assets/alignment_timeline.png) +{/* Result: */} +output If we loosen up our match tolerance, we might get multiple pairs matching the same lidar frame. @@ -180,7 +182,7 @@ print(f"Video: {len(video_frames)} frames, Lidar: {len(lidar_scans)} scans") print(f"Aligned pairs: {len(aligned_pairs)} out of {len(video_frames)} video frames") ``` - +{/* Result: */} ``` Video: 58 frames, Lidar: 30 scans Aligned pairs: 23 out of 58 video frames @@ -191,8 +193,8 @@ Aligned pairs: 23 out of 58 video frames plot_alignment_timeline(video_frames, lidar_scans, aligned_pairs, '{output}') ``` - -![output](assets/alignment_timeline2.png) +{/* Result: */} +output ## Combine Frame Alignment with a Quality Filter @@ -226,7 +228,7 @@ print(f"Aligned pairs: {len(aligned_pairs)} out of {len(video_frames)} video fra ``` - +{/* Result: */} ``` Video: 6 frames, Lidar: 15 scans Aligned pairs: 1 out of 6 video frames @@ -236,8 +238,8 @@ Aligned pairs: 1 out of 6 video frames plot_alignment_timeline(video_frames, lidar_scans, aligned_pairs, '{output}') ``` - -![output](assets/alignment_timeline3.png) +{/* Result: */} +output We are very picky but data is high quality. Best frame, with closest lidar match in this window. @@ -273,10 +275,11 @@ Buffer: box "Buffer" "primary" rad 5px fit wid 170% ht 170% text "waiting..." at (Buffer.w.x - 0.4in, Buffer.w.y - 0.15in) ``` +
- -![output](assets/alignment_flow.svg) +{/* Result: */} +output ## Parameters diff --git a/docs/usage/tool_streams.md b/docs/usage/tool_streams.mdx similarity index 99% rename from docs/usage/tool_streams.md rename to docs/usage/tool_streams.mdx index 444183de35..4f314fcaa9 100644 --- a/docs/usage/tool_streams.md +++ b/docs/usage/tool_streams.mdx @@ -1,5 +1,3 @@ -# Tool Streams - Some tools return quickly but keep doing work in the background. For example, `look_out_for` starts a perception loop and waits minutes for a match; `follow_person` returns "started following" right away and then keeps publishing diff --git a/docs/usage/transforms.md b/docs/usage/transforms.mdx similarity index 98% rename from docs/usage/transforms.md rename to docs/usage/transforms.mdx index 8b31492ac2..ed4b41e080 100644 --- a/docs/usage/transforms.md +++ b/docs/usage/transforms.mdx @@ -1,5 +1,3 @@ -# Transforms - ## The Problem: Everything Measures from Its Own Perspective Imagine your robot has an RGB-D camera—a camera that captures both color images and depth (distance to each pixel). These are common in robotics: Intel RealSense, Microsoft Kinect, and similar sensors. @@ -42,8 +40,8 @@ text "target here" small italic at (GR.s.x, GR.s.y - 0.25in)
- -![output](assets/transforms_tree.svg) +{/* Result: */} +output Each arrow in this tree is a transform. To get the mug's position in gripper coordinates, you chain transforms through their common parent: camera → robot_base → arm → gripper. @@ -92,7 +90,7 @@ camera_transform = Transform( print(camera_transform) ``` - +{/* Result: */} ``` base_link -> camera_link Translation: → Vector Vector([0.5 0. 0.3]) @@ -133,7 +131,7 @@ t_inverse = -t1 print(f"Inverse: {t_inverse.frame_id} -> {t_inverse.child_frame_id}") ``` - +{/* Result: */} ``` Composed: base_link -> end_effector Translation: (1.0, 0.5, 0.0) @@ -159,7 +157,7 @@ print("4x4 transformation matrix:") print(matrix) ``` - +{/* Result: */} ``` 4x4 transformation matrix: [[1. 0. 0. 1.] @@ -196,7 +194,7 @@ sensor2 = MySensorModule(frame_id_prefix="robot1") print(f"With prefix: {sensor2.frame_id}") ``` - +{/* Result: */} ``` Default frame_id: sensor_link With prefix: robot1/sensor_link @@ -332,7 +330,7 @@ if __name__ == "__main__": ``` - +{/* Result: */} ``` 16:21:45.203 [inf][ation/worker_manager_python.py] Worker pool started. n_workers=2 16:21:45.445 [inf][/coordination/python_worker.py] Deployed module. module=RobotBaseModule module_id=0 worker_id=0 @@ -379,7 +377,7 @@ Transform tree: You can also run `foxglove-studio-bridge` in the next terminal (binary provided by DimOS and should be in your Python env) and `foxglove-studio` to view these transforms in 3D. 
(TODO we need to update this for rerun) -![transforms](assets/transforms.png) +transforms Key points: @@ -422,8 +420,8 @@ text "CameraModule" italic at ((CL.x + CO.x)/2, CL.s.y - 0.25in) - -![output](assets/transforms_modules.svg) +{/* Result: */} +output # Internals @@ -460,7 +458,7 @@ print(f"Buffer has {len(tf.buffers)} transform pair(s)") print(tf) ``` - +{/* Result: */} ``` Latest transform: x=4.0 Buffer has 1 transform pair(s) diff --git a/docs/usage/transports/dds.md b/docs/usage/transports/dds.mdx similarity index 97% rename from docs/usage/transports/dds.md rename to docs/usage/transports/dds.mdx index 924b9d43e8..5a49e4b74a 100644 --- a/docs/usage/transports/dds.md +++ b/docs/usage/transports/dds.mdx @@ -1,5 +1,3 @@ -# Installing DDS Transport Libs on Ubuntu - The `dds` extra provides DDS (Data Distribution Service) transport support via [Eclipse Cyclone DDS](https://cyclonedds.io/docs/cyclonedds-python/latest/). The Python package builds C extensions against the CycloneDDS C library, so the C library must be installed before the Python package. ## Recommended: nix-provided cyclonedds diff --git a/docs/usage/transports/index.md b/docs/usage/transports/overview.mdx similarity index 75% rename from docs/usage/transports/index.md rename to docs/usage/transports/overview.mdx index 3319abc57b..71ce146ed8 100644 --- a/docs/usage/transports/index.md +++ b/docs/usage/transports/overview.mdx @@ -1,5 +1,3 @@ -# Transports - Transports connect **module streams** across **process boundaries** and/or **networks**. * **Module**: a running component (e.g., camera, mapping, nav). @@ -9,7 +7,7 @@ Transports connect **module streams** across **process boundaries** and/or **net Each edge in the graph is a **transported stream** (potentially different protocols). 
Each node is a **module**: -![go2_nav](../assets/go2_nav.svg) +go2_nav ## What the transport layer guarantees (and what it doesn’t) @@ -37,13 +35,14 @@ Quick view on performance of our pubsub backends: python -m pytest -svm tool -k "not bytes" dimos/protocol/pubsub/benchmark/test_benchmark.py ``` -![Benchmark results](../assets/pubsub_benchmark.png) +Benchmark results --- ## Abstraction layers -
Pikchr +
+Pikchr ```pikchr output=../assets/abstraction_layers.svg fold color = white @@ -68,10 +67,11 @@ text "LCM, SHM, ROS" at T.s + (0, -0.2in) text "pub/sub API" at P.s + (0, -0.2in) ``` +
- -![output](../assets/abstraction_layers.svg) +{/* Result: */} +output We’ll go through these layers top-down. @@ -110,53 +110,23 @@ ros = nav.transports( Each **stream** on a module can use a different transport. Set `.transport` on the stream **before starting** modules. -The runnable example below uses a tiny synthetic image publisher instead of `CameraModule` so it works without a webcam and in CI; the wiring is the same as with a real camera. - ```python ansi=false import time -import numpy as np -import reactivex as rx - -from dimos.core.core import rpc -from dimos.core.coordination.module_coordinator import ModuleCoordinator -from dimos.core.module import Module, ModuleConfig -from dimos.core.stream import In, Out +from dimos.core.module import Module +from dimos.core.stream import In from dimos.core.transport import LCMTransport -from dimos.msgs.sensor_msgs.Image import Image, ImageFormat - - -class TickerCameraConfig(ModuleConfig): - frequency_hz: float = 2.0 - - -class TickerCameraModule(Module): - """Publish synthetic frames so this example runs without a webcam.""" - - config: TickerCameraConfig - color_image: Out[Image] - - @rpc - def start(self) -> None: - super().start() - - def emit(_: int) -> None: - img = Image.from_numpy( - np.zeros((480, 640, 3), dtype=np.uint8), - format=ImageFormat.RGB, - frame_id="synthetic", - ) - self.color_image.publish(img) - - period = 1.0 / max(self.config.frequency_hz, 0.1) - self.register_disposable(rx.interval(period).subscribe(emit)) +from dimos.hardware.sensors.camera.module import CameraModule +from dimos.msgs.sensor_msgs import Image +from dimos.core.module_coordinator import ModuleCoordinator class ImageListener(Module): image: In[Image] - async def handle_image(self, img: Image) -> None: - print(f"Received: {img.shape}") + def start(self): + super().start() + self.image.subscribe(lambda img: print(f"Received: {img.shape}")) if __name__ == "__main__": @@ -164,7 +134,7 @@ if __name__ == "__main__": dimos = 
ModuleCoordinator() dimos.start() - camera = dimos.deploy(TickerCameraModule, frequency_hz=2.0) + camera = dimos.deploy(CameraModule, frequency=2.0) listener = dimos.deploy(ImageListener) # Choose a transport for the stream (example: LCM typed channel) @@ -179,24 +149,17 @@ if __name__ == "__main__": dimos.stop() ``` - +{/* Result: */} + ``` -02:57:31.428 [inf][ation/worker_manager_python.py] Worker pool started. n_workers=2 -02:57:31.761 [inf][/coordination/python_worker.py] Deployed module. module=TickerCameraModule module_id=0 worker_id=0 -02:57:31.768 [inf][/coordination/python_worker.py] Deployed module. module=ImageListener module_id=1 worker_id=1 -02:57:33.778 [inf][dination/module_coordinator.py] Stopping module... module=ImageListener -02:57:33.793 [inf][dination/module_coordinator.py] Module stopped. module=ImageListener -02:57:33.793 [inf][dination/module_coordinator.py] Stopping module... module=TickerCameraModule -02:57:33.802 [inf][dination/module_coordinator.py] Module stopped. module=TickerCameraModule -02:57:33.802 [inf][ation/worker_manager_python.py] Shutting down all workers... +Initialized dimos local cluster with 2 workers, memory limit: auto +2026-01-24T13:17:50.190559Z [info ] Deploying module. [dimos/core/__init__.py] module=CameraModule +2026-01-24T13:17:50.218466Z [info ] Deployed module. [dimos/core/__init__.py] module=CameraModule worker_id=1 +2026-01-24T13:17:50.229474Z [info ] Deploying module. [dimos/core/__init__.py] module=ImageListener +2026-01-24T13:17:50.250199Z [info ] Deployed module. [dimos/core/__init__.py] module=ImageListener worker_id=0 Received: (480, 640, 3) Received: (480, 640, 3) Received: (480, 640, 3) -02:57:33.803 [inf][/coordination/python_worker.py] Worker stopping module... module=ImageListener module_id=1 worker_id=1 -02:57:33.803 [inf][/coordination/python_worker.py] Worker module stopped. module=ImageListener module_id=1 worker_id=1 -02:57:33.861 [inf][/coordination/python_worker.py] Worker stopping module... 
module=TickerCameraModule module_id=0 worker_id=0 -02:57:33.862 [inf][/coordination/python_worker.py] Worker module stopped. module=TickerCameraModule module_id=0 worker_id=0 -02:57:33.892 [inf][ation/worker_manager_python.py] All workers shut down ``` See [Modules](/docs/usage/modules.md) for more on module architecture. @@ -207,7 +170,7 @@ See [Modules](/docs/usage/modules.md) for more on module architecture. `lcmspy` shows topic frequency/bandwidth stats: -![lcmspy](../assets/lcmspy.png) +lcmspy `dimos topic echo /topic` listens on typed channels like `/topic#pkg.Msg` and decodes automatically: @@ -255,8 +218,8 @@ print(inspect.getsource(PubSub.publish)) print(inspect.getsource(PubSub.subscribe)) ``` - -``` +{/* Result: */} +```python @abstractmethod def publish(self, topic: TopicT, message: MsgT) -> None: """Publish a message to a topic.""" @@ -278,8 +241,8 @@ LCM is UDP multicast. It’s very fast on a robot LAN, but it’s **best-effort* For local emission it autoconfigures system in a way in which it's more robust and faster then other more common protocols like ROS, DDS ```python -from dimos.msgs.geometry_msgs.Vector3 import Vector3 -from dimos.protocol.pubsub.impl.lcmpubsub import LCM, Topic +from dimos.protocol.pubsub.lcmpubsub import LCM, Topic +from dimos.msgs.geometry_msgs import Vector3 lcm = LCM() lcm.start() @@ -297,7 +260,7 @@ print(f"Received velocity: x={received[0].x}, y={received[0].y}, z={received[0]. lcm.stop() ``` - +{/* Result: */} ``` Received velocity: x=1.0, y=0.0, z=0.5 ``` @@ -307,7 +270,7 @@ Received velocity: x=1.0, y=0.0, z=0.5 Shared memory is highest performance, but only works on the **same machine**. 
```python -from dimos.protocol.pubsub.impl.shmpubsub import PickleSharedMemory +from dimos.protocol.pubsub.shmpubsub import PickleSharedMemory shm = PickleSharedMemory(prefer="cpu") shm.start() @@ -323,7 +286,7 @@ print(f"Received: {received}") shm.stop() ``` - +{/* Result: */} ``` Received: [{'data': [1, 2, 3]}] ``` @@ -332,7 +295,7 @@ Received: [{'data': [1, 2, 3]}] For network communication, DDS uses the Data Distribution Service (DDS) protocol: -```python skip session=dds_demo ansi=false +```python session=dds_demo ansi=false from dataclasses import dataclass from cyclonedds.idl import IdlStruct @@ -358,10 +321,11 @@ print(f"Received: {received}") dds.stop() ``` - +{/* Result: */} ``` Received: [SensorReading(value=22.5)] ``` + --- ## A minimal transport: `Memory` @@ -369,7 +333,7 @@ Received: [SensorReading(value=22.5)] The simplest toy backend is `Memory` (single process). Start from there when implementing a new pubsub backend. ```python -from dimos.protocol.pubsub.impl.memory import Memory +from dimos.protocol.pubsub.memory import Memory bus = Memory() received = [] @@ -386,7 +350,7 @@ for msg in received: unsubscribe() ``` - +{/* Result: */} ``` Received 2 messages: {'temperature': 22.5} @@ -401,7 +365,7 @@ See [`pubsub/impl/memory.py`](/dimos/protocol/pubsub/impl/memory.py) for the com Transports often need to serialize messages before sending and deserialize after receiving. -`PubSubEncoderMixin` at [`pubsub/encoders.py`](/dimos/protocol/pubsub/encoders.py#L39) provides a clean way to add encoding/decoding to any pubsub implementation. +`PubSubEncoderMixin` at [`pubsub/spec.py`](/dimos/protocol/pubsub/spec.py#L95) provides a clean way to add encoding/decoding to any pubsub implementation. 
### Available mixins @@ -416,11 +380,9 @@ Transports often need to serialize messages before sending and deserialize after ### Creating a custom mixin ```python session=jsonencoder no-result +from dimos.protocol.pubsub.spec import PubSubEncoderMixin import json -from dimos.protocol.pubsub.encoders import PubSubEncoderMixin - - class JsonEncoderMixin(PubSubEncoderMixin[str, dict, bytes]): def encode(self, msg: dict, topic: str) -> bytes: return json.dumps(msg).encode("utf-8") @@ -432,8 +394,7 @@ class JsonEncoderMixin(PubSubEncoderMixin[str, dict, bytes]): Combine with a pubsub implementation via multiple inheritance: ```python session=jsonencoder no-result -from dimos.protocol.pubsub.impl.memory import Memory - +from dimos.protocol.pubsub.memory import Memory class MyJsonPubSub(JsonEncoderMixin, Memory): pass @@ -442,9 +403,7 @@ class MyJsonPubSub(JsonEncoderMixin, Memory): Swap serialization by changing the mixin: ```python session=jsonencoder no-result -from dimos.protocol.pubsub.encoders import PickleEncoderMixin -from dimos.protocol.pubsub.impl.memory import Memory - +from dimos.protocol.pubsub.spec import PickleEncoderMixin class MyPicklePubSub(PickleEncoderMixin, Memory): pass diff --git a/docs/usage/visualization.md b/docs/usage/visualization.mdx similarity index 51% rename from docs/usage/visualization.md rename to docs/usage/visualization.mdx index bb6a7e28e5..f08ba8f1c4 100644 --- a/docs/usage/visualization.md +++ b/docs/usage/visualization.mdx @@ -1,43 +1,35 @@ -# Viewer Backends - -Dimos supports three visualization backends: `rerun` (default), `foxglove`, and `none`. +Dimos supports three visualization backends: Rerun (web or native) and Foxglove. 
## Quick Start -Choose your viewer via the CLI: +Choose your viewer via the CLI (preferred): ```bash # Rerun native viewer (default) - dimos-viewer with built-in teleop + click-to-navigate dimos run unitree-go2 -# Explicitly select the viewer backend: +# Explicitly select the viewer mode: dimos --viewer rerun run unitree-go2 +dimos --viewer rerun-web run unitree-go2 dimos --viewer foxglove run unitree-go2 -dimos --viewer none run unitree-go2 ``` -Control how the Rerun viewer opens with `--rerun-open` and `--rerun-web`: +Alternative (environment variable): ```bash -# Open native desktop viewer (default) -dimos --rerun-open native run unitree-go2 - -# Open web viewer in browser -dimos --rerun-open web run unitree-go2 - -# Open both native and web -dimos --rerun-open both run unitree-go2 +# Rerun native viewer (default) - dimos-viewer with built-in teleop + click-to-navigate +VIEWER=rerun dimos run unitree-go2 -# No viewer (headless) — data still accessible via gRPC -dimos --rerun-open none run unitree-go2 +# Rerun web viewer - browser dashboard + teleop at http://localhost:7779 +VIEWER=rerun-web dimos run unitree-go2 -# Serve the web viewer without auto-opening a browser -dimos --rerun-web --rerun-open native run unitree-go2 +# Foxglove - Use Foxglove Studio instead of Rerun +VIEWER=foxglove dimos run unitree-go2 ``` ## Viewer Modes Explained -### Rerun Native (`rerun`, `--rerun-open native`) — Default +### Rerun Native (`rerun`) — Default **What you get:** - [dimos-viewer](https://github.com/dimensionalOS/dimos-viewer), a custom Dimensional fork of Rerun with built-in keyboard teleop and click-to-navigate @@ -47,7 +39,7 @@ dimos --rerun-web --rerun-open native run unitree-go2 --- -### Rerun Web (`rerun`, `--rerun-open web`) +### Rerun Web (`rerun-web`) **What you get:** - Browser-based dashboard at http://localhost:7779 @@ -69,27 +61,22 @@ dimos --rerun-web --rerun-open native run unitree-go2 ## Rendering with Custom Blueprints -To enable visualization in your own 
blueprint, use `vis_module`: +To enable rerun within your own blueprint simply include `RerunBridgeModule`: ```python -from dimos.core.coordination.blueprints import autoconnect +from dimos.visualization.rerun.bridge import RerunBridgeModule from dimos.hardware.sensors.camera.module import CameraModule -from dimos.visualization.vis_module import vis_module +from dimos.protocol.pubsub.impl.lcmpubsub import LCM camera_demo = autoconnect( CameraModule.blueprint(), - vis_module(viewer_backend=global_config.viewer), + RerunBridgeModule.blueprint( + viewer_mode="native", # native (desktop), web (browser), none (headless) + ), ) -``` - -Run the stack locally (this blocks until you stop the process): - -```python skip -from dimos.core.coordination.module_coordinator import ModuleCoordinator - if __name__ == "__main__": - ModuleCoordinator.build(camera_demo).loop() + camera_demo.build().loop() ``` Every LCM stream, such as `color_image` (output by CameraModule), that uses a data type (like `Image`) that has a `.to_rerun` method will get rendered (`rr.log`) using the LCM topic as the rerun entity path. In other words: to render something, simply log it to a stream and it will automatically be available in rerun. @@ -109,7 +96,7 @@ This happens on lower-end hardware (NUC, older laptops) with large maps. Edit [`dimos/robot/unitree/go2/blueprints/smart/unitree_go2.py`](/dimos/robot/unitree/go2/blueprints/smart/unitree_go2.py): -```python skip +```python # Before (high detail, slower on large maps) voxel_mapper(voxel_size=0.05), # 5cm voxels @@ -123,34 +110,6 @@ voxel_mapper(voxel_size=0.1), # 10cm voxels --- -## Direct Visualization from a Module - -If you want to log data to Rerun directly from inside a module (e.g. for debugging or one-off visualizations), use `rerun_init` instead of calling `rr.init()` yourself. It handles colormap registration and can optionally start a gRPC server so a viewer can connect. 
- -```python -import rerun as rr -from dimos.visualization.rerun.init import rerun_init - -# Basic init (no gRPC server — use when RerunBridgeModule is already running) -rerun_init() -rr.log("debug/my_points", rr.Points3D(positions=[[1, 2, 3]])) - -# Start a gRPC server so you can connect a viewer -rerun_init(start_grpc=True) -# Then connect with: dimos-viewer --connect rerun+http://127.0.0.1:9877/proxy - -# Custom gRPC config -rerun_init( - start_grpc=True, - grpc_config={ - "connect_url": "rerun+http://127.0.0.1:9999/proxy", - "server_memory_limit": "4GB", - }, -) -``` - -When a `RerunBridgeModule` is already part of your blueprint, you typically don't need `start_grpc` — just call `rerun_init()` and log directly with `rr.log()`. The data will appear in the existing viewer. - ## How to use Rerun on `dev` (and the TF/entity nuances) Rerun on `dev` is **module-driven**: modules decide what to log, and `Blueprint.build()` sets up the shared viewer + default layout. From e7e37919a579f763f956b05be89df532102aa949 Mon Sep 17 00:00:00 2001 From: Swastika Yadav Date: Wed, 6 May 2026 19:37:38 +0530 Subject: [PATCH 3/5] added introduction page to getting started section --- docs/introduction.mdx | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 docs/introduction.mdx diff --git a/docs/introduction.mdx b/docs/introduction.mdx new file mode 100644 index 0000000000..1c19bfff7f --- /dev/null +++ b/docs/introduction.mdx @@ -0,0 +1,36 @@ +--- +title: "DimensionalOS" +description: "An open source modern operating system for generalist robotics. Python-first, ROS-optional, and agent native." +sidebarTitle: Introduction +--- + +**Dimensional** is the modern operating system for generalist robotics. We are setting the next-generation SDK standard and integrating with the majority of robot manufacturers. 
+ +With a simple install and no ROS required, you can build physical applications entirely in Python that run on humanoids, quadrupeds, or drones. + +Dimensional is agent native: describe behavior in natural language, and build local and hosted multi-agent systems that work with your hardware. Agents run as native modules, subscribing to embedded streams from perception (LiDAR, camera) and spatial memory down to control loops and motor drivers. + +## Capabilities at a glance + + + + **SLAM**, dynamic obstacle avoidance, route planning, and autonomous exploration, via both DimOS-native and ROS integrations. + + + Detectors, 3D projections, VLMs, and audio processing. + + + Agentic control and MCP. Example: *"Hey robot, go find the kitchen."* + + + Spatio-temporal RAG, dynamic memory, object localization and permanence. + + + +## Start here + +Use these pages to continue setup and then learn the core system model: + +- [Quickstart](/quickstart) +- [System requirements](/quickstart#system-requirements) (Quickstart) +- [DimOS concepts](/usage/concepts) From 09241cff2b2c98b2e8aac64a06321065309d1785 Mon Sep 17 00:00:00 2001 From: Swastika Yadav Date: Wed, 6 May 2026 19:41:10 +0530 Subject: [PATCH 4/5] added quickstart page to getting started section --- docs/quickstart.mdx | 132 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 docs/quickstart.mdx diff --git a/docs/quickstart.mdx b/docs/quickstart.mdx new file mode 100644 index 0000000000..7228c1cf06 --- /dev/null +++ b/docs/quickstart.mdx @@ -0,0 +1,132 @@ +--- +title: "Quickstart" +description: "Install DimOS, run a replay or simulation, then explore CLI and MCP. No hardware required." +--- + +In this quickstart, you will replay a Unitree Go2 office navigation session with no hardware, then optionally switch to simulation or a live robot. 
+ +If you use coding agents (OpenClaw, Claude Code or similar), point them at [`AGENTS.md`](https://github.com/dimensionalOS/dimos/blob/main/AGENTS.md). + +## System requirements + +| Component | Minimum | Recommended | +| --- | --- | --- | +| OS | Ubuntu 22.04, macOS 12.6+ | Ubuntu 24.04 | +| Python | 3.12 | Latest | +| RAM | 16 GB | 32 GB+ | +| Disk | 10 GB SSD | 25 GB+ SSD | +| CPU | 8-core Intel / AMD | 12+ cores | +| GPU (optional) | NVIDIA RTX 3000+ (8 GB VRAM) | RTX 4070+ (12 GB+ VRAM) | + +> GPU is required only for perception, VLMs, and AI features. Optional for basic robot control. + +## Interactive install + +```bash +curl -fsSL https://raw.githubusercontent.com/dimensionalOS/dimos/main/scripts/install.sh | bash +``` + +## Manual system install + +If you prefer to install system dependencies yourself, follow the guide for your OS: + +| OS Guide | Notes | +| --- | --- | +| [Ubuntu](/installation/ubuntu) | Primary tested path | +| [Nix](/installation/nix) | Flakes and dev shell | +| [macOS](/installation/osx) | Homebrew-based; less mature than Linux | + +## Python environment + +DimOS targets Python 3.12. The examples use [`uv`](https://docs.astral.sh/uv/); plain `python -m venv` and `pip` work too. + +```bash +uv venv --python "3.12" +source .venv/bin/activate # Windows: .venv\Scripts\activate +``` + +## Install DimOS + +```bash +uv pip install 'dimos[base,unitree]' +``` + +Extras keep installs lean: `base` is runtime, modules, transports, and CLI; `unitree` adds WebRTC and skills for Go2 / G1 (real or replayed). + +### Replay a recorded session (no hardware) + +On first run, the Rerun window may stay black briefly while roughly **75 MB** of data downloads from LFS. 
+ +```bash +dimos --replay run unitree-go2 +``` + +### Simulation (MuJoCo) + +```bash +uv pip install 'dimos[base,unitree,sim]' +dimos --simulation run unitree-go2 +dimos --simulation run unitree-g1-sim # humanoid +``` + +### Real robot (example: Unitree Go2 over WebRTC) + +```bash +export ROBOT_IP= +dimos run unitree-go2 +``` + +Do not skip the platform guide - latency, time sync, and safety habits matter: [Unitree Go2](/platforms/quadruped-go2). + + + First replay failing? Most issues are network or LFS related. See [Replay troubleshooting](/troubleshooting/replay). + + +## Featured runfiles + +| Command | What it does | +| --- | --- | +| `dimos --replay run unitree-go2` | Quadruped navigation replay - SLAM, costmap, A-star planning | +| `dimos --replay --replay-db go2_bigoffice run unitree-go2-memory` | Quadruped temporal memory replay | +| `dimos --simulation run unitree-go2-agentic` | Quadruped agentic + MCP server in simulation | +| `dimos --simulation run unitree-g1` | Humanoid in MuJoCo simulation | +| `dimos --replay run drone-basic` | Drone video + telemetry replay | +| `dimos --replay run drone-agentic` | Drone + LLM agent with flight skills (replay) | +| `dimos run demo-camera` | Webcam demo - no hardware needed | +| `dimos run keyboard-teleop-xarm7` | Keyboard teleop with mock xArm7 (`dimos[manipulation]` extra) | +| `dimos --simulation run unitree-go2-agentic-ollama` | Quadruped agentic with local LLM (Ollama running, `ollama serve`) | + +Blueprint reference: [Blueprints](/usage/blueprints). + +## Agent CLI and MCP + +The `dimos` CLI runs blueprints, inspects state, talks to agents, and invokes skills via MCP. + +```bash +dimos run unitree-go2-agentic --daemon # background +dimos status +dimos log -f +dimos agent-send "explore the room" +dimos mcp list-tools +dimos mcp call relative_move --arg forward=0.5 +dimos stop +``` + +Full reference: [CLI](/usage/cli). + +## What next? + + + + Natural language control and MCP-exposed skills. 
+ + + Hardware support matrix and bring-up guides. + + + Modules, streams, and blueprints behind every workflow. + + + Navigation, perception, spatial memory, and manipulation. + + From 02abdbf36d50ee18b8b5938e4c0639f882a8c464 Mon Sep 17 00:00:00 2001 From: Swastika Yadav Date: Wed, 6 May 2026 19:43:34 +0530 Subject: [PATCH 5/5] added getting started pages to sidebar --- docs/docs.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/docs.json b/docs/docs.json index 6de9e407b5..edf3ccc62c 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -19,6 +19,8 @@ { "group": "Getting Started", "pages": [ + "introduction", + "quickstart", "requirements", { "group": "Installation",