Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
bb4f753
Add topological navigation helper
May 5, 2026
ef5611d
Add NavVLA topomap generation command
May 5, 2026
7235987
Wire topological goals into navigation node
May 5, 2026
b1d440e
Add topological navigation config
May 5, 2026
7391263
Install topomap assets and generator
May 5, 2026
84cf662
Ignore generated topomap artifacts
May 5, 2026
cfa8770
Separate topomap generation from runtime navigation
May 5, 2026
a3fe014
Fix toponav preprocessing and path resolution
May 5, 2026
027571d
fix
Kasaiatsuki May 14, 2026
58bb478
データ形式に合わせた変更
Kasaiatsuki May 14, 2026
b30be61
modified NavVLA
Kasaiatsuki May 14, 2026
8062a09
openCVで使えるBGRに変換
Kasaiatsuki May 14, 2026
346702e
fix
Kasaiatsuki May 14, 2026
eb27a56
delete def_update_text_feature
Kasaiatsuki May 14, 2026
3955fac
save_freqによるエポック番号付き保存を廃止しbest loss(検証時)のエポックを保存していくように変更
Kasaiatsuki May 16, 2026
67f789a
delete save_freq & add resume_from
Kasaiatsuki May 16, 2026
70fce1b
evalループにtqdmのプログレスバーを追加
Kasaiatsuki May 16, 2026
85652f1
録画の一時停止/再生時にデータセットディレクトリを再作成せずに既存dirに保存する変更
Kasaiatsuki May 16, 2026
e069585
捨てていた距離予測ロス(dist_loss)を学習・evalに追加
Kasaiatsuki May 16, 2026
833b7db
train/valをそれぞれのトラジェクトリー内で分けるように変更
Kasaiatsuki May 16, 2026
622de7d
Revert "delete def_update_text_feature"
Kasaiatsuki May 16, 2026
a377e0f
train/valの分け方をランダムに変更
Kasaiatsuki May 16, 2026
2c215f3
学習環境の調整
Kasaiatsuki May 16, 2026
799acdf
ベイズフィルタの実装とリサイズ方針の修正
Kasaiatsuki May 17, 2026
07066cc
スケール調整
Kasaiatsuki May 21, 2026
8119420
deployをeb32deaに更新
Kasaiatsuki May 21, 2026
f9c8c20
スケジューラーの追加
Kasaiatsuki May 21, 2026
3221057
ディレクトリの移動
Kasaiatsuki May 22, 2026
1506bf5
Merge branch 'main' into feat/toponav
Kasaiatsuki May 22, 2026
5c74b35
ディレクトリの移動
Kasaiatsuki May 22, 2026
caef8a4
poseを削除
Kasaiatsuki May 22, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,5 @@ training/weights/*.pth
training/config/dataset.yaml
training/runs
training/dataset/

#ファイル名で指定
.codex
.vscode
__pycache__

#例外ファイルを指定
deployment/config/topomap/
deployment/weights/*.pt
2 changes: 1 addition & 1 deletion OmniVLA
8 changes: 8 additions & 0 deletions deployment/config/nav.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,11 @@ goal_pose: [10.0, -1.0, 0.0, -1.0]
goal_image_path: OmniVLA/inference/goal_img.jpg
lan_prompt: blue trash bin
path_frame_id: base_link

# Topological navigation using ml_planner-style topomap features.
use_toponav: false
Comment thread
Kasaiatsuki marked this conversation as resolved.
topomap_path: config/topomap/topomap.yaml
topomap_image_dir: config/topomap/images
placenet_weight_path: deployment/weights/placenet.pt
toponav_crop_size: 288
Comment thread
Kasaiatsuki marked this conversation as resolved.
toponav_min_score: -1.0
Comment thread
Kasaiatsuki marked this conversation as resolved.
105 changes: 95 additions & 10 deletions deployment/navvla/navigation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import argparse
import math
import sys
from collections import deque
from pathlib import Path
from typing import Deque, Optional, Tuple
Expand All @@ -15,11 +16,26 @@
import torch
from PIL import Image as PILImage

_THIS_FILE = Path(__file__).resolve()
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

この辺のpath周りの設定,変更が不要なようにも思えるのですが,変更が必要なら理由を教えてください

_REPO_ROOT_CANDIDATES = [
_THIS_FILE.parents[2],
_THIS_FILE.parents[4] / "src" / "NavVLA" if len(_THIS_FILE.parents) > 4 else None,
]
for _repo_root in reversed([path for path in _REPO_ROOT_CANDIDATES if path is not None and (path / "OmniVLA").exists()]):
for _path in (_repo_root, _repo_root / "OmniVLA", _repo_root / "OmniVLA" / "inference"):
if str(_path) not in sys.path:
sys.path.insert(0, str(_path))

_INSTALLED_INFERENCE_DIR = _THIS_FILE.parents[1] / "OmniVLA" / "inference"
if _INSTALLED_INFERENCE_DIR.exists() and str(_INSTALLED_INFERENCE_DIR) not in sys.path:
sys.path.insert(0, str(_INSTALLED_INFERENCE_DIR))

from OmniVLA.inference.utils_policy import (
load_model,
transform_images_PIL_mask,
)
from .preprocess import build_mask, build_omnivla_edge_inputs, image_to_cv2, load_yaml
from .preprocess import build_mask, build_omnivla_edge_inputs, image_msg_to_bgr, image_to_cv2, load_yaml
from .toponav import TopologicalNavigator

import rclpy
from geometry_msgs.msg import PoseStamped, Twist
Expand All @@ -41,6 +57,7 @@ def __init__(
self.autonomous_flag = False
self.context_queue = []
self.obs_image = None
self.obs_image_bgr = None
self.package_share_dir = package_share_dir

self.nav_cfg = load_yaml(nav_config_path)
Expand All @@ -49,6 +66,7 @@ def __init__(
self.init_params()
self.init_model()
self.init_model_modality()
self.init_toponav()

self.image_sub = self.create_subscription(Image, "/image_raw", self.image_callback, 10)
self.autonomous_sub = self.create_subscription(Bool, "/autonomous", self.autonomous_callback, 10)
Expand All @@ -61,7 +79,9 @@ def __init__(

def init_params(self) -> None:
self.context_size = self.nav_cfg.get("context_size", 5)
self.waypoint_spacing = self.nav_cfg.get("metric_waypoint_spacing", 0.1)
self.metric_waypoint_spacing = self.nav_cfg.get("metric_waypoint_spacing", 0.1)
self.waypoint_spacing = self.nav_cfg.get("waypoint_spacing", 1)
self.action_scale = self.metric_waypoint_spacing * self.waypoint_spacing
self.waypoint_select = self.nav_cfg.get("waypoint_select", 4)
self.linear_max_vel = self.nav_cfg.get("linear_max_vel", 0.3)
self.angular_max_vel = self.nav_cfg.get("angular_max_vel", 0.3)
Expand Down Expand Up @@ -151,7 +171,46 @@ def init_model_modality(self) -> None:
self.goal_image_tensor = transform_images_PIL_mask(goal_pil, self.mask_goal).to(self.device)
self.goal_pose_tensor = torch.tensor([goal_pose], dtype=torch.float32, device=self.device)
self.modality_tensor = torch.tensor([self.modality_id], dtype=torch.long, device=self.device)


def _update_text_feature(self) -> None:
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

メソッド名に統一感が無いです

prompt = self.latest_prompt if self.use_prompt else "No language instruction"
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

modality id次第で切り替えればuse_promptというパラメータを用意せずに済みそうですね

その際にプロンプトが与えられていないのであれば,エラーを返すようにしたほうが良いと思います.

token = clip.tokenize(prompt, truncate=True).to(self.device)
with torch.no_grad():
self.feat_text = self.text_encoder.encode_text(token)

def resolve_package_path(self, raw_path: str) -> Path:
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ちょっと気持ち悪いことをしている

path = Path(raw_path)
return path if path.is_absolute() else self.package_share_dir / path

def init_toponav(self) -> None:
    """Set up the optional topological-navigation state from ``self.nav_cfg``.

    Always initialises ``use_toponav``, ``toponav``, ``toponav_current_index``,
    ``toponav_goal_index`` and ``toponav_min_score``.  A
    ``TopologicalNavigator`` is only constructed when ``use_toponav`` is
    enabled in the config; otherwise ``self.toponav`` stays ``None``.

    Raises:
        ValueError: if toponav is enabled while ``self.use_goal_image`` is
            falsy.
    """
    cfg = self.nav_cfg

    self.use_toponav = bool(cfg.get("use_toponav", False))
    self.toponav = None
    self.toponav_current_index = None
    self.toponav_goal_index = None
    self.toponav_min_score = float(cfg.get("toponav_min_score", -1.0))

    if self.use_toponav:
        # The navigator swaps the goal image at runtime, so the selected
        # modality has to consume a goal image.
        if not self.use_goal_image:
            raise ValueError("Toponav requires a modality_id that uses goal_image.")

        # Relative entries are resolved through resolve_package_path, in the
        # order topomap file, node-image directory, weight file.
        topomap_path, node_image_dir, placenet_weights = (
            self.resolve_package_path(str(cfg.get(key, fallback)))
            for key, fallback in (
                ("topomap_path", "config/topomap/topomap.yaml"),
                ("topomap_image_dir", "config/topomap/images"),
                ("placenet_weight_path", "deployment/weights/placenet.pt"),
            )
        )

        navigator_kwargs = {
            "topomap_path": topomap_path,
            "image_dir": node_image_dir,
            "weight_path": placenet_weights,
            "device": self.device,
            "image_size": self.goal_size,
            "crop_size": int(cfg.get("toponav_crop_size", 288)),
            "delta": float(cfg.get("toponav_delta", 5.0)),
            "window_lower": int(cfg.get("toponav_window_lower", -1)),
            "window_upper": int(cfg.get("toponav_window_upper", 10)),
        }
        self.toponav = TopologicalNavigator(**navigator_kwargs)

        logger = self.get_logger()
        logger.info(f"Toponav loaded: nodes={len(self.toponav.nodes)}, topomap={topomap_path}")

def autonomous_callback(self, msg: Bool) -> None:
self.autonomous_flag = bool(msg.data)
Expand All @@ -162,6 +221,8 @@ def prompt_callback(self, msg: String) -> None:
self._update_text_feature()

def image_callback(self, msg: Image) -> None:
self.obs_image_bgr = image_msg_to_bgr(msg)

cv_image = image_to_cv2(msg, self.clip_size)
self.obs_image = PILImage.fromarray(cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB))

Expand All @@ -176,6 +237,8 @@ def timer_callback(self) -> None:
if len(self.context_queue) < self.context_size + 1:
return

self.update_toponav_goal()

obs_images, map_images, cur_large_img = build_omnivla_edge_inputs(
context_queue=self.context_queue,
current_image=self.obs_image,
Expand Down Expand Up @@ -208,6 +271,31 @@ def timer_callback(self) -> None:
self.publisher_path(waypoints)
self.publisher_command_velocity(linear_vel, angular_vel)

def update_toponav_goal(self) -> None:
    """Refresh the goal-image tensor from the topological navigator.

    Does nothing when no navigator is configured or no BGR observation has
    been stored yet.  Otherwise the current node is estimated from
    ``self.obs_image_bgr``; an estimate scoring below
    ``self.toponav_min_score`` is logged as a warning and ignored.  When the
    (current, goal) node pair differs from the stored pair, the goal image
    is loaded, transformed with ``self.mask_goal``, moved to
    ``self.device`` and the stored indices are updated.
    """
    ready = self.toponav is not None and self.obs_image_bgr is not None
    if not ready:
        return

    navigator = self.toponav
    current_index, score = navigator.estimate_current_node(self.obs_image_bgr)

    if score < self.toponav_min_score:
        logger = self.get_logger()
        logger.warn(
            f"Toponav score below threshold: score={score:.3f}, threshold={self.toponav_min_score:.3f}"
        )
        return

    goal_index = navigator.select_goal_node(current_index)

    # Skip reloading the goal image while neither node index has changed.
    pair_unchanged = (
        current_index == self.toponav_current_index
        and goal_index == self.toponav_goal_index
    )
    if pair_unchanged:
        return

    goal_pil = navigator.load_goal_image(goal_index)
    goal_tensor = transform_images_PIL_mask(goal_pil, self.mask_goal)
    self.goal_image_tensor = goal_tensor.to(self.device)
    self.toponav_current_index, self.toponav_goal_index = current_index, goal_index

    nodes = self.toponav.nodes
    current_node = nodes[current_index]
    goal_node = nodes[goal_index]
    self.get_logger().info(
        "Toponav goal updated: "
        f"current_id={current_node.node_id}, goal_id={goal_node.node_id}, score={score:.3f}"
    )

def publisher_path(self, waypoints: np.ndarray) -> None:
msg = NavPath()
Expand All @@ -217,8 +305,8 @@ def publisher_path(self, waypoints: np.ndarray) -> None:
for wp in waypoints:
pose = PoseStamped()
pose.header = msg.header
x = float(wp[0]) * self.waypoint_spacing
y = float(wp[1]) * self.waypoint_spacing
x = float(wp[0]) * self.action_scale
y = float(wp[1]) * self.action_scale
yaw = math.atan2(float(wp[3]), float(wp[2]))

pose.pose.position.x = x
Expand All @@ -241,8 +329,8 @@ def action_to_waypoints_and_cmd_vel(self, action_pred: np.ndarray) -> Tuple[np.n
selected = max(0, min(self.waypoint_select, waypoints.shape[0] - 1))

dx, dy, hx, hy = [float(v) for v in waypoints[selected]]
dx *= self.waypoint_spacing
dy *= self.waypoint_spacing
dx *= self.action_scale
dy *= self.action_scale

eps = 1e-8
dt = 1.0 / 3.0
Expand All @@ -257,9 +345,6 @@ def action_to_waypoints_and_cmd_vel(self, action_pred: np.ndarray) -> Tuple[np.n
linear_vel = dx / dt
angular_vel = math.atan(dy / dx) / dt

linear_vel = float(np.clip(linear_vel, 0.0, 0.5))
angular_vel = float(np.clip(angular_vel, -1.0, 1.0))

maxv = float(self.linear_max_vel)
maxw = float(self.angular_max_vel)
if abs(linear_vel) <= maxv:
Expand Down
50 changes: 48 additions & 2 deletions deployment/navvla/preprocess.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

from pathlib import Path
import sys
from typing import TYPE_CHECKING
from typing import Dict, List, Tuple

Expand All @@ -10,6 +11,16 @@
import yaml
from PIL import Image as PILImage

_THIS_FILE = Path(__file__).resolve()
_REPO_ROOT_CANDIDATES = [
_THIS_FILE.parents[2],
_THIS_FILE.parents[4] / "src" / "NavVLA" if len(_THIS_FILE.parents) > 4 else None,
]
for _repo_root in reversed([path for path in _REPO_ROOT_CANDIDATES if path is not None and (path / "OmniVLA").exists()]):
for _path in (_repo_root, _repo_root / "OmniVLA", _repo_root / "OmniVLA" / "inference"):
if str(_path) not in sys.path:
sys.path.insert(0, str(_path))

from OmniVLA.inference.utils_policy import transform_images_PIL_mask, transform_images_map

if TYPE_CHECKING:
Expand Down Expand Up @@ -45,9 +56,44 @@ def build_mask(size: Tuple[int, int], use_mask: bool, mask_path: str) -> np.ndar
return loaded.astype(np.float32)


def image_msg_to_bgr(msg: "Image") -> np.ndarray:
    """Convert an image message into a 3-channel BGR ``uint8`` array.

    The message must expose ``encoding``, ``height``, ``width``, ``step``
    and ``data``.  ``step`` is used as the byte length of one row, so any
    padding bytes after the ``width * channels`` pixel bytes are dropped.

    Args:
        msg: image message to decode.

    Returns:
        A newly allocated array of shape ``(height, width, 3)`` in BGR
        channel order.

    Raises:
        ValueError: if the encoding is not supported, or if ``step`` is
            shorter than one packed row of pixels.
    """
    encoding = msg.encoding.lower()
    channels_by_encoding = {
        "bgr8": 3,
        "rgb8": 3,
        "bgra8": 4,
        "rgba8": 4,
        "mono8": 1,
        "8uc1": 1,
        "8uc3": 3,
        "8uc4": 4,
        "yuv422_yuy2": 2,
        "yuyv": 2,
        "yuy2": 2,
    }
    if encoding not in channels_by_encoding:
        raise ValueError(f"Unsupported image encoding: {msg.encoding}")

    channels = channels_by_encoding[encoding]
    height, width, step = int(msg.height), int(msg.width), int(msg.step)
    row_bytes = width * channels
    if step < row_bytes:
        raise ValueError(
            f"Image step ({step}) is smaller than one row of pixels ({row_bytes} bytes)"
        )

    rows = np.frombuffer(msg.data, dtype=np.uint8).reshape(height, step)
    image = rows[:, :row_bytes].reshape(height, width, channels)

    if encoding in ("bgr8", "8uc3"):
        return image.copy()
    if encoding == "rgb8":
        return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    if encoding in ("bgra8", "8uc4"):
        # "8uc4" previously fell through to the grayscale conversion below,
        # which rejects 4-channel input.  It is treated as BGRA here, by
        # analogy with "8uc3" being treated as BGR (assumption: confirm
        # against the camera driver).  Dropping the alpha plane is all a
        # BGRA -> BGR conversion does; copy() yields a writable array.
        return image[:, :, :3].copy()
    if encoding == "rgba8":
        return cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
    if encoding in ("yuv422_yuy2", "yuyv", "yuy2"):
        return cv2.cvtColor(image, cv2.COLOR_YUV2BGR_YUY2)

    # Only the single-channel encodings ("mono8", "8uc1") reach this point.
    return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)


def image_to_cv2(msg: "Image", output_size: Tuple[int, int]) -> np.ndarray:
frame = np.frombuffer(msg.data, dtype=np.uint8).reshape((int(msg.height), int(msg.width), 3))
frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
frame = image_msg_to_bgr(msg)

side = min(frame.shape[0], frame.shape[1])
offset_y = (frame.shape[0] - side) // 2
Expand Down
Loading