diff --git a/modules/slam/CMakeLists.txt b/modules/slam/CMakeLists.txt
new file mode 100644
index 0000000000..3b86f31986
--- /dev/null
+++ b/modules/slam/CMakeLists.txt
@@ -0,0 +1,62 @@
+set(the_description "SLAM module: visual odometry / mapping / localization utilities")
+
+# Core dependencies used by the sources (see includes in src/ and include/)
+ocv_define_module(slam
+  opencv_core
+  opencv_imgproc
+  opencv_calib3d
+  opencv_features2d
+  OPTIONAL opencv_sfm
+    opencv_imgcodecs
+    opencv_highgui
+    opencv_video
+  WRAP python
+)
+
+# Detect optional dependencies and expose preprocessor macros so sources
+# can use #if defined(HAVE_SFM) / #if defined(HAVE_G2O)
+if(TARGET ${the_module})
+  # opencv_sfm is declared OPTIONAL above; if its target exists enable HAVE_SFM
+  if(TARGET opencv_sfm)
+    message(STATUS "slam: opencv_sfm target found — enabling HAVE_SFM")
+    target_compile_definitions(${the_module} PRIVATE HAVE_SFM=1)
+    # also add a global definition so header-parsers (python generator)
+    # running during build/config time can evaluate #if HAVE_SFM
+    add_definitions(-DHAVE_SFM=1)
+    target_link_libraries(${the_module} PRIVATE opencv_sfm)
+  else()
+    message(STATUS "slam: opencv_sfm not found — HAVE_SFM disabled")
+  endif()
+
+  # Try to detect g2o (header + library). This is a best-effort detection: we
+  # look for a common g2o header and try to find a library name. Users on
+  # Windows should install/build g2o and make it discoverable to CMake.
+  find_path(G2O_INCLUDE_DIR NAMES g2o/core/sparse_optimizer.h)
+  find_library(G2O_LIBRARY NAMES g2o_core g2o)
+  if(G2O_INCLUDE_DIR AND G2O_LIBRARY)
+    message(STATUS "slam: g2o found (headers and library) — enabling HAVE_G2O")
+    target_include_directories(${the_module} PRIVATE ${G2O_INCLUDE_DIR})
+    target_link_libraries(${the_module} PRIVATE ${G2O_LIBRARY})
+    target_compile_definitions(${the_module} PRIVATE HAVE_G2O=1)
+    # also expose globally so external header parsers can evaluate #if HAVE_G2O
+    add_definitions(-DHAVE_G2O=1)
+  else()
+    message(STATUS "slam: g2o not found — HAVE_G2O disabled")
+  endif()
+endif()
+
+# Ensure C++17 is enabled for this module (required for <filesystem>)
+if(TARGET ${the_module})
+  set_target_properties(${the_module} PROPERTIES CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON)
+  # For older GCC/libstdc++ (<9) we may need to link stdc++fs
+  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "9.0")
+    target_link_libraries(${the_module} PRIVATE stdc++fs)
+  endif()
+endif()
+
+# If you need to add special include directories, libraries or compile flags,
+# add them below using the OpenCV contrib module helper macros, for example:
+#
+#   ocv_include_directories(${CMAKE_CURRENT_LIST_DIR}/include)
+#   ocv_module_compile_options(-Wall)
+#
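Note on usage (not part of the patch): a minimal sketch of how module sources can key off the `HAVE_SFM` / `HAVE_G2O` macros this CMake script defines. The function and its strings are hypothetical; only the macro names and the guarded `Optimizer` methods come from the module itself.

```cpp
#include "opencv2/slam/optimizer.hpp"

// Illustrative only: report which BA backend the HAVE_* macros selected.
static const char* activeBackend() {
#if defined(HAVE_G2O)
    return "g2o";          // Optimizer::localBundleAdjustmentG2O is compiled in
#elif defined(HAVE_SFM)
    return "opencv_sfm";   // Optimizer::localBundleAdjustmentSFM is compiled in
#else
    return "pose-only";    // only Optimizer::optimizePose is available
#endif
}
```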
diff --git a/modules/slam/include/opencv2/slam.hpp b/modules/slam/include/opencv2/slam.hpp
new file mode 100644
index 0000000000..334925ec9b
--- /dev/null
+++ b/modules/slam/include/opencv2/slam.hpp
@@ -0,0 +1,27 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+#ifndef __OPENCV_SLAM_HPP__
+#define __OPENCV_SLAM_HPP__
+
+#include "opencv2/slam/data_loader.hpp"
+#include "opencv2/slam/feature.hpp"
+#include "opencv2/slam/initializer.hpp"
+#include "opencv2/slam/keyframe.hpp"
+#include "opencv2/slam/localizer.hpp"
+#include "opencv2/slam/map.hpp"
+#include "opencv2/slam/matcher.hpp"
+#include "opencv2/slam/optimizer.hpp"
+#include "opencv2/slam/pose.hpp"
+#include "opencv2/slam/visualizer.hpp"
+#include "opencv2/slam/vo.hpp"
+
+
+/** @defgroup slam Simultaneous Localization and Mapping
+  @brief Simultaneous Localization and Mapping (SLAM) module
+*/
+
+#endif
+
+/* End of file. */
\ No newline at end of file
diff --git a/modules/slam/include/opencv2/slam/data_loader.hpp b/modules/slam/include/opencv2/slam/data_loader.hpp
new file mode 100644
index 0000000000..109a83639f
--- /dev/null
+++ b/modules/slam/include/opencv2/slam/data_loader.hpp
@@ -0,0 +1,46 @@
+#pragma once
+#include <opencv2/core.hpp>
+#include <string>
+#include <vector>
+
+namespace cv{
+namespace vo{
+
+bool ensureDirectoryExists(const std::string &dir);
+
+class DataLoader {
+public:
+    // Constructor: takes an image directory (relative or absolute path)
+    DataLoader(const std::string &imageDir);
+
+    // Fetch the next image; on success returns true and fills image and imagePath; returns false at the end
+    bool getNextImage(Mat &image, std::string &imagePath);
+
+    // Reset to the start of the sequence
+    void reset();
+
+    // Whether more images remain
+    bool hasNext() const;
+
+    // Total number of images
+    size_t size() const;
+
+    // Try to load camera intrinsics (fx, fy, cx, cy); returns whether it succeeded
+    bool loadIntrinsics(const std::string &yamlPath);
+
+    // Intrinsics accessors
+    double fx() const { return fx_; }
+    double fy() const { return fy_; }
+    double cx() const { return cx_; }
+    double cy() const { return cy_; }
+
+private:
+    std::vector<std::string> imageFiles;
+    size_t currentIndex;
+
+    // Camera intrinsics (fallback values if not loaded)
+    double fx_, fy_, cx_, cy_;
+};
+
+} // namespace vo
+} // namespace cv
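A minimal usage sketch for `DataLoader` (the directory path is a made-up example):

```cpp
#include "opencv2/slam/data_loader.hpp"
#include <iostream>

int main() {
    cv::vo::DataLoader loader("/data/sequence/cam0"); // hypothetical path
    std::cout << loader.size() << " images, fx=" << loader.fx() << std::endl;
    cv::Mat img; std::string path;
    while (loader.getNextImage(img, path)) {
        // feed img into the tracker / VO pipeline here
    }
    return 0;
}
```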
diff --git a/modules/slam/include/opencv2/slam/feature.hpp b/modules/slam/include/opencv2/slam/feature.hpp
new file mode 100644
index 0000000000..0ec86302a0
--- /dev/null
+++ b/modules/slam/include/opencv2/slam/feature.hpp
@@ -0,0 +1,38 @@
+#pragma once
+#include <opencv2/core.hpp>
+#include <opencv2/features2d.hpp>
+#include <vector>
+
+namespace cv {
+namespace vo {
+
+class FeatureExtractor {
+public:
+    explicit FeatureExtractor(int nfeatures = 2000);
+    // detectAndCompute: detect keypoints and compute descriptors.
+    // If previous-frame data (prevGray, prevKp) is provided, a flow-aware grid allocation
+    // will be used (score = response * (1 + flow_lambda * normalized_flow)). Otherwise a
+    // simpler ANMS selection is used. The prev arguments have defaults so this function
+    // replaces the two previous overloads.
+    void detectAndCompute(const Mat &image, std::vector<KeyPoint> &kps, Mat &desc,
+                          const Mat &prevGray = Mat(), const std::vector<KeyPoint> &prevKp = std::vector<KeyPoint>(),
+                          double flow_lambda = 5.0);
+private:
+    Ptr<ORB> orb_;
+    int nfeatures_;
+};
+
+// Function to detect and compute features in an image
+inline void detectAndComputeFeatures(const Mat &image,
+                                     std::vector<KeyPoint> &keypoints,
+                                     Mat &descriptors) {
+    // Create ORB detector and descriptor
+    auto orb = ORB::create();
+    // Detect keypoints
+    orb->detect(image, keypoints);
+    // Compute descriptors
+    orb->compute(image, keypoints, descriptors);
+}
+
+} // namespace vo
+} // namespace cv
\ No newline at end of file
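To make the flow-aware score from the comment above concrete, a tiny worked example with invented numbers (response 40, 30 px of flow on a 1000 px image diagonal, `flow_lambda` = 5):

```cpp
#include <cstdio>

// score = response * (1 + flow_lambda * normalized_flow), per feature.hpp
int main() {
    double response = 40.0, flow_px = 30.0, diag_px = 1000.0, flow_lambda = 5.0;
    double norm_flow = flow_px / diag_px;                       // 0.03
    double score = response * (1.0 + flow_lambda * norm_flow); // 40 * 1.15 = 46
    std::printf("score = %.1f\n", score);
    return 0;
}
```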
diff --git a/modules/slam/include/opencv2/slam/initializer.hpp b/modules/slam/include/opencv2/slam/initializer.hpp
new file mode 100644
index 0000000000..b0e8676a0f
--- /dev/null
+++ b/modules/slam/include/opencv2/slam/initializer.hpp
@@ -0,0 +1,87 @@
+#pragma once
+#include <opencv2/core.hpp>
+#include <opencv2/features2d.hpp>
+#include <vector>
+#include "opencv2/slam/keyframe.hpp"
+#include "opencv2/slam/map.hpp"
+
+namespace cv {
+namespace vo {
+
+class Initializer {
+public:
+    Initializer();
+
+    // Attempt initialization with two frames
+    // Returns true if initialization successful
+    bool initialize(const std::vector<KeyPoint> &kps1,
+                    const std::vector<KeyPoint> &kps2,
+                    const std::vector<DMatch> &matches,
+                    double fx, double fy, double cx, double cy,
+                    Mat &R, Mat &t,
+                    std::vector<Point3d> &points3D,
+                    std::vector<bool> &isTriangulated);
+
+    // Check if frames have sufficient parallax for initialization
+    static bool checkParallax(const std::vector<KeyPoint> &kps1,
+                              const std::vector<KeyPoint> &kps2,
+                              const std::vector<DMatch> &matches,
+                              double minMedianParallax = 15.0);
+
+private:
+    // Reconstruct from Homography
+    bool reconstructH(const std::vector<Point2f> &pts1,
+                      const std::vector<Point2f> &pts2,
+                      const Mat &H,
+                      double fx, double fy, double cx, double cy,
+                      Mat &R, Mat &t,
+                      std::vector<Point3d> &points3D,
+                      std::vector<bool> &isTriangulated,
+                      float &parallax);
+
+    // Reconstruct from Fundamental/Essential
+    bool reconstructF(const std::vector<Point2f> &pts1,
+                      const std::vector<Point2f> &pts2,
+                      const Mat &F,
+                      double fx, double fy, double cx, double cy,
+                      Mat &R, Mat &t,
+                      std::vector<Point3d> &points3D,
+                      std::vector<bool> &isTriangulated,
+                      float &parallax);
+
+    // Check reconstructed points quality
+    int checkRT(const Mat &R, const Mat &t,
+                const std::vector<Point2f> &pts1,
+                const std::vector<Point2f> &pts2,
+                const std::vector<Point3d> &points3D,
+                std::vector<bool> &isGood,
+                double fx, double fy, double cx, double cy,
+                float &parallax);
+
+    // Triangulate points
+    void triangulate(const Mat &P1, const Mat &P2,
+                     const std::vector<Point2f> &pts1,
+                     const std::vector<Point2f> &pts2,
+                     std::vector<Point3d> &points3D);
+
+    // Decompose Homography
+    void decomposeH(const Mat &H, std::vector<Mat> &Rs,
+                    std::vector<Mat> &ts, std::vector<Mat> &normals);
+
+    // Compute homography score
+    float computeScore(const Mat &H21, const Mat &H12,
+                       const std::vector<Point2f> &pts1,
+                       const std::vector<Point2f> &pts2,
+                       std::vector<bool> &inliersH,
+                       float sigma = 1.0);
+
+    // Compute fundamental score
+    float computeScoreF(const Mat &F21,
+                        const std::vector<Point2f> &pts1,
+                        const std::vector<Point2f> &pts2,
+                        std::vector<bool> &inliersF,
+                        float sigma = 1.0);
+};
+
+} // namespace vo
+} // namespace cv
\ No newline at end of file
diff --git a/modules/slam/include/opencv2/slam/keyframe.hpp b/modules/slam/include/opencv2/slam/keyframe.hpp
new file mode 100644
index 0000000000..ce5d3a6b06
--- /dev/null
+++ b/modules/slam/include/opencv2/slam/keyframe.hpp
@@ -0,0 +1,21 @@
+#pragma once
+#include <opencv2/core.hpp>
+#include <opencv2/features2d.hpp>
+#include <vector>
+
+namespace cv {
+namespace vo {
+
+struct KeyFrame {
+    int id = -1;
+    Mat image; // optional
+    std::vector<KeyPoint> kps;
+    Mat desc;
+    // pose: rotation and translation to map coordinates
+    // X_world = R * X_cam + t
+    Mat R_w = Mat::eye(3,3,CV_64F);
+    Mat t_w = Mat::zeros(3,1,CV_64F);
+};
+
+} // namespace vo
+} // namespace cv
\ No newline at end of file
diff --git a/modules/slam/include/opencv2/slam/localizer.hpp b/modules/slam/include/opencv2/slam/localizer.hpp
new file mode 100644
index 0000000000..c441737b93
--- /dev/null
+++ b/modules/slam/include/opencv2/slam/localizer.hpp
@@ -0,0 +1,32 @@
+#pragma once
+#include <opencv2/core.hpp>
+#include <string>
+#include "opencv2/slam/data_loader.hpp"
+#include "opencv2/slam/map.hpp"
+
+namespace cv {
+namespace vo {
+
+class Localizer {
+public:
+    Localizer(float ratio = 0.7f);
+
+    // Try to solve PnP against map points. Returns true if solved and fills R_out/t_out and inliers.
+    // Provide camera intrinsics and image dimensions explicitly (DataLoader doesn't expose width/height).
+    // Enhanced tryPnP with optional diagnostics output (frame id, image and output directory).
+    // out_preMatches and out_postMatches can be nullptr. If outDir is provided, diagnostics images/logs will be saved there.
+    bool tryPnP(const MapManager &map, const Mat &desc, const std::vector<KeyPoint> &kps,
+                double fx, double fy, double cx, double cy, int imgW, int imgH,
+                int min_inliers,
+                Mat &R_out, Mat &t_out, int &inliers_out,
+                int frame_id = -1, const Mat *image = nullptr, const std::string &outDir = "",
+                int *out_preMatches = nullptr, int *out_postMatches = nullptr, double *out_meanReproj = nullptr) const;
+
+    float ratio() const { return ratio_; }
+private:
+    float ratio_ = 0.7f;
+};
+
+} // namespace vo
+} // namespace cv
\ No newline at end of file
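A hedged sketch of a `tryPnP` call; the intrinsics and the 752x480 image size are assumptions for illustration, not values from this module:

```cpp
#include "opencv2/slam/localizer.hpp"

// Sketch: relocalize one frame against the map (all numeric values illustrative).
bool relocalize(const cv::vo::MapManager &map, const cv::Mat &desc,
                const std::vector<cv::KeyPoint> &kps) {
    cv::vo::Localizer loc(0.7f);
    cv::Mat R, t;
    int inliers = 0;
    return loc.tryPnP(map, desc, kps, 458.0, 457.0, 367.0, 248.0, 752, 480,
                      /*min_inliers=*/15, R, t, inliers);
}
```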
diff --git a/modules/slam/include/opencv2/slam/map.hpp b/modules/slam/include/opencv2/slam/map.hpp
new file mode 100644
index 0000000000..38d8d7097b
--- /dev/null
+++ b/modules/slam/include/opencv2/slam/map.hpp
@@ -0,0 +1,91 @@
+#pragma once
+#include <opencv2/core.hpp>
+#include <vector>
+#include <unordered_map>
+#include "opencv2/slam/keyframe.hpp"
+
+namespace cv {
+namespace vo {
+
+struct MapPoint {
+    int id = -1;                                   // unique id for id-based lookups
+    Point3d p;                                     // 3D position in world frame
+    std::vector<std::pair<int,int>> observations;  // pairs of (keyframe id, keypoint idx)
+
+    // Quality management fields
+    Mat descriptor;            // Representative descriptor for matching
+    int nObs = 0;              // Number of observations
+    bool isBad = false;        // Flag for bad points to be culled
+    double minDistance = 0.0;  // Min viewing distance
+    double maxDistance = 0.0;  // Max viewing distance
+
+    // Statistics
+    int nFound = 0;    // Number of times found in tracking
+    int nVisible = 0;  // Number of times visible
+
+    // Constructors
+    MapPoint() = default;
+    MapPoint(const Point3d& pos) : p(pos) {}
+
+    // Helper: compute found ratio
+    float getFoundRatio() const {
+        return nVisible > 0 ? static_cast<float>(nFound) / nVisible : 0.0f;
+    }
+};
+
+class MapManager {
+public:
+    MapManager();
+
+    void addKeyFrame(const KeyFrame &kf);
+    void addMapPoints(const std::vector<MapPoint> &pts);
+
+    const std::vector<KeyFrame>& keyframes() const { return keyframes_; }
+    const std::vector<MapPoint>& mappoints() const { return mappoints_; }
+
+    // Find candidate mappoint indices visible from pose (lastR, lastT) and inside image
+    std::vector<int> findVisibleCandidates(const Mat &lastR, const Mat &lastT,
+                                           double fx, double fy, double cx, double cy,
+                                           int imgW, int imgH) const;
+
+    // Triangulate given normalized coordinates between last keyframe and current.
+    // Returns newly created map points (appended to internal list).
+    std::vector<MapPoint> triangulateBetweenLastTwo(const std::vector<Point2f> &pts1n,
+                                                    const std::vector<Point2f> &pts2n,
+                                                    const std::vector<int> &pts1_kp_idx,
+                                                    const std::vector<int> &pts2_kp_idx,
+                                                    const KeyFrame &lastKf, const KeyFrame &curKf,
+                                                    double fx, double fy, double cx, double cy);
+
+    // Lookup keyframe index by id (-1 if not found)
+    int keyframeIndex(int id) const;
+    // Lookup mappoint index by id (-1 if not found)
+    int mapPointIndex(int id) const;
+    // Mutable access if caller legitimately needs to modify keyframes/mappoints.
+    // Prefer using the applyOptimized... APIs for controlled updates.
+    std::vector<KeyFrame>& keyframesMutable() { return keyframes_; }
+    std::vector<MapPoint>& mappointsMutable() { return mappoints_; }
+
+    // Apply optimized pose/point by id (safe writeback APIs for backend)
+    void applyOptimizedKeyframePose(int keyframeId, const Mat &R, const Mat &t);
+    void applyOptimizedMapPoint(int mappointId, const Point3d &p);
+
+    // Quality management
+    void cullBadMapPoints();
+    double computeReprojError(const MapPoint &mp, const KeyFrame &kf,
+                              double fx, double fy, double cx, double cy) const;
+    void updateMapPointDescriptor(MapPoint &mp);
+
+    // Statistics
+    int countGoodMapPoints() const;
+
+private:
+    std::vector<KeyFrame> keyframes_;
+    std::vector<MapPoint> mappoints_;
+    std::unordered_map<int,int> id2idx_;
+    std::unordered_map<int,int> mpid2idx_;
+    int next_mappoint_id_ = 1;
+};
+
+} // namespace vo
+} // namespace cv
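A small sketch of the `getFoundRatio()` bookkeeping; the counts are invented, and 0.25 is exactly the `MIN_FOUND_RATIO` cull threshold used later in `map.cpp`:

```cpp
#include "opencv2/slam/map.hpp"
#include <cassert>

int main() {
    cv::vo::MapPoint mp(cv::Point3d(0.0, 0.0, 2.0));
    mp.nVisible = 12; // projected into 12 frames
    mp.nFound = 3;    // actually matched in 3 of them
    // 3/12 = 0.25, the MIN_FOUND_RATIO used by MapManager::cullBadMapPoints()
    assert(mp.getFoundRatio() == 0.25f);
    return 0;
}
```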
diff --git a/modules/slam/include/opencv2/slam/matcher.hpp b/modules/slam/include/opencv2/slam/matcher.hpp
new file mode 100644
index 0000000000..215b8ea0b1
--- /dev/null
+++ b/modules/slam/include/opencv2/slam/matcher.hpp
@@ -0,0 +1,34 @@
+#pragma once
+#include <opencv2/core.hpp>
+#include <opencv2/features2d.hpp>
+#include <vector>
+
+namespace cv {
+namespace vo {
+
+class Matcher {
+public:
+    explicit Matcher(float ratio = 0.75f);
+    // Match descriptors from prev -> curr, return good matches (queryIdx refers to prev, trainIdx to curr)
+    // Basic knn ratio test (backwards-compatible)
+    void knnMatch(const Mat &desc1, const Mat &desc2, std::vector<DMatch> &goodMatches);
+
+    // Stronger match: knn + ratio test + optional mutual cross-check + optional spatial bucketing.
+    // desc1/desc2 are descriptors for prev/curr frames.
+    // kps1/kps2 are the corresponding keypoints (needed for spatial bucketing).
+    // imgCols/imgRows are used to size the bucketing grid. Defaults provide conservative values.
+    // maxTotal: if >0, final matches will be truncated to this count (keep smallest distances).
+    // enableMutual: enable/disable mutual cross-check. enableBucketing: enable/disable grid bucketing.
+    void match(const Mat &desc1, const Mat &desc2,
+               const std::vector<KeyPoint> &kps1, const std::vector<KeyPoint> &kps2,
+               std::vector<DMatch> &goodMatches,
+               int imgCols = 640, int imgRows = 480,
+               int bucketRows = 8, int bucketCols = 8, int topKPerBucket = 4,
+               int maxTotal = 0, bool enableMutual = true, bool enableBucketing = true);
+private:
+    float ratio_;
+    BFMatcher bf_;
+};
+
+} // namespace vo
+} // namespace cv
\ No newline at end of file
diff --git a/modules/slam/include/opencv2/slam/optimizer.hpp b/modules/slam/include/opencv2/slam/optimizer.hpp
new file mode 100644
index 0000000000..b99b980815
--- /dev/null
+++ b/modules/slam/include/opencv2/slam/optimizer.hpp
@@ -0,0 +1,74 @@
+#pragma once
+#include <opencv2/core.hpp>
+#include <vector>
+#include "opencv2/slam/keyframe.hpp"
+#include "opencv2/slam/map.hpp"
+
+namespace cv {
+namespace vo {
+
+// Bundle Adjustment Optimizer using OpenCV-based Levenberg-Marquardt
+// Note: For production, should use g2o or Ceres for better performance
+class Optimizer {
+public:
+    Optimizer();
+
+    // Local Bundle Adjustment
+    // Optimizes a window of recent keyframes and all observed map points
+    // fixedKfIndices: indices of keyframes to keep fixed during optimization
+#if defined(HAVE_G2O)
+    static void localBundleAdjustmentG2O(
+        std::vector<KeyFrame> &keyframes,
+        std::vector<MapPoint> &mappoints,
+        const std::vector<int> &localKfIndices,
+        const std::vector<int> &fixedKfIndices,
+        double fx, double fy, double cx, double cy,
+        int iterations = 10);
+#endif
+
+#if defined(HAVE_SFM)
+    static void localBundleAdjustmentSFM(
+        std::vector<KeyFrame> &keyframes,
+        std::vector<MapPoint> &mappoints,
+        const std::vector<int> &localKfIndices,
+        const std::vector<int> &fixedKfIndices,
+        double fx, double fy, double cx, double cy,
+        int iterations = 10);
+#endif
+    // Pose-only optimization (optimize camera pose given fixed 3D points)
+    static bool optimizePose(
+        KeyFrame &kf,
+        const std::vector<MapPoint> &mappoints,
+        const std::vector<int> &matchedMpIndices,
+        double fx, double fy, double cx, double cy,
+        std::vector<bool> &inliers,
+        int iterations = 10);
+
+#if defined(HAVE_SFM)
+    // Global Bundle Adjustment (expensive, use after loop closure)
+    static void globalBundleAdjustmentSFM(
+        std::vector<KeyFrame> &keyframes,
+        std::vector<MapPoint> &mappoints,
+        double fx, double fy, double cx, double cy,
+        int iterations = 20);
+#endif
+
+private:
+    // Compute reprojection error and Jacobian
+    static double computeReprojectionError(
+        const Point3d &point3D,
+        const Mat &R, const Mat &t,
+        const Point2f &observed,
+        double fx, double fy, double cx, double cy,
+        Mat &jacobianPose,
+        Mat &jacobianPoint);
+
+    // Project 3D point to image
+    static Point2f project(
+        const Point3d &point3D,
+        const Mat &R, const Mat &t,
+        double fx, double fy, double cx, double cy);
+};
+
+} // namespace vo
+} // namespace cv
\ No newline at end of file
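A usage sketch for the stronger `Matcher::match` path; the image size and the 300-match cap are illustrative values, not module defaults:

```cpp
#include "opencv2/slam/matcher.hpp"

// Sketch: frame-to-frame ORB matching with mutual check and 8x8 bucketing.
void matchFrames(const cv::Mat &descPrev, const cv::Mat &descCur,
                 const std::vector<cv::KeyPoint> &kpsPrev,
                 const std::vector<cv::KeyPoint> &kpsCur,
                 std::vector<cv::DMatch> &out) {
    cv::vo::Matcher matcher(0.75f);
    matcher.match(descPrev, descCur, kpsPrev, kpsCur, out,
                  752, 480, /*bucketRows=*/8, /*bucketCols=*/8,
                  /*topKPerBucket=*/4, /*maxTotal=*/300);
}
```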
diff --git a/modules/slam/include/opencv2/slam/pose.hpp b/modules/slam/include/opencv2/slam/pose.hpp
new file mode 100644
index 0000000000..acf3615950
--- /dev/null
+++ b/modules/slam/include/opencv2/slam/pose.hpp
@@ -0,0 +1,19 @@
+#pragma once
+#include <opencv2/core.hpp>
+#include <vector>
+
+namespace cv {
+namespace vo {
+
+class PoseEstimator {
+public:
+    PoseEstimator() = default;
+    // Estimate relative pose from matched normalized image points. Returns true if pose recovered.
+    bool estimate(const std::vector<Point2f> &pts1,
+                  const std::vector<Point2f> &pts2,
+                  double fx, double fy, double cx, double cy,
+                  Mat &R, Mat &t, Mat &mask, int &inliers);
+};
+
+} // namespace vo
+} // namespace cv
\ No newline at end of file
diff --git a/modules/slam/include/opencv2/slam/visualizer.hpp b/modules/slam/include/opencv2/slam/visualizer.hpp
new file mode 100644
index 0000000000..22ffe94fd3
--- /dev/null
+++ b/modules/slam/include/opencv2/slam/visualizer.hpp
@@ -0,0 +1,47 @@
+#pragma once
+#include <opencv2/core.hpp>
+#include <string>
+#include "opencv2/slam/pose.hpp"
+#include "opencv2/slam/feature.hpp"
+#include "opencv2/slam/matcher.hpp"
+
+namespace cv {
+namespace vo {
+
+class Tracker {
+public:
+    Tracker();
+    // Process a gray image, returns true if a pose was estimated. imgOut contains visualization (matches or keypoints)
+    bool processFrame(const Mat &gray, const std::string &imagePath, Mat &imgOut, Mat &R_out, Mat &t_out, std::string &info);
+private:
+    FeatureExtractor feat_;
+    Matcher matcher_;
+    PoseEstimator poseEst_;
+
+    Mat prevGray_, prevDesc_;
+    std::vector<KeyPoint> prevKp_;
+    int frame_id_;
+};
+
+class Visualizer {
+public:
+    Visualizer(int W=1000, int H=800, double meters_per_pixel=0.02);
+    // Update the trajectory (pass x, z coordinates)
+    void addPose(double x, double z);
+    // Draw the frame visualization (matches or keypoints) in a window
+    void showFrame(const Mat &frame);
+    // Render and display the top-down view
+    void showTopdown();
+    // Save the final trajectory image to a file
+    bool saveTrajectory(const std::string &path);
+private:
+    int W_, H_;
+    double meters_per_pixel_;
+    Size mapSize_;
+    Mat map_;
+    std::vector<Point2d> traj_; // stores (x, z)
+    Point worldToPixel(const Point2d &p) const;
+};
+
+} // namespace vo
+} // namespace cv
\ No newline at end of file
diff --git a/modules/slam/include/opencv2/slam/vo.hpp b/modules/slam/include/opencv2/slam/vo.hpp
new file mode 100644
index 0000000000..5172edb74d
--- /dev/null
+++ b/modules/slam/include/opencv2/slam/vo.hpp
@@ -0,0 +1,32 @@
+#pragma once
+#include <opencv2/core.hpp>
+#include <string>
+#include <opencv2/slam/feature.hpp>
+#include <opencv2/slam/matcher.hpp>
+
+namespace cv {
+namespace vo {
+
+struct CV_EXPORTS_W_SIMPLE VisualOdometryOptions {
+    CV_PROP_RW int min_matches = 15;
+    CV_PROP_RW int min_inliers = 4;
+    CV_PROP_RW double min_inlier_ratio = 0.1;
+    CV_PROP_RW double diff_zero_thresh = 2.0;
+    CV_PROP_RW double flow_zero_thresh = 0.3;
+    CV_PROP_RW double min_translation_norm = 1e-4;
+    CV_PROP_RW double min_rotation_rad = 0.5 * CV_PI / 180.0;
+    CV_PROP_RW int max_matches_keep = 500;
+    CV_PROP_RW double flow_weight_lambda = 5.0;
+};
+
+class CV_EXPORTS_W VisualOdometry {
+public:
+    CV_WRAP VisualOdometry(cv::Ptr<FeatureExtractor> feature, cv::Ptr<Matcher> matcher);
+    CV_WRAP int run(const std::string &imageDir, double scale_m = 1.0, const VisualOdometryOptions &options = VisualOdometryOptions());
+private:
+    cv::Ptr<FeatureExtractor> feature_;
+    cv::Ptr<Matcher> matcher_;
+};
+
+} // namespace vo
+} // namespace cv
\ No newline at end of file
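An end-to-end usage sketch for the `VisualOdometry` wrapper above; the path and the option tweak are hypothetical:

```cpp
#include "opencv2/slam/vo.hpp"

// Sketch: run the whole VO pipeline on an image directory.
int main() {
    auto feature = cv::makePtr<cv::vo::FeatureExtractor>(2000);
    auto matcher = cv::makePtr<cv::vo::Matcher>(0.75f);
    cv::vo::VisualOdometry vo(feature, matcher);

    cv::vo::VisualOdometryOptions opts; // defaults from vo.hpp
    opts.min_matches = 20;              // tightened for this example
    return vo.run("/data/sequence/cam0", /*scale_m=*/1.0, opts);
}
```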
diff --git a/modules/slam/src/data_loader.cpp b/modules/slam/src/data_loader.cpp
new file mode 100644
index 0000000000..3210ebc9cf
--- /dev/null
+++ b/modules/slam/src/data_loader.cpp
@@ -0,0 +1,193 @@
+#include "opencv2/slam/data_loader.hpp"
+#include <opencv2/imgcodecs.hpp>
+#if defined(__has_include)
+# if __has_include(<filesystem>)
+#  include <filesystem>
+#  define HAVE_STD_FILESYSTEM 1
+   namespace fs = std::filesystem;
+# elif __has_include(<experimental/filesystem>)
+#  include <experimental/filesystem>
+#  define HAVE_STD_FILESYSTEM 1
+   namespace fs = std::experimental::filesystem;
+# else
+#  define HAVE_STD_FILESYSTEM 0
+# endif
+#else
+# define HAVE_STD_FILESYSTEM 0
+#endif
+#include <opencv2/core/utility.hpp>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <cerrno>
+#ifdef _WIN32
+# include <direct.h>
+# ifndef S_ISDIR
+#  define S_ISDIR(mode) (((mode) & S_IFDIR) != 0)
+# endif
+# ifndef mkdir
+#  define mkdir(path, mode) _mkdir(path)
+# endif
+#endif
+#include <iostream>
+#include <fstream>
+#include <sstream>
+
+
+namespace cv {
+namespace vo {
+
+bool ensureDirectoryExists(const std::string &dir){
+    if(dir.empty()) return false;
+#if HAVE_STD_FILESYSTEM
+    try{
+        fs::path p(dir);
+        return fs::create_directories(p) || fs::exists(p);
+    }catch(...){ return false; }
+#else
+    std::string tmp = dir;
+    if(tmp.empty()) return false;
+    while(tmp.size() > 1 && tmp.back() == '/') tmp.pop_back();
+    std::string cur;
+    if(!tmp.empty() && tmp[0] == '/') cur = "/";
+    size_t pos = 0;
+    while(pos < tmp.size()){
+        size_t next = tmp.find('/', pos);
+        std::string part = (next == std::string::npos) ? tmp.substr(pos) : tmp.substr(pos, next-pos);
+        if(!part.empty()){
+            if(cur.size() > 1 && cur.back() != '/') cur += '/';
+            cur += part;
+            if(mkdir(cur.c_str(), 0755) != 0){
+                if(errno == EEXIST){ /* ok */ }
+                else {
+                    struct stat st;
+                    if(stat(cur.c_str(), &st) == 0 && S_ISDIR(st.st_mode)){
+                        // ok
+                    } else return false;
+                }
+            }
+        }
+        if(next == std::string::npos) break;
+        pos = next + 1;
+    }
+    return true;
+#endif
+}
+
+DataLoader::DataLoader(const std::string &imageDir)
+    : currentIndex(0), fx_(700.0), fy_(700.0), cx_(0.5), cy_(0.5)
+{
+    // Check whether the directory exists. Prefer std::filesystem when available,
+    // otherwise fall back to POSIX stat.
+#if HAVE_STD_FILESYSTEM
+    try {
+        fs::path p(imageDir);
+        if(!fs::exists(p) || !fs::is_directory(p)){
+            std::cerr << "DataLoader: imageDir does not exist or is not a directory: " << imageDir << std::endl;
+            return;
+        }
+    } catch(const std::exception &e){
+        std::cerr << "DataLoader: filesystem error checking imageDir: " << e.what() << std::endl;
+        // fallthrough to try glob
+    }
+#else
+    struct stat sb;
+    if(stat(imageDir.c_str(), &sb) != 0 || !S_ISDIR(sb.st_mode)){
+        std::cerr << "DataLoader: imageDir does not exist or is not a directory: " << imageDir << std::endl;
+        return;
+    }
+#endif
+
+    // Use glob to list files, catching any possible OpenCV exceptions
+    try {
+        glob(imageDir + "/*", imageFiles, false);
+    } catch(const Exception &e){
+        std::cerr << "DataLoader: glob failed for '" << imageDir << "': " << e.what() << std::endl;
+        imageFiles.clear();
+        return;
+    }
+
+    if(imageFiles.empty()){
+        std::cerr << "DataLoader: no image files found in " << imageDir << std::endl;
+        return;
+    }
+
+    // Try to find sensor.yaml in imageDir or its parent directory.
+    // Use filesystem paths when available; otherwise build paths with simple string ops.
+#if HAVE_STD_FILESYSTEM
+    fs::path p(imageDir);
+    std::string yaml1 = (p / "sensor.yaml").string();
+    std::string yaml2 = (p.parent_path() / "sensor.yaml").string();
+    if(!loadIntrinsics(yaml1)){
+        loadIntrinsics(yaml2); // best-effort
+    }
+#else
+    auto make_parent_yaml = [](const std::string &dir)->std::string{
+        std::string tmp = dir;
+        if(!tmp.empty() && tmp.back() == '/') tmp.pop_back();
+        auto pos = tmp.find_last_of('/');
+        if(pos == std::string::npos) return std::string("sensor.yaml");
+        return tmp.substr(0, pos) + "/sensor.yaml";
+    };
+    std::string yaml1 = imageDir + "/sensor.yaml";
+    std::string yaml2 = make_parent_yaml(imageDir);
+    if(!loadIntrinsics(yaml1)){
+        loadIntrinsics(yaml2); // best-effort
+    }
+#endif
+}
+
+bool DataLoader::loadIntrinsics(const std::string &yamlPath){
+    std::ifstream ifs(yamlPath);
+    if(!ifs.is_open()) return false;
+    std::string line;
+    while(std::getline(ifs, line)){
+        auto pos = line.find("intrinsics:");
+        if(pos != std::string::npos){
+            size_t lb = line.find('[', pos);
+            size_t rb = line.find(']', pos);
+            std::string nums;
+            if(lb != std::string::npos && rb != std::string::npos && rb > lb){
+                nums = line.substr(lb+1, rb-lb-1);
+            } else {
+                std::string rest;
+                while(std::getline(ifs, rest)){
+                    nums += rest + " ";
+                    if(rest.find(']') != std::string::npos) break;
+                }
+            }
+            for(char &c: nums) if(c == ',' || c == '[' || c == ']') c = ' ';
+            std::stringstream ss(nums);
+            std::vector<double> vals;
+            double v;
+            while(ss >> v) vals.push_back(v);
+            if(vals.size() >= 4){
+                fx_ = vals[0]; fy_ = vals[1]; cx_ = vals[2]; cy_ = vals[3];
+                std::cerr << "DataLoader: loaded intrinsics from " << yamlPath << std::endl;
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+bool DataLoader::hasNext() const { return currentIndex < imageFiles.size(); }
+
+size_t DataLoader::size() const { return imageFiles.size(); }
+
+void DataLoader::reset(){ currentIndex = 0; }
+
+bool DataLoader::getNextImage(Mat &image, std::string &imagePath){
+    if(currentIndex >= imageFiles.size()) return false;
+    imagePath = imageFiles[currentIndex];
+    image = imread(imagePath, IMREAD_UNCHANGED);
+    if(image.empty()){
+        std::cerr << "DataLoader: couldn't read " << imagePath << ", skipping" << std::endl;
+        currentIndex++;
+        return getNextImage(image, imagePath); // try next
+    }
+    currentIndex++;
+    return true;
+}
+
+} // namespace vo
+} // namespace cv
\ No newline at end of file
diff --git a/modules/slam/src/feature.cpp b/modules/slam/src/feature.cpp
new file mode 100644
index 0000000000..e2eb7dde50
--- /dev/null
+++ b/modules/slam/src/feature.cpp
@@ -0,0 +1,167 @@
+#include "opencv2/slam/feature.hpp"
+#include <opencv2/video/tracking.hpp>
+#include <algorithm>
+#include <limits>
+#include <cmath>
+
+namespace cv {
+namespace vo {
+
+FeatureExtractor::FeatureExtractor(int nfeatures)
+    : nfeatures_(nfeatures)
+{
+    orb_ = ORB::create(nfeatures_);
+}
+
+// Adaptive Non-Maximal Suppression (ANMS)
+static void anms(const std::vector<KeyPoint> &in, std::vector<KeyPoint> &out, int maxFeatures)
+{
+    out.clear();
+    if(in.empty()) return;
+    int N = (int)in.size();
+    if(maxFeatures <= 0 || N <= maxFeatures){ out = in; return; }
+
+    // For each keypoint, find distance to the nearest keypoint with strictly higher response
+    std::vector<float> radius(N, std::numeric_limits<float>::infinity());
+    for(int i=0;i<N;++i){
+        for(int j=0;j<N;++j){
+            if(in[j].response > in[i].response){
+                float dx = in[i].pt.x - in[j].pt.x;
+                float dy = in[i].pt.y - in[j].pt.y;
+                float d2 = dx*dx + dy*dy;
+                if(d2 < radius[i]) radius[i] = d2;
+            }
+        }
+        // if no stronger keypoint exists, radius[i] stays INF
+    }
+
+    // Now pick top maxFeatures by radius (larger radius preferred).
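The per-cell budget in the grid allocation further down in this file is plain ceiling division; a one-line check with the module's defaults (2000 features, 8x8 grid):

```cpp
#include <cstdio>

// Per-cell budget used by the grid allocation in feature.cpp:
// ceil(nfeatures / (GRID_ROWS * GRID_COLS)) via integer arithmetic.
int main() {
    int nfeatures = 2000, rows = 8, cols = 8;
    int cellCap = (nfeatures + rows*cols - 1) / (rows*cols); // (2000+63)/64 = 32
    std::printf("cellCap = %d\n", cellCap);
    return 0;
}
```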
+    // If radius==INF, treat as large.
+    std::vector<int> idx(N);
+    for(int i=0;i<N;++i) idx[i] = i;
+    std::sort(idx.begin(), idx.end(), [&](int a, int b){
+        float ra = radius[a], rb = radius[b];
+        if(std::isinf(ra) && std::isinf(rb)) return in[a].response > in[b].response; // tie-break by response
+        if(std::isinf(ra)) return true;
+        if(std::isinf(rb)) return false;
+        if(ra == rb) return in[a].response > in[b].response;
+        return ra > rb;
+    });
+
+    int take = std::min(maxFeatures, N);
+    out.reserve(take);
+    for(int i=0;i<take;++i) out.push_back(in[idx[i]]);
+}
+
+void FeatureExtractor::detectAndCompute(const Mat &image, std::vector<KeyPoint> &kps, Mat &desc,
+                                        const Mat &prevGray, const std::vector<KeyPoint> &prevKp,
+                                        double flow_lambda)
+{
+    kps.clear(); desc.release();
+    if(image.empty()) return;
+
+    // detect candidates with ORB
+    std::vector<KeyPoint> candidates;
+    orb_->detect(image, candidates);
+    if(candidates.empty()) return;
+
+    // If no previous-frame info is provided, use simple ANMS + descriptor computation
+    if(prevGray.empty() || prevKp.empty()){
+        std::vector<KeyPoint> selected;
+        anms(candidates, selected, nfeatures_);
+        if(selected.empty()) return;
+        orb_->compute(image, selected, desc);
+        kps = std::move(selected);
+        return;
+    }
+
+    // Flow-aware scoring path -------------------------------------------------
+    // 1) track previous keypoints into current frame to estimate flows
+    std::vector<Point2f> trackedPts;
+    std::vector<uchar> status;
+    std::vector<float> err;
+    std::vector<double> trackedFlows; // aligned with prevKp
+    std::vector<Point2f> prevPts; prevPts.reserve(prevKp.size());
+    for(const auto &kp: prevKp) prevPts.push_back(kp.pt);
+    trackedPts.resize(prevPts.size()); status.resize(prevPts.size()); err.resize(prevPts.size());
+    try{
+        calcOpticalFlowPyrLK(prevGray, image, prevPts, trackedPts, status, err, Size(21,21), 3,
+                             TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 0.01), 0, 1e-4);
+    } catch(...) { status.clear(); }
+    trackedFlows.resize(prevPts.size());
+    for(size_t i=0;i<prevPts.size();++i){
+        if(i < status.size() && status[i])
+            trackedFlows[i] = norm(trackedPts[i] - prevPts[i]);
+        else
+            trackedFlows[i] = 0.0;
+    }
+
+    // 2) score each candidate: response boosted by the flow of the nearest previous keypoint
+    double diag = std::sqrt((double)image.cols*image.cols + (double)image.rows*image.rows);
+    struct CandScore { double score; double flow; int idx; };
+    std::vector<CandScore> scored;
+    scored.reserve(candidates.size());
+    for(size_t i=0;i<candidates.size();++i){
+        double flow = 0.0;
+        {
+            int besti = -1; double bestd2 = std::numeric_limits<double>::max();
+            for(size_t j=0;j<prevPts.size();++j){
+                double dx = candidates[i].pt.x - prevPts[j].x;
+                double dy = candidates[i].pt.y - prevPts[j].y;
+                double d2 = dx*dx + dy*dy;
+                if(d2 < bestd2){ bestd2 = d2; besti = (int)j; }
+            }
+            if(besti >= 0) flow = trackedFlows[besti];
+        }
+        double resp = candidates[i].response;
+        double norm_flow = diag > 0.0 ?
+            (flow / diag) : flow;
+        double score = resp * (1.0 + flow_lambda * norm_flow);
+        scored.push_back({score, flow, (int)i});
+    }
+
+    // 3) grid allocation (ORB-style): split into grid and take top per-cell
+    const int GRID_ROWS = 8;
+    const int GRID_COLS = 8;
+    int cellW = std::max(1, image.cols / GRID_COLS);
+    int cellH = std::max(1, image.rows / GRID_ROWS);
+    int cellCap = (nfeatures_ + GRID_ROWS*GRID_COLS - 1) / (GRID_ROWS*GRID_COLS);
+    std::vector<std::vector<CandScore>> buckets(GRID_ROWS * GRID_COLS);
+    for(const auto &c: scored){
+        const KeyPoint &kp = candidates[c.idx];
+        int cx = std::min(GRID_COLS-1, std::max(0, int(kp.pt.x) / cellW));
+        int cy = std::min(GRID_ROWS-1, std::max(0, int(kp.pt.y) / cellH));
+        buckets[cy*GRID_COLS + cx].push_back(c);
+    }
+
+    std::vector<KeyPoint> selected; selected.reserve(nfeatures_);
+    for(auto &b: buckets){
+        if(b.empty()) continue;
+        std::sort(b.begin(), b.end(), [](const CandScore &a, const CandScore &b){ return a.score > b.score; });
+        int take = std::min((int)b.size(), cellCap);
+        for(int i=0;i<take;++i) selected.push_back(candidates[b[i].idx]);
+    }
+
+    // 4) if still under budget, fill from the globally best remaining candidates
+    if((int)selected.size() < nfeatures_){
+        std::vector<CandScore> all = scored;
+        std::sort(all.begin(), all.end(), [](const CandScore &a, const CandScore &b){ return a.score > b.score; });
+        for(const auto &c: all){
+            if((int)selected.size() >= nfeatures_) break;
+            bool dup = false;
+            for(const auto &s: selected){ if(norm(s.pt - candidates[c.idx].pt) < 1.0){ dup = true; break; } }
+            if(!dup) selected.push_back(candidates[c.idx]);
+        }
+    }
+
+    if(selected.empty()) return;
+    orb_->compute(image, selected, desc);
+    kps = std::move(selected);
+}
+
+} // namespace vo
+} // namespace cv
\ No newline at end of file
diff --git a/modules/slam/src/initializer.cpp b/modules/slam/src/initializer.cpp
new file mode 100644
index 0000000000..2bdd5a9924
--- /dev/null
+++ b/modules/slam/src/initializer.cpp
@@ -0,0 +1,406 @@
+#include "opencv2/slam/initializer.hpp"
+#include <opencv2/calib3d.hpp>
+#include <algorithm>
+#include <iostream>
+#include <cmath>
+
+namespace cv {
+namespace vo {
+
+Initializer::Initializer() {}
+
+bool Initializer::initialize(const std::vector<KeyPoint> &kps1,
+                             const std::vector<KeyPoint> &kps2,
+                             const std::vector<DMatch> &matches,
+                             double fx, double fy, double cx, double cy,
+                             Mat &R, Mat &t,
+                             std::vector<Point3d> &points3D,
+                             std::vector<bool> &isTriangulated) {
+
+    if(matches.size() < 50) {
+        std::cout << "Initializer: too few matches (" << matches.size() << ")" << std::endl;
+        return false;
+    }
+
+    // Extract matched points
+    std::vector<Point2f> pts1, pts2;
+    pts1.reserve(matches.size());
+    pts2.reserve(matches.size());
+
+    for(const auto &m : matches) {
+        pts1.push_back(kps1[m.queryIdx].pt);
+        pts2.push_back(kps2[m.trainIdx].pt);
+    }
+
+    // Estimate both Homography and Fundamental
+    std::vector<uchar> inliersH, inliersF;
+    Mat H = findHomography(pts1, pts2, RANSAC, 3.0, inliersH, 2000, 0.999);
+    Mat F = findFundamentalMat(pts1, pts2, FM_RANSAC, 3.0, 0.999, inliersF);
+
+    if(H.empty() || F.empty()) {
+        std::cout << "Initializer: failed to compute H or F" << std::endl;
+        return false;
+    }
+
+    // Compute scores
+    std::vector<bool> inlH, inlF;
+    float scoreH = computeScore(H, H.inv(), pts1, pts2, inlH, 1.0);
+    float scoreF = computeScoreF(F, pts1, pts2, inlF, 1.0);
+
+    float ratio = scoreH / (scoreH + scoreF);
+
+    std::cout << "Initializer: H score=" << scoreH << " F score=" << scoreF
+              << " ratio=" << ratio << std::endl;
+
+    // Decide between H and F
+    // If ratio > 0.45, scene is likely planar, use H
+    // Otherwise use F for general scenes
+    bool useH = (ratio > 0.45);
+
+    Mat R_out, t_out;
+    std::vector<Point3d> pts3D;
+    std::vector<bool> isTri;
+    float parallax = 0.0f;
+
+    bool success = false;
+    if(useH) {
+        std::cout << "Initializer: using Homography" << std::endl;
+        success
+            = reconstructH(pts1, pts2, H, fx, fy, cx, cy, R_out, t_out, pts3D, isTri, parallax);
+    } else {
+        std::cout << "Initializer: using Fundamental" << std::endl;
+        success = reconstructF(pts1, pts2, F, fx, fy, cx, cy, R_out, t_out, pts3D, isTri, parallax);
+    }
+
+    if(!success) {
+        std::cout << "Initializer: reconstruction failed" << std::endl;
+        return false;
+    }
+
+    // Count good triangulated points
+    int goodCount = 0;
+    for(bool b : isTri) if(b) goodCount++;
+
+    std::cout << "Initializer: triangulated " << goodCount << "/" << pts3D.size()
+              << " points, parallax=" << parallax << std::endl;
+
+    // Check quality: need enough good points
+    if(goodCount < 50) {
+        std::cout << "Initializer: too few good points (" << goodCount << ")" << std::endl;
+        return false;
+    }
+
+    // Check parallax
+    if(parallax < 1.0f) {
+        std::cout << "Initializer: insufficient parallax (" << parallax << ")" << std::endl;
+        return false;
+    }
+
+    // Success
+    R = R_out;
+    t = t_out;
+    points3D = pts3D;
+    isTriangulated = isTri;
+
+    std::cout << "Initializer: SUCCESS!" << std::endl;
+    return true;
+}
+
+bool Initializer::checkParallax(const std::vector<KeyPoint> &kps1,
+                                const std::vector<KeyPoint> &kps2,
+                                const std::vector<DMatch> &matches,
+                                double minMedianParallax) {
+    if(matches.empty()) return false;
+
+    std::vector<double> parallaxes;
+    parallaxes.reserve(matches.size());
+
+    for(const auto &m : matches) {
+        Point2f p1 = kps1[m.queryIdx].pt;
+        Point2f p2 = kps2[m.trainIdx].pt;
+        double dx = p2.x - p1.x;
+        double dy = p2.y - p1.y;
+        parallaxes.push_back(std::sqrt(dx*dx + dy*dy));
+    }
+
+    std::sort(parallaxes.begin(), parallaxes.end());
+    double median = parallaxes[parallaxes.size()/2];
+
+    return median >= minMedianParallax;
+}
+
+bool Initializer::reconstructF(const std::vector<Point2f> &pts1,
+                               const std::vector<Point2f> &pts2,
+                               const Mat &F,
+                               double fx, double fy, double cx, double cy,
+                               Mat &R, Mat &t,
+                               std::vector<Point3d> &points3D,
+                               std::vector<bool> &isTriangulated,
+                               float &parallax) {
+
+    // Compute Essential matrix from F
+    Mat K = (Mat_<double>(3,3) << fx, 0, cx, 0, fy, cy, 0, 0, 1);
+    Mat E = K.t() * F * K;
+
+    // Recover pose from E
+    Mat mask;
+    int inliers = recoverPose(E, pts1, pts2, K, R, t, mask);
+
+    if(inliers < 30) return false;
+
+    // Triangulate
+    points3D.resize(pts1.size());
+    isTriangulated.resize(pts1.size(), false);
+
+    Mat P1 = Mat::eye(3, 4, CV_64F);
+    Mat P2(3, 4, CV_64F);
+    for(int i = 0; i < 3; ++i) {
+        for(int j = 0; j < 3; ++j) P2.at<double>(i,j) = R.at<double>(i,j);
+        P2.at<double>(i, 3) = t.at<double>(i, 0);
+    }
+
+    triangulate(P1, P2, pts1, pts2, points3D);
+
+    // Check quality
+    std::vector<bool> isGood;
+    int nGood = checkRT(R, t, pts1, pts2, points3D, isGood, fx, fy, cx, cy, parallax);
+
+    isTriangulated = isGood;
+    return nGood >= 30;
+}
+
+bool Initializer::reconstructH(const std::vector<Point2f> &pts1,
+                               const std::vector<Point2f> &pts2,
+                               const Mat &H,
+                               double fx, double fy, double cx, double cy,
+                               Mat &R, Mat &t,
+                               std::vector<Point3d> &points3D,
+                               std::vector<bool> &isTriangulated,
+                               float &parallax) {
+
+    // Decompose H to get multiple solutions
+    std::vector<Mat> Rs, ts, normals;
+    decomposeH(H, Rs, ts, normals);
+
+    // Try all solutions and pick best
+    int bestGood = 0;
+    int bestIdx = -1;
+    std::vector<std::vector<Point3d>> allPoints(Rs.size());
+    std::vector<std::vector<bool>> allGood(Rs.size());
+    std::vector<float> allParallax(Rs.size());
+
+    for(size_t i = 0; i < Rs.size(); ++i) {
+        std::vector<Point3d> pts3D;
+        std::vector<bool> isGood;
+        float par = 0.0f;
+
+        Mat P1 = Mat::eye(3, 4, CV_64F);
+        Mat P2(3, 4, CV_64F);
+        for(int r = 0; r < 3; ++r) {
+            for(int c = 0; c < 3; ++c) P2.at<double>(r,c) = Rs[i].at<double>(r,c);
+            P2.at<double>(r, 3) = ts[i].at<double>(r, 0);
+        }
+
+        triangulate(P1, P2, pts1, pts2,
+                    pts3D);
+        int nGood = checkRT(Rs[i], ts[i], pts1, pts2, pts3D, isGood, fx, fy, cx, cy, par);
+
+        allPoints[i] = pts3D;
+        allGood[i] = isGood;
+        allParallax[i] = par;
+
+        if(nGood > bestGood) {
+            bestGood = nGood;
+            bestIdx = static_cast<int>(i);
+        }
+    }
+
+    if(bestIdx < 0 || bestGood < 30) return false;
+
+    R = Rs[bestIdx];
+    t = ts[bestIdx];
+    points3D = allPoints[bestIdx];
+    isTriangulated = allGood[bestIdx];
+    parallax = allParallax[bestIdx];
+
+    return true;
+}
+
+int Initializer::checkRT(const Mat &R, const Mat &t,
+                         const std::vector<Point2f> &pts1,
+                         const std::vector<Point2f> &pts2,
+                         const std::vector<Point3d> &points3D,
+                         std::vector<bool> &isGood,
+                         double fx, double fy, double cx, double cy,
+                         float &parallax) {
+
+    isGood.resize(points3D.size(), false);
+
+    Mat R1 = Mat::eye(3, 3, CV_64F);
+    Mat t1 = Mat::zeros(3, 1, CV_64F);
+    Mat R2 = R;
+    Mat t2 = t;
+
+    int nGood = 0;
+    std::vector<double> cosParallaxes;
+    cosParallaxes.reserve(points3D.size());
+
+    for(size_t i = 0; i < points3D.size(); ++i) {
+        const Point3d &p3d = points3D[i];
+
+        // Check depth in first camera
+        Mat p3dMat = (Mat_<double>(3,1) << p3d.x, p3d.y, p3d.z);
+        Mat p1 = R1 * p3dMat + t1;
+        double z1 = p1.at<double>(2, 0);
+
+        if(z1 <= 0) continue;
+
+        // Check depth in second camera
+        Mat p2 = R2 * p3dMat + t2;
+        double z2 = p2.at<double>(2, 0);
+
+        if(z2 <= 0) continue;
+
+        // Check reprojection error in first camera
+        double u1 = fx * p1.at<double>(0,0)/z1 + cx;
+        double v1 = fy * p1.at<double>(1,0)/z1 + cy;
+        double err1 = std::sqrt(std::pow(u1 - pts1[i].x, 2) + std::pow(v1 - pts1[i].y, 2));
+
+        if(err1 > 4.0) continue;
+
+        // Check reprojection error in second camera
+        double u2 = fx * p2.at<double>(0,0)/z2 + cx;
+        double v2 = fy * p2.at<double>(1,0)/z2 + cy;
+        double err2 = std::sqrt(std::pow(u2 - pts2[i].x, 2) + std::pow(v2 - pts2[i].y, 2));
+
+        if(err2 > 4.0) continue;
+
+        // Check parallax
+        Mat normal1 = p3dMat / norm(p3dMat);
+        Mat normal2 = (p3dMat - t2) / norm(p3dMat - t2);
+        double cosParallax = normal1.dot(normal2);
+
+        cosParallaxes.push_back(cosParallax);
+
+        isGood[i] = true;
+        nGood++;
+    }
+
+    if(!cosParallaxes.empty()) {
+        std::sort(cosParallaxes.begin(), cosParallaxes.end());
+        float medianCos = static_cast<float>(cosParallaxes[cosParallaxes.size()/2]);
+        parallax = static_cast<float>(std::acos(medianCos) * 180.0 / CV_PI);
+    }
+
+    return nGood;
+}
+
+void Initializer::triangulate(const Mat &P1, const Mat &P2,
+                              const std::vector<Point2f> &pts1,
+                              const std::vector<Point2f> &pts2,
+                              std::vector<Point3d> &points3D) {
+
+    points3D.resize(pts1.size());
+
+    Mat pts4D;
+    triangulatePoints(P1, P2, pts1, pts2, pts4D);
+
+    // triangulatePoints may return CV_32F; promote to CV_64F before reading
+    Mat p4d;
+    if(pts4D.type() != CV_64F) pts4D.convertTo(p4d, CV_64F); else p4d = pts4D;
+
+    for(int i = 0; i < p4d.cols; ++i) {
+        Mat x = p4d.col(i);
+        x /= x.at<double>(3, 0);
+        points3D[i] = Point3d(x.at<double>(0,0), x.at<double>(1,0), x.at<double>(2,0));
+    }
+}
+
+void Initializer::decomposeH(const Mat &H, std::vector<Mat> &Rs,
+                             std::vector<Mat> &ts, std::vector<Mat> &normals) {
+
+    Mat H_normalized = H / H.at<double>(2,2);
+
+    std::vector<Mat> rotations, translations, normalsOut;
+    int solutions = decomposeHomographyMat(H_normalized, Mat::eye(3,3,CV_64F),
+                                           rotations, translations, normalsOut);
+    (void)solutions;
+
+    Rs = rotations;
+    ts = translations;
+    normals = normalsOut;
+}
+
+float Initializer::computeScore(const Mat &H21, const Mat &H12,
+                                const std::vector<Point2f> &pts1,
+                                const std::vector<Point2f> &pts2,
+                                std::vector<bool> &inliersH,
+                                float sigma) {
+
+    const float th = 5.991f * sigma;
+    inliersH.resize(pts1.size(), false);
+
+    float score = 0.0f;
+    const float thSq = th * th;
+
+    for(size_t i = 0; i < pts1.size(); ++i) {
+        // Forward error
+        Mat p1 = (Mat_<double>(3,1) << pts1[i].x, pts1[i].y, 1.0);
+        Mat p2pred = H21 * p1;
+        p2pred /= p2pred.at<double>(2,0);
+
+        float dx = pts2[i].x - (float)p2pred.at<double>(0,0);
+        float dy = pts2[i].y - (float)p2pred.at<double>(1,0);
+        float errSq = dx*dx + dy*dy;
+
+        if(errSq < thSq) {
+            score += thSq - errSq;
+        }
+
+        // Backward error
+        Mat p2 = (Mat_<double>(3,1) << pts2[i].x, pts2[i].y, 1.0);
+        Mat p1pred = H12 * p2;
+        p1pred /= p1pred.at<double>(2,0);
+
+        dx = pts1[i].x - (float)p1pred.at<double>(0,0);
+        dy = pts1[i].y - (float)p1pred.at<double>(1,0);
+        errSq = dx*dx + dy*dy;
+
+        if(errSq < thSq) {
+            score += thSq - errSq;
+            inliersH[i] = true;
+        }
+    }
+
+    return score;
+}
+
+float Initializer::computeScoreF(const Mat &F21,
+                                 const std::vector<Point2f> &pts1,
+                                 const std::vector<Point2f> &pts2,
+                                 std::vector<bool> &inliersF,
+                                 float sigma) {
+
+    const float th = 3.841f * sigma;
+    const float thSq = th * th;
+
+    inliersF.resize(pts1.size(), false);
+    float score = 0.0f;
+
+    for(size_t i = 0; i < pts1.size(); ++i) {
+        Mat p1 = (Mat_<double>(3,1) << pts1[i].x, pts1[i].y, 1.0);
+        Mat p2 = (Mat_<double>(3,1) << pts2[i].x, pts2[i].y, 1.0);
+        (void)p2;
+
+        // Epipolar line in second image
+        Mat l2 = F21 * p1;
+        float a2 = (float)l2.at<double>(0,0);
+        float b2 = (float)l2.at<double>(1,0);
+        float c2 = (float)l2.at<double>(2,0);
+
+        float num2 = a2*pts2[i].x + b2*pts2[i].y + c2;
+        float den2 = a2*a2 + b2*b2;
+        float distSq2 = (num2*num2) / den2;
+
+        if(distSq2 < thSq) {
+            score += thSq - distSq2;
+            inliersF[i] = true;
+        }
+    }
+
+    return score;
+}
+
+} // namespace vo
+} // namespace cv
\ No newline at end of file
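A worked reading of the H-vs-F model selection in `initialize()`, with invented scores:

```cpp
#include <cstdio>

// Model selection from initializer.cpp: ratio = scoreH / (scoreH + scoreF),
// and the homography wins when ratio > 0.45 (scene assumed near-planar).
int main() {
    float scoreH = 4800.0f, scoreF = 5200.0f; // invented scores
    float ratio = scoreH / (scoreH + scoreF); // 0.48
    std::printf("ratio=%.2f -> use %s\n", ratio, ratio > 0.45f ? "H" : "F");
    return 0;
}
```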
diff --git a/modules/slam/src/localizer.cpp b/modules/slam/src/localizer.cpp
new file mode 100644
index 0000000000..8e7c384988
--- /dev/null
+++ b/modules/slam/src/localizer.cpp
@@ -0,0 +1,157 @@
+#include "opencv2/slam/localizer.hpp"
+#include "opencv2/slam/matcher.hpp"
+#include "opencv2/slam/data_loader.hpp"
+#include <opencv2/calib3d.hpp>
+#include <opencv2/imgproc.hpp>
+#include <opencv2/imgcodecs.hpp>
+#include <algorithm>
+#include <fstream>
+#include <sstream>
+#include <cmath>
+
+namespace cv {
+namespace vo {
+
+Localizer::Localizer(float ratio) : ratio_(ratio) {}
+
+bool Localizer::tryPnP(const MapManager &map, const Mat &desc, const std::vector<KeyPoint> &kps,
+                       double fx, double fy, double cx, double cy, int imgW, int imgH,
+                       int min_inliers,
+                       Mat &R_out, Mat &t_out, int &inliers_out,
+                       int frame_id, const Mat *image, const std::string &outDir,
+                       int *out_preMatches, int *out_postMatches, double *out_meanReproj) const {
+    inliers_out = 0; R_out.release(); t_out.release();
+    const auto &mappoints = map.mappoints();
+    const auto &keyframes = map.keyframes();
+    if(mappoints.empty() || keyframes.empty() || desc.empty()) return false;
+
+    // Use last keyframe as prior
+    const KeyFrame &last = keyframes.back();
+    Mat lastR = last.R_w, lastT = last.t_w;
+
+    // select visible candidates
+    std::vector<int> candidates = map.findVisibleCandidates(lastR, lastT, fx, fy, cx, cy, imgW, imgH);
+    if(candidates.empty()) return false;
+
+    // gather descriptors from map (prefer mp.descriptor if available)
+    Mat trainDesc;
+    std::vector<Point3f> objPts; objPts.reserve(candidates.size());
+    std::vector<int> trainMpIds; trainMpIds.reserve(candidates.size());
+    for(int idx: candidates){
+        const auto &mp = mappoints[idx];
+        if(mp.observations.empty()) continue;
+        // prefer representative descriptor on MapPoint
+        if(!mp.descriptor.empty()){
+            trainDesc.push_back(mp.descriptor.row(0));
+        } else {
+            auto ob = mp.observations.front();
+            int kfid = ob.first; int kpidx = ob.second;
+            int kfIdx = map.keyframeIndex(kfid);
+            if(kfIdx < 0) continue;
+            const KeyFrame &kf = keyframes[kfIdx];
+            if(kf.desc.empty() || kpidx < 0 || kpidx >= kf.desc.rows) continue;
+            trainDesc.push_back(kf.desc.row(kpidx));
+        }
+        objPts.emplace_back((float)mp.p.x, (float)mp.p.y, (float)mp.p.z);
+        trainMpIds.push_back(mp.id);
+    }
+    if(trainDesc.empty()) return false;
+
+    // Forward and backward knn to enable mutual cross-check
+    BFMatcher bf(NORM_HAMMING);
+    std::vector<std::vector<DMatch>> knnF, knnB;
+    bf.knnMatch(desc, trainDesc, knnF, 2);
+    bf.knnMatch(trainDesc, desc, knnB, 1);
+
+    int preMatches = static_cast<int>(knnF.size());
+    if(out_preMatches) *out_preMatches = preMatches;
+
+    // Ratio + mutual
+    const float RATIO = ratio_;
+    std::vector<DMatch> goodMatches;
+    goodMatches.reserve(knnF.size());
+    for(size_t q=0;q<knnF.size();++q){
+        if(knnF[q].empty()) continue;
+        const DMatch &m0 = knnF[q][0];
+        if(knnF[q].size() >= 2){
+            const DMatch &m1 = knnF[q][1];
+            if(m0.distance > RATIO * m1.distance) continue;
+        }
+        int trainIdx = m0.trainIdx;
+        // mutual check: ensure best match of trainIdx points back to this query
+        if(trainIdx < 0 || trainIdx >= static_cast<int>(knnB.size())) continue;
+        if(knnB[trainIdx].empty()) continue;
+        int backIdx = knnB[trainIdx][0].trainIdx; // index in desc
+        if(backIdx != static_cast<int>(q)) continue;
+        // passed ratio and mutual
+        goodMatches.push_back(DMatch(static_cast<int>(q), trainIdx, m0.distance));
+    }
+
+    if(out_postMatches) *out_postMatches = static_cast<int>(goodMatches.size());
+
+    if(goodMatches.size() < static_cast<size_t>(std::max(6, min_inliers))) return false;
+
+    // build PnP inputs
+    std::vector<Point3f> obj; std::vector<Point2f> imgpts; obj.reserve(goodMatches.size()); imgpts.reserve(goodMatches.size());
+    std::vector<int> matchedMpIds; matchedMpIds.reserve(goodMatches.size());
+    for(const auto &m: goodMatches){
+        int q = m.queryIdx; int t = m.trainIdx;
+        if(t < 0 || t >= static_cast<int>(objPts.size()) || q < 0 || q >= static_cast<int>(kps.size())) continue;
+        obj.push_back(objPts[t]);
+        imgpts.push_back(kps[q].pt);
+        matchedMpIds.push_back(trainMpIds[t]);
+    }
+
+    if(obj.size() < static_cast<size_t>(std::max(6, min_inliers))) return false;
+
+    Mat camera = (Mat_<double>(3,3) << fx,0,cx, 0,fy,cy, 0,0,1);
+    Mat dist = Mat::zeros(4,1,CV_64F);
+    std::vector<int> inliersIdx;
+    bool ok = solvePnPRansac(obj, imgpts, camera, dist, R_out, t_out, false,
+                             100, 8.0, 0.99, inliersIdx, SOLVEPNP_ITERATIVE);
+    if(!ok) return false;
+    inliers_out = static_cast<int>(inliersIdx.size());
+
+    // compute mean reprojection error on inliers
+    double meanReproj = 0.0;
+    if(!inliersIdx.empty()){
+        std::vector<Point2f> proj;
+        projectPoints(obj, R_out, t_out, camera, dist, proj);
+        double sum = 0.0;
+        for(int idx: inliersIdx){
+            double e = std::hypot(proj[idx].x - imgpts[idx].x, proj[idx].y - imgpts[idx].y);
+            sum += e;
+        }
+        meanReproj = sum / inliersIdx.size();
+    }
+    if(out_meanReproj) *out_meanReproj = meanReproj;
+
+    // Diagnostics: draw matches and inliers if requested
+    if(!outDir.empty() && image){
+        try{
+            ensureDirectoryExists(outDir);
+            Mat vis;
+            if(image->channels() == 1) cvtColor(*image, vis, COLOR_GRAY2BGR);
+            else vis = image->clone();
+            // draw all good matches as small circles; inliers green
+            for(size_t i=0;i<goodMatches.size();++i){
+                int q = goodMatches[i].queryIdx;
+                if(q < 0 || q >= static_cast<int>(kps.size())) continue;
+                Point p = kps[q].pt;
+                bool isInlier = std::find(inliersIdx.begin(), inliersIdx.end(), static_cast<int>(i)) != inliersIdx.end();
+                Scalar col = isInlier ? Scalar(0,255,0) : Scalar(0,0,255);
+                circle(vis, p, 3, col, 2, LINE_AA);
+            }
+            std::ostringstream name; name << outDir << "/pnp_frame_" << frame_id << ".png";
+            putText(vis, "pre=" + std::to_string(preMatches) + " post=" + std::to_string(goodMatches.size()) + " inliers=" + std::to_string(inliers_out) + " mean_px=" + std::to_string(meanReproj),
+                    Point(10,20), FONT_HERSHEY_SIMPLEX, 0.6, Scalar(255,255,255), 2);
+            imwrite(name.str(), vis);
+            // append a small CSV-like log
+            std::ofstream logf((outDir + "/pnp_stats.csv"), std::ios::app);
+            if(logf){
+                logf << frame_id << "," << preMatches << "," << goodMatches.size() << "," << inliers_out << "," << meanReproj << "\n";
+                logf.close();
+            }
+        } catch(...)
+        { /* don't crash on diagnostics */ }
+    }
+
+    return inliers_out >= min_inliers;
+}
+
+} // namespace vo
+} // namespace cv
\ No newline at end of file
diff --git a/modules/slam/src/map.cpp b/modules/slam/src/map.cpp
new file mode 100644
index 0000000000..670eaff63c
--- /dev/null
+++ b/modules/slam/src/map.cpp
@@ -0,0 +1,319 @@
+#include "opencv2/slam/map.hpp"
+#include <opencv2/calib3d.hpp>
+#include <algorithm>
+#include <iostream>
+#include <limits>
+#include <cmath>
+
+namespace cv {
+namespace vo {
+
+MapManager::MapManager() {}
+
+void MapManager::addKeyFrame(const KeyFrame &kf){
+    keyframes_.push_back(kf);
+    id2idx_[kf.id] = static_cast<int>(keyframes_.size()) - 1;
+}
+
+void MapManager::addMapPoints(const std::vector<MapPoint> &pts){
+    for(auto p: pts) {
+        if(p.id <= 0) p.id = next_mappoint_id_++;
+        mpid2idx_[p.id] = static_cast<int>(mappoints_.size());
+        mappoints_.push_back(p);
+    }
+}
+
+std::vector<int> MapManager::findVisibleCandidates(const Mat &lastR, const Mat &lastT,
+                                                   double fx, double fy, double cx, double cy,
+                                                   int imgW, int imgH) const {
+    std::vector<int> candidates; candidates.reserve(mappoints_.size());
+    for(size_t mi=0; mi<mappoints_.size(); ++mi){
+        const auto &mp = mappoints_[mi];
+        Mat Xw = (Mat_<double>(3,1) << mp.p.x, mp.p.y, mp.p.z);
+        Mat Xc = lastR.t() * (Xw - lastT);
+        double z = Xc.at<double>(2,0);
+        if(z <= 0) continue;
+        double u = fx * (Xc.at<double>(0,0)/z) + cx;
+        double v = fy * (Xc.at<double>(1,0)/z) + cy;
+        if(u >= 0 && u < imgW && v >= 0 && v < imgH) candidates.push_back((int)mi);
+    }
+    return candidates;
+}
+
+std::vector<MapPoint> MapManager::triangulateBetweenLastTwo(const std::vector<Point2f> &pts1n,
+                                                            const std::vector<Point2f> &pts2n,
+                                                            const std::vector<int> &pts1_kp_idx,
+                                                            const std::vector<int> &pts2_kp_idx,
+                                                            const KeyFrame &lastKf, const KeyFrame &curKf,
+                                                            double fx, double fy, double cx, double cy){
+    std::vector<MapPoint> newPoints;
+    if(pts1n.empty() || pts2n.empty()) return newPoints;
+    // Relative pose from last -> current (we expect lastKf and curKf poses to be in world)
+    // compute relative transformation
+    // P1 = [I|0], P2 = [R_rel|t_rel] where R_rel = R_last^{-1} * R_cur, t_rel = R_last^{-1}*(t_cur - t_last)
+    Mat R_last = lastKf.R_w, t_last = lastKf.t_w;
+    Mat R_cur = curKf.R_w, t_cur = curKf.t_w;
+    Mat R_rel = R_last.t() * R_cur;
+    Mat t_rel = R_last.t() * (t_cur - t_last);
+    Mat P1 = Mat::eye(3,4,CV_64F);
+    Mat P2(3,4,CV_64F);
+    for(int r=0;r<3;++r){
+        for(int c=0;c<3;++c) P2.at<double>(r,c) = R_rel.at<double>(r,c);
+        P2.at<double>(r,3) = t_rel.at<double>(r,0);
+    }
+    Mat points4D;
+    try{ triangulatePoints(P1, P2, pts1n, pts2n, points4D); }
+    catch(...)
+    { points4D.release(); }
+    if(points4D.empty()) return newPoints;
+    Mat p4d64;
+    if(points4D.type() != CV_64F) points4D.convertTo(p4d64, CV_64F); else p4d64 = points4D;
+    for(int c=0;c<p4d64.cols;++c){
+        double w = p4d64.at<double>(3,c);
+        if(std::abs(w) < 1e-8) continue;
+        double Xx = p4d64.at<double>(0,c)/w;
+        double Xy = p4d64.at<double>(1,c)/w;
+        double Xz = p4d64.at<double>(2,c)/w;
+        if(Xz <= 0) continue;
+        Mat Xc = (Mat_<double>(3,1) << Xx, Xy, Xz);
+        Mat Xw = lastKf.R_w * Xc + lastKf.t_w;
+        MapPoint mp; mp.p = Point3d(Xw.at<double>(0,0), Xw.at<double>(1,0), Xw.at<double>(2,0));
+        // compute reprojection error in both views (pixel coords)
+        // project into last
+        Mat Xc_last = Xc;
+        double u1 = fx * (Xc_last.at<double>(0,0)/Xc_last.at<double>(2,0)) + cx;
+        double v1 = fy * (Xc_last.at<double>(1,0)/Xc_last.at<double>(2,0)) + cy;
+        // project into current: Xc_cur = R_rel * Xc + t_rel (we computed P2 earlier)
+        Mat Xc_cur = R_rel * Xc + t_rel;
+        double u2 = fx * (Xc_cur.at<double>(0,0)/Xc_cur.at<double>(2,0)) + cx;
+        double v2 = fy * (Xc_cur.at<double>(1,0)/Xc_cur.at<double>(2,0)) + cy;
+        // obtain observed pixel locations
+        double obs_u1 = -1, obs_v1 = -1, obs_u2 = -1, obs_v2 = -1;
+        if(c < static_cast<int>(pts1_kp_idx.size())){
+            // pts1n/pts2n are normalized coords; but we were given kp indices - use them if valid
+            int kp1 = pts1_kp_idx[c];
+            if(kp1 >= 0 && kp1 < static_cast<int>(lastKf.kps.size())){
+                obs_u1 = lastKf.kps[kp1].pt.x; obs_v1 = lastKf.kps[kp1].pt.y;
+            }
+        }
+        if(c < static_cast<int>(pts2_kp_idx.size())){
+            int kp2 = pts2_kp_idx[c];
+            if(kp2 >= 0 && kp2 < static_cast<int>(curKf.kps.size())){
+                obs_u2 = curKf.kps[kp2].pt.x; obs_v2 = curKf.kps[kp2].pt.y;
+            }
+        }
+        // if we have observed pixel locations, check reprojection error
+        bool pass = true;
+        const double MAX_REPROJ_PX = 2.0;
+        if(obs_u1 >= 0 && obs_v1 >= 0){
+            double e1 = std::hypot(u1 - obs_u1, v1 - obs_v1);
+            if(e1 > MAX_REPROJ_PX) pass = false;
+        }
+        if(obs_u2 >= 0 && obs_v2 >= 0){
+            double e2 = std::hypot(u2 - obs_u2, v2 - obs_v2);
+            if(e2 > MAX_REPROJ_PX) pass = false;
+        }
+        // parallax check: angle between viewing rays (in last frame)
+        Mat ray1 = Xc_last / norm(Xc_last);
+        Mat ray2 = Xc_cur / norm(Xc_cur);
+        double cos_par = ray1.dot(ray2);
+        double parallax = std::acos(std::min(1.0, std::max(-1.0, cos_par)));
+        const double MIN_PARALLAX_RAD = 1.0 * CV_PI / 180.0; // 1 degree
+        if(parallax < MIN_PARALLAX_RAD) pass = false;
+        if(!pass) continue;
+        // attach observations using provided keypoint indices when available
+        int kp1idx = (c < static_cast<int>(pts1_kp_idx.size())) ? pts1_kp_idx[c] : -1;
+        int kp2idx = (c < static_cast<int>(pts2_kp_idx.size())) ?
+            pts2_kp_idx[c] : -1;
+        if(kp1idx >= 0) mp.observations.emplace_back(lastKf.id, kp1idx);
+        if(kp2idx >= 0) mp.observations.emplace_back(curKf.id, kp2idx);
+        // assign id
+        mp.id = next_mappoint_id_++;
+        mpid2idx_[mp.id] = static_cast<int>(mappoints_.size()) + static_cast<int>(newPoints.size());
+        newPoints.push_back(mp);
+    }
+    // append to internal map
+    for(const auto &p: newPoints) mappoints_.push_back(p);
+    return newPoints;
+}
+
+int MapManager::keyframeIndex(int id) const{
+    auto it = id2idx_.find(id);
+    if(it == id2idx_.end()) return -1;
+    return it->second;
+}
+
+int MapManager::mapPointIndex(int id) const{
+    auto it = mpid2idx_.find(id);
+    if(it == mpid2idx_.end()) return -1;
+    return it->second;
+}
+
+void MapManager::applyOptimizedKeyframePose(int keyframeId, const Mat &R, const Mat &t){
+    int idx = keyframeIndex(keyframeId);
+    if(idx < 0 || idx >= static_cast<int>(keyframes_.size())) return;
+    keyframes_[idx].R_w = R.clone();
+    keyframes_[idx].t_w = t.clone();
+}
+
+void MapManager::applyOptimizedMapPoint(int mappointId, const Point3d &p){
+    int idx = mapPointIndex(mappointId);
+    if(idx < 0 || idx >= static_cast<int>(mappoints_.size())) return;
+    mappoints_[idx].p = p;
+}
+
+void MapManager::cullBadMapPoints() {
+    const double MAX_REPROJ_ERROR = 3.0; // pixels
+    const int MIN_OBSERVATIONS = 2;
+    const float MIN_FOUND_RATIO = 0.25f;
+
+    for(auto &mp : mappoints_) {
+        if(mp.isBad) continue;
+
+        // 1. Check observation count
+        mp.nObs = static_cast<int>(mp.observations.size());
+        if(mp.nObs < MIN_OBSERVATIONS) {
+            mp.isBad = true;
+            continue;
+        }
+
+        // 2. Check found ratio (avoid points rarely tracked)
+        if(mp.nVisible > 10 && mp.getFoundRatio() < MIN_FOUND_RATIO) {
+            mp.isBad = true;
+            continue;
+        }
+
+        // 3. Check reprojection error across observations
+        // Sample a few keyframes to check reprojection
+        int errorCount = 0;
+        int checkCount = 0;
+        for(const auto &obs : mp.observations) {
+            int kfId = obs.first;
+            int kfIdx = keyframeIndex(kfId);
+            if(kfIdx < 0 || kfIdx >= static_cast<int>(keyframes_.size())) continue;
+
+            const KeyFrame &kf = keyframes_[kfIdx];
+            // Use default camera params (should be passed in production code)
+            double fx = 500.0, fy = 500.0, cx = 320.0, cy = 240.0;
+            double error = computeReprojError(mp, kf, fx, fy, cx, cy);
+
+            checkCount++;
+            if(error > MAX_REPROJ_ERROR) {
+                errorCount++;
+            }
+
+            // Sample up to 3 observations for efficiency
+            if(checkCount >= 3) break;
+        }
+
+        // If the majority of samples have high error, mark as bad
+        if(checkCount > 0 && errorCount > checkCount / 2) {
+            mp.isBad = true;
+        }
+    }
+
+    // Remove bad points
+    size_t before = mappoints_.size();
+    mappoints_.erase(
+        std::remove_if(mappoints_.begin(), mappoints_.end(),
+                       [](const MapPoint &p) { return p.isBad; }),
+        mappoints_.end()
+    );
+    size_t after = mappoints_.size();
+
+    if(before - after > 0) {
+        std::cout << "MapManager: culled " << (before - after) << " bad map points ("
+                  << after << " remain)" << std::endl;
+    }
+}
+
+double MapManager::computeReprojError(const MapPoint &mp, const KeyFrame &kf,
+                                      double fx, double fy, double cx, double cy) const {
+    // Transform world point to camera frame
+    Mat Xw = (Mat_<double>(3,1) << mp.p.x, mp.p.y, mp.p.z);
+    Mat Xc = kf.R_w.t() * (Xw - kf.t_w);
+
+    double z = Xc.at<double>(2, 0);
+    if(z <= 0) return std::numeric_limits<double>::max();
+
+    // Project to image
+    double u = fx * (Xc.at<double>(0, 0) / z) + cx;
+    double v = fy * (Xc.at<double>(1, 0) / z) + cy;
+
+    // Find corresponding observation in this keyframe
+    Point2f observed(-1, -1);
+    for(const auto &obs : mp.observations) {
+        if(obs.first ==
+           kf.id) {
+            int kpIdx = obs.second;
+            if(kpIdx >= 0 && kpIdx < static_cast<int>(kf.kps.size())) {
+                observed = kf.kps[kpIdx].pt;
+                break;
+            }
+        }
+    }
+
+    if(observed.x < 0) return std::numeric_limits<double>::max();
+
+    // Compute reprojection error
+    double dx = u - observed.x;
+    double dy = v - observed.y;
+    return std::sqrt(dx * dx + dy * dy);
+}
+
+void MapManager::updateMapPointDescriptor(MapPoint &mp) {
+    if(mp.observations.empty()) return;
+
+    // Collect all descriptors from observations
+    std::vector<Mat> descriptors;
+    for(const auto &obs : mp.observations) {
+        int kfIdx = keyframeIndex(obs.first);
+        if(kfIdx < 0) continue;
+
+        const KeyFrame &kf = keyframes_[kfIdx];
+        int kpIdx = obs.second;
+        if(kpIdx >= 0 && kpIdx < kf.desc.rows) {
+            descriptors.push_back(kf.desc.row(kpIdx));
+        }
+    }
+
+    if(descriptors.empty()) return;
+
+    // Compute median descriptor (for binary descriptors, use majority voting per bit)
+    if(descriptors[0].type() == CV_8U) {
+        // Binary descriptor (ORB)
+        int bytes = descriptors[0].cols;
+        Mat median = Mat::zeros(1, bytes, CV_8U);
+
+        for(int b = 0; b < bytes; ++b) {
+            int bitCount[8] = {0};
+            for(const auto &desc : descriptors) {
+                uchar byte = desc.at<uchar>(0, b);
+                for(int bit = 0; bit < 8; ++bit) {
+                    if(byte & (1 << bit)) bitCount[bit]++;
+                }
+            }
+
+            uchar medianByte = 0;
+            int threshold = static_cast<int>(descriptors.size()) / 2;
+            for(int bit = 0; bit < 8; ++bit) {
+                if(bitCount[bit] > threshold) {
+                    medianByte |= (1 << bit);
+                }
+            }
+            median.at<uchar>(0, b) = medianByte;
+        }
+
+        mp.descriptor = median;
+    } else {
+        // Fallback: use first descriptor
+        mp.descriptor = descriptors[0].clone();
+    }
+}
+
+int MapManager::countGoodMapPoints() const {
+    int count = 0;
+    for(const auto &mp : mappoints_) {
+        if(!mp.isBad) count++;
+    }
+    return count;
+}
+
+} // namespace vo
+} // namespace cv
\ No newline at end of file
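The per-bit majority vote in `updateMapPointDescriptor`, replayed on one byte of three invented ORB descriptors for intuition: a bit survives if more than half of the observations set it.

```cpp
#include <cstdio>

int main() {
    unsigned char d[3] = {0b11000001, 0b11000011, 0b01000001}; // invented descriptors
    unsigned char voted = 0;
    for (int bit = 0; bit < 8; ++bit) {
        int cnt = 0;
        for (int k = 0; k < 3; ++k) if (d[k] & (1u << bit)) cnt++;
        if (cnt > 3 / 2) voted |= (1u << bit); // threshold = descriptors.size()/2
    }
    std::printf("voted byte = 0x%02X\n", voted); // bits 0, 6, 7 win: 0xC1
    return 0;
}
```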
+
+} // namespace vo
+} // namespace cv
\ No newline at end of file
diff --git a/modules/slam/src/matcher.cpp b/modules/slam/src/matcher.cpp
new file mode 100644
index 0000000000..e0579793d3
--- /dev/null
+++ b/modules/slam/src/matcher.cpp
@@ -0,0 +1,132 @@
+#include "opencv2/slam/matcher.hpp"
+
+namespace cv {
+namespace vo {
+
+Matcher::Matcher(float ratio)
+    : ratio_(ratio), bf_(NORM_HAMMING)
+{
+}
+
+void Matcher::knnMatch(const Mat &desc1, const Mat &desc2, std::vector<DMatch> &goodMatches)
+{
+    goodMatches.clear();
+    if(desc1.empty() || desc2.empty()) return;
+    std::vector<std::vector<DMatch>> knn;
+    bf_.knnMatch(desc1, desc2, knn, 2);
+    // Lowe ratio test: keep a match only if it is clearly better than the runner-up
+    for(size_t i = 0; i < knn.size(); ++i){
+        if(knn[i].size() < 2) continue;
+        if(knn[i][0].distance < ratio_ * knn[i][1].distance)
+            goodMatches.push_back(knn[i][0]);
+    }
+}
+
+void Matcher::matchWithBucketing(const Mat &desc1, const Mat &desc2,
+                  const std::vector<KeyPoint> &kps1, const std::vector<KeyPoint> &kps2,
+                  std::vector<DMatch> &goodMatches,
+                  int imgCols, int imgRows,
+                  int bucketRows, int bucketCols, int topKPerBucket,
+                  int maxTotal, bool enableMutual, bool enableBucketing)
+{
+    goodMatches.clear();
+    if(desc1.empty() || desc2.empty()) return;
+    // 1) knn match desc1 -> desc2 and apply the ratio test
+    std::vector<std::vector<DMatch>> knn12;
+    bf_.knnMatch(desc1, desc2, knn12, 2);
+    const int n1 = static_cast<int>(knn12.size());
+
+    std::vector<int> best12(n1, -1); // best trainIdx for each query if the ratio test passed
+    for(int i = 0; i < n1; ++i){
+        if(knn12[i].size() < 2) continue;
+        if(knn12[i][0].distance < ratio_ * knn12[i][1].distance)
+            best12[i] = knn12[i][0].trainIdx;
+    }
+
+    // 2) optional reverse matching desc2 -> desc1 for the mutual cross-check
+    std::vector<int> best21; // only filled if enableMutual
+    int n2 = 0;
+    if(enableMutual){
+        std::vector<std::vector<DMatch>> knn21;
+        bf_.knnMatch(desc2, desc1, knn21, 1);
+        n2 = static_cast<int>(knn21.size());
+        best21.assign(n2, -1);
+        for(int j = 0; j < n2; ++j){
+            if(!knn21[j].empty()) best21[j] = knn21[j][0].trainIdx;
+        }
+    } else {
+        n2 = desc2.rows > 0 ? desc2.rows : 0;
+    }
+
+    // 3) Collect candidate matches according to whether the mutual check is enabled
+    struct Cand { DMatch m; float x; float y; };
+    std::vector<Cand> candidates;
+    candidates.reserve(n1);
+    for(int i = 0; i < n1; ++i){
+        int j = best12[i];
+        if(j < 0) continue;
+        if(enableMutual){
+            if(j >= n2) continue;
+            if(best21[j] != i) continue; // mutual check
+        }
+        // safe distance value from knn12
+        float dist = (knn12[i].size() ? knn12[i][0].distance : 0.0f);
+        DMatch dm(i, j, dist);
+        // keypoint location in the current frame (kps2)
+        float x = 0, y = 0;
+        if(j >= 0 && j < (int)kps2.size()){ x = kps2[j].pt.x; y = kps2[j].pt.y; }
+        candidates.push_back({dm, x, y});
+    }
+
+    if(candidates.empty()) return;
+
+    // 4) Spatial bucketing on current-frame locations to promote spatially distributed matches
+    std::vector<DMatch> interimMatches;
+    interimMatches.reserve(candidates.size());
+    if(enableBucketing){
+        int cols = std::max(1, bucketCols);
+        int rows = std::max(1, bucketRows);
+        float cellW = (imgCols > 0) ? (float)imgCols / cols : 1.0f;
+        float cellH = (imgRows > 0) ? (float)imgRows / rows : 1.0f;
+
+        // buckets: for each cell keep the topKPerBucket smallest-distance matches
+        std::vector<std::vector<Cand>> buckets(cols * rows);
+        for(const auto &c: candidates){
+            int bx = 0, by = 0;
+            if(cellW > 0) bx = std::min(cols - 1, std::max(0, (int)std::floor(c.x / cellW)));
+            if(cellH > 0) by = std::min(rows - 1, std::max(0, (int)std::floor(c.y / cellH)));
+            int idx = by * cols + bx;
+            buckets[idx].push_back(c);
+        }
+
+        // sort each bucket and take the top K
+        for(auto &vec: buckets){
+            if(vec.empty()) continue;
+            std::sort(vec.begin(), vec.end(), [](const Cand &a, const Cand &b){ return a.m.distance < b.m.distance; });
+            int take = std::min((int)vec.size(), topKPerBucket);
+            for(int i = 0; i < take; ++i) interimMatches.push_back(vec[i].m);
+        }
+    } else {
+        for(const auto &c: candidates) interimMatches.push_back(c.m);
+    }
+
+    // 5) enforce the global cap, keeping the best-distance matches
+    if(maxTotal > 0 && (int)interimMatches.size() > maxTotal){
+        std::nth_element(interimMatches.begin(), interimMatches.begin() + maxTotal, interimMatches.end(),
+                         [](const DMatch &a, const DMatch &b){ return a.distance < b.distance; });
+        interimMatches.resize(maxTotal);
+    }
+
+    // final sort by distance
+    std::sort(interimMatches.begin(), interimMatches.end(), [](const DMatch &a, const DMatch &b){ return a.distance < b.distance; });
+    goodMatches = std::move(interimMatches);
+}
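+
+// Illustrative sketch: how a caller might use the bucketed matcher. The image
+// size and grid parameters here are assumptions, not defaults of this module:
+//
+//   cv::vo::Matcher matcher(0.75f);
+//   std::vector<cv::DMatch> matches;
+//   matcher.matchWithBucketing(descPrev, descCur, kpsPrev, kpsCur, matches,
+//                              /*imgCols=*/640, /*imgRows=*/480,
+//                              /*bucketRows=*/4, /*bucketCols=*/6,
+//                              /*topKPerBucket=*/15, /*maxTotal=*/300,
+//                              /*enableMutual=*/true, /*enableBucketing=*/true);
+//
+// Bucketing trades a few strong-but-clustered matches for an even spatial
+// spread, which tends to stabilize essential-matrix and PnP estimation.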
+
+} // namespace vo
+} // namespace cv
\ No newline at end of file
diff --git a/modules/slam/src/optimizer.cpp b/modules/slam/src/optimizer.cpp
new file mode 100644
index 0000000000..b1e037e142
--- /dev/null
+++ b/modules/slam/src/optimizer.cpp
@@ -0,0 +1,370 @@
+#include "opencv2/slam/optimizer.hpp"
+#include <algorithm>
+#include <cmath>
+#include <iostream>
+#include <limits>
+#include <map>
+#include <memory>
+#include <unordered_set>
+
+// If g2o is enabled (CMake defines HAVE_G2O), compile and use the g2o-based
+// implementation. Otherwise fall back to a simplified OpenCV-based implementation.
+
+#if defined(HAVE_G2O)
+#include <g2o/core/sparse_optimizer.h>
+#include <g2o/core/block_solver.h>
+#include <g2o/core/optimization_algorithm_levenberg.h>
+#include <g2o/core/robust_kernel_impl.h>
+#include <g2o/solvers/eigen/linear_solver_eigen.h>
+#include <g2o/types/sba/types_sba.h>
+#include <g2o/types/sba/types_six_dof_expmap.h>
+#include <Eigen/Core>
+#include <Eigen/Geometry>
+#include <opencv2/core/eigen.hpp>
+#elif defined(HAVE_SFM)
+#include "opencv2/sfm.hpp"
+#endif
+
+#include <opencv2/calib3d.hpp>
+
+namespace cv {
+namespace vo {
+
+Optimizer::Optimizer() {}
+
+#if defined(HAVE_G2O)
+void Optimizer::localBundleAdjustmentG2O(
+    std::vector<KeyFrame> &keyframes,
+    std::vector<MapPoint> &mappoints,
+    const std::vector<int> &localKfIndices,
+    const std::vector<int> &fixedKfIndices,
+    double fx, double fy, double cx, double cy,
+    int iterations) {
+
+    if(localKfIndices.empty()) return;
+
+    typedef g2o::BlockSolver< g2o::BlockSolverTraits<6,3> > Block;
+    auto linearSolver = std::make_unique<g2o::LinearSolverEigen<Block::PoseMatrixType>>();
+    auto blockSolver = std::make_unique<Block>(std::move(linearSolver));
+    auto solver = new g2o::OptimizationAlgorithmLevenberg(std::move(blockSolver));
+
+    g2o::SparseOptimizer optimizer;
+    optimizer.setAlgorithm(solver);
+    optimizer.setVerbose(false);
+
+    // Camera parameter (id = 0)
+    g2o::CameraParameters* cam = new g2o::CameraParameters(fx, Eigen::Vector2d(cx, cy), 0);
+    cam->setId(0);
+    optimizer.addParameter(cam);
+
+    // Poses
+    std::map<int, g2o::VertexSE3Expmap*> poseVertices;
+    for (int idx : localKfIndices) {
+        if (idx < 0 || idx >= static_cast<int>(keyframes.size())) continue;
+        KeyFrame &kf = keyframes[idx];
+        Eigen::Matrix3d R;
+        cv2eigen(kf.R_w, R);
+        Eigen::Vector3d t;
+        cv2eigen(kf.t_w, t);
+        g2o::SE3Quat pose(R, t);
+        auto *vSE3 = new g2o::VertexSE3Expmap();
+        vSE3->setId(idx);
+        vSE3->setEstimate(pose);
+        if (std::find(fixedKfIndices.begin(), fixedKfIndices.end(), idx) != fixedKfIndices.end()) vSE3->setFixed(true);
+        optimizer.addVertex(vSE3);
+        poseVertices[idx] = vSE3;
+    }
+
+    // Points (vertex ids are offset so they never collide with pose vertex ids)
+    const int POINT_ID_OFFSET = 10000;
+    std::vector<int> mpVertexIds(mappoints.size(), -1);
+    for (size_t i = 0; i < mappoints.size(); ++i) {
+        const MapPoint &mp = mappoints[i];
+        auto *vPoint = new g2o::VertexPointXYZ();
+        vPoint->setEstimate(Eigen::Vector3d(mp.p.x, mp.p.y, mp.p.z));
+        int vid = POINT_ID_OFFSET + static_cast<int>(i);
+        vPoint->setId(vid);
+        vPoint->setMarginalized(true);
+        optimizer.addVertex(vPoint);
+        mpVertexIds[i] = vid;
+    }
+
+    // Observations / edges
+    for (size_t i = 0; i < mappoints.size(); ++i) {
+        const MapPoint &mp = mappoints[i];
+        int pid = mpVertexIds[i];
+        if (pid < 0) continue;
+        for (const auto &obs : mp.observations) {
+            int kfIdx = obs.first;
+            int kpIdx = obs.second;
+            if (poseVertices.find(kfIdx) == poseVertices.end()) continue;
+            if (kfIdx < 0 || kfIdx >= static_cast<int>(keyframes.size())) continue;
+            KeyFrame &kf = keyframes[kfIdx];
+            if (kpIdx < 0 || kpIdx >= static_cast<int>(kf.kps.size())) continue;
+
+            const KeyPoint &kp = kf.kps[kpIdx];
+            Eigen::Vector2d meas(kp.pt.x, kp.pt.y);
+
+            auto *edge = new g2o::EdgeProjectXYZ2UV();
+            edge->setVertex(0, optimizer.vertex(pid));
+            edge->setVertex(1, optimizer.vertex(kfIdx));
+            edge->setMeasurement(meas);
+            edge->information() = Eigen::Matrix2d::Identity();
+            edge->setParameterId(0, cam->id());
+            auto *rk = new g2o::RobustKernelHuber();
+            rk->setDelta(1.0);
+            edge->setRobustKernel(rk);
+            optimizer.addEdge(edge);
+        }
+    }
+
+    optimizer.initializeOptimization();
+    optimizer.optimize(iterations);
+
+    // Write back poses
+    for (auto &kv : poseVertices) {
+        int idx = kv.first;
+        g2o::VertexSE3Expmap* v = kv.second;
+        g2o::SE3Quat est = v->estimate();
+        Eigen::Matrix3d R = est.rotation().toRotationMatrix();
+        Eigen::Vector3d t = est.translation();
+        Mat Rcv, tcv;
+        eigen2cv(R, Rcv);
+        eigen2cv(t, tcv);
+        keyframes[idx].R_w = Rcv.clone();
+        keyframes[idx].t_w = tcv.clone();
+    }
+
+    // Write back points
+    for (size_t i = 0; i < mappoints.size(); ++i) {
+        int pid = mpVertexIds[i];
+        if (pid < 0) continue;
+        auto *v = dynamic_cast<g2o::VertexPointXYZ*>(optimizer.vertex(pid));
+        if (!v) continue;
+        Eigen::Vector3d p = v->estimate();
+        mappoints[i].p.x = p[0];
+        mappoints[i].p.y = p[1];
+        mappoints[i].p.z = p[2];
+    }
+}
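+
+// Illustrative sketch (only meaningful when HAVE_G2O is defined): refine the
+// last five keyframes while holding the oldest of them fixed as the gauge
+// anchor. The intrinsics below are placeholders, not values this module uses:
+//
+//   std::vector<int> local = {5, 6, 7, 8, 9};
+//   std::vector<int> fixed = {5};
+//   Optimizer::localBundleAdjustmentG2O(keyframes, mappoints, local, fixed,
+//       /*fx=*/718.9, /*fy=*/718.9, /*cx=*/607.2, /*cy=*/185.2,
+//       /*iterations=*/10);
+//
+// Fixing at least one pose is essential: without it the window can drift
+// freely (gauge freedom) and the optimizer may converge to a shifted map.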
+#endif
+
+#if defined(HAVE_SFM)
+void Optimizer::localBundleAdjustmentSFM(
+    std::vector<KeyFrame> &keyframes,
+    std::vector<MapPoint> &mappoints,
+    const std::vector<int> &localKfIndices,
+    const std::vector<int> &fixedKfIndices,
+    double fx, double fy, double cx, double cy,
+    int iterations) {
+    // Simple SFM-based local bundle adjustment using OpenCV routines.
+    // This implementation alternates:
+    //   - point-only Gauss-Newton updates (poses kept fixed)
+    //   - pose-only optimization using PnP (points kept fixed)
+    // It uses the jacobians w.r.t. point coordinates from
+    // computeReprojectionError and the existing optimizePose() helper
+    // to avoid depending on g2o / ceres. This provides an effective
+    // local refinement for SLAM windows when full BA backends are
+    // not available.
+
+    if (localKfIndices.empty() || mappoints.empty()) return;
+
+    // Fast lookup for whether a keyframe id is in the local window
+    std::unordered_set<int> localSet(localKfIndices.begin(), localKfIndices.end());
+    std::unordered_set<int> fixedSet(fixedKfIndices.begin(), fixedKfIndices.end());
+
+    const double HUBER_DELTA = 3.0;    // pixels
+    const double MAX_POINT_STEP = 1.0; // meters (per-iteration safeguard)
+
+    // Outer iterations: alternate point updates and pose updates
+    for (int iter = 0; iter < iterations; ++iter) {
+        // --- Point-only Gauss-Newton update (poses fixed) ---
+        for (size_t pid = 0; pid < mappoints.size(); ++pid) {
+            MapPoint &mp = mappoints[pid];
+            if (mp.isBad) continue;
+
+            Mat JtJ = Mat::zeros(3, 3, CV_64F);
+            Mat Jtr = Mat::zeros(3, 1, CV_64F);
+            int obsCount = 0;
+
+            for (const auto &obs : mp.observations) {
+                int kfIdx = obs.first;
+                int kpIdx = obs.second;
+                if (localSet.find(kfIdx) == localSet.end()) continue;
+                if (kfIdx < 0 || kfIdx >= static_cast<int>(keyframes.size())) continue;
+                KeyFrame &kf = keyframes[kfIdx];
+                if (kpIdx < 0 || kpIdx >= static_cast<int>(kf.kps.size())) continue;
+
+                Point2f observed = kf.kps[kpIdx].pt;
+                // Project the current point into this keyframe
+                Point2f proj = project(mp.p, kf.R_w, kf.t_w, fx, fy, cx, cy);
+                if (proj.x < 0 || proj.y < 0) continue;
+                // Compute jacobians (fills jacobianPoint)
+                Mat jacPose, jacPoint;
+                computeReprojectionError(mp.p, kf.R_w, kf.t_w, observed, fx, fy, cx, cy, jacPose, jacPoint);
+                // jacPoint is 2x3, residual r = [u - u_obs; v - v_obs]
+                Mat r = (Mat_<double>(2,1) << proj.x - observed.x, proj.y - observed.y);
+
+                // robust weight (Huber)
+                double err = std::sqrt(r.at<double>(0,0)*r.at<double>(0,0) + r.at<double>(1,0)*r.at<double>(1,0));
+                double w = 1.0;
+                if (err > HUBER_DELTA) w = HUBER_DELTA / err;
+
+                // accumulate JtJ and Jtr
+                Mat Jt = jacPoint.t();             // 3x2
+                Mat W = Mat::eye(2,2,CV_64F) * w;
+                Mat JtW = Jt * W;                  // 3x2
+                JtJ += JtW * jacPoint;             // 3x3
+                Jtr += JtW * r;                    // 3x1
+                obsCount++;
+            }
+
+            if (obsCount < 2) continue; // not enough constraints
+
+            // Solve for the update; DECOMP_SVD also copes with a near-singular JtJ
+            Mat delta; // 3x1
+            if (!cv::solve(JtJ, -Jtr, delta, cv::DECOMP_SVD)) continue;
+
+            double step = std::sqrt(delta.at<double>(0,0)*delta.at<double>(0,0)
+                                  + delta.at<double>(1,0)*delta.at<double>(1,0)
+                                  + delta.at<double>(2,0)*delta.at<double>(2,0));
+            if (step > MAX_POINT_STEP) {
+                double scale = MAX_POINT_STEP / step;
+                delta *= scale;
+            }
+
+            mp.p.x += delta.at<double>(0,0);
+            mp.p.y += delta.at<double>(1,0);
+            mp.p.z += delta.at<double>(2,0);
+        }
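+
+        // In matrix form the update solved above is one weighted Gauss-Newton
+        // step on a single 3D point:
+        //
+        //   (J^T W J) * delta = -J^T W r,    W = w * I_2,
+        //   w = 1                  if ||r|| <= HUBER_DELTA
+        //   w = HUBER_DELTA/||r||  otherwise (Huber down-weighting)
+        //
+        // e.g. a 9 px residual with HUBER_DELTA = 3 contributes with weight
+        // 1/3, so a single bad observation cannot drag the point far.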
+
+        // --- Pose-only optimization (points fixed) ---
+        // For each local keyframe that is not fixed, call optimizePose
+        for (int kfId : localKfIndices) {
+            if (fixedSet.find(kfId) != fixedSet.end()) continue;
+            if (kfId < 0 || kfId >= static_cast<int>(keyframes.size())) continue;
+            KeyFrame &kf = keyframes[kfId];
+            // Build matchedMpIndices: for each keypoint in this KF, the map point index it observes
+            std::vector<int> matchedMpIndices(kf.kps.size(), -1);
+            for (size_t mpIdx = 0; mpIdx < mappoints.size(); ++mpIdx) {
+                const MapPoint &mp = mappoints[mpIdx];
+                if (mp.isBad) continue;
+                for (const auto &obs : mp.observations) {
+                    if (obs.first == kfId) {
+                        int kpIdx = obs.second;
+                        if (kpIdx >= 0 && kpIdx < static_cast<int>(matchedMpIndices.size()))
+                            matchedMpIndices[kpIdx] = static_cast<int>(mpIdx);
+                    }
+                }
+            }
+            std::vector<bool> inliers;
+            // Use at least as many RANSAC attempts as outer iterations
+            optimizePose(kf, mappoints, matchedMpIndices, fx, fy, cx, cy, inliers, std::max(20, iterations));
+        }
+    }
+}
+#endif
+
+bool Optimizer::optimizePose(
+    KeyFrame &kf,
+    const std::vector<MapPoint> &mappoints,
+    const std::vector<int> &matchedMpIndices,
+    double fx, double fy, double cx, double cy,
+    std::vector<bool> &inliers,
+    int iterations) {
+
+    if(matchedMpIndices.empty()) return false;
+    inliers.assign(matchedMpIndices.size(), false);
+
+    std::vector<Point3f> objectPoints;
+    std::vector<Point2f> imagePoints;
+    std::vector<int> srcIndices; // maps objectPoints index -> matchedMpIndices index
+    for(size_t i = 0; i < matchedMpIndices.size(); ++i) {
+        int mpIdx = matchedMpIndices[i];
+        if(mpIdx < 0 || mpIdx >= static_cast<int>(mappoints.size())) continue;
+        const MapPoint &mp = mappoints[mpIdx];
+        if(mp.isBad) continue;
+        if(i < kf.kps.size()) {
+            objectPoints.emplace_back(mp.p.x, mp.p.y, mp.p.z);
+            imagePoints.push_back(kf.kps[i].pt);
+            srcIndices.push_back(static_cast<int>(i));
+        }
+    }
+    if(objectPoints.size() < 4) return false;
+
+    Mat K = (Mat_<double>(3,3) << fx, 0, cx, 0, fy, cy, 0, 0, 1);
+    Mat rvec, tvec, inlierIdx;
+    bool success = solvePnPRansac(objectPoints, imagePoints, K, Mat(), rvec, tvec,
+                                  false, iterations, 2.0, 0.99, inlierIdx);
+    if(!success) return false;
+    Mat R;
+    Rodrigues(rvec, R);
+    kf.R_w = R;
+    kf.t_w = tvec;
+    // solvePnPRansac reports inliers as *indices* into objectPoints/imagePoints,
+    // not as a per-correspondence mask, so map them back through srcIndices
+    for(int k = 0; k < inlierIdx.rows; ++k) {
+        int obj = inlierIdx.at<int>(k, 0);
+        if(obj >= 0 && obj < static_cast<int>(srcIndices.size()))
+            inliers[srcIndices[obj]] = true;
+    }
+
+    return true;
+}
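+
+// Note on the PnP step above: OpenCV's solvePnPRansac fills its last output
+// with an Nx1 CV_32S vector of inlier *indices* into the object/image point
+// arrays, which is why the code maps them back through srcIndices instead of
+// reading it as a mask. A minimal call looks like (values illustrative):
+//
+//   cv::Mat rvec, tvec, idx;
+//   cv::solvePnPRansac(obj, img, K, cv::Mat(), rvec, tvec,
+//                      /*useExtrinsicGuess=*/false, /*iterations=*/100,
+//                      /*reprojError=*/2.0, /*confidence=*/0.99, idx);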
+
+#if defined(HAVE_SFM)
+void Optimizer::globalBundleAdjustmentSFM(
+    std::vector<KeyFrame> &keyframes,
+    std::vector<MapPoint> &mappoints,
+    double fx, double fy, double cx, double cy,
+    int iterations) {
+
+    std::cout << "Optimizer: Global BA with " << keyframes.size()
+              << " KFs and " << mappoints.size() << " map points" << std::endl;
+    std::vector<int> localKfIndices;
+    for(size_t i = 1; i < keyframes.size(); ++i) localKfIndices.push_back(static_cast<int>(i));
+    std::vector<int> fixedKfIndices = {0};
+    localBundleAdjustmentSFM(keyframes, mappoints, localKfIndices, fixedKfIndices, fx, fy, cx, cy, iterations);
+    std::cout << "Optimizer: Global BA completed" << std::endl;
+}
+#endif
+
+double Optimizer::computeReprojectionError(
+    const Point3d &point3D,
+    const Mat &R, const Mat &t,
+    const Point2f &observed,
+    double fx, double fy, double cx, double cy,
+    Mat &jacobianPose,
+    Mat &jacobianPoint) {
+
+    Mat Xw = (Mat_<double>(3,1) << point3D.x, point3D.y, point3D.z);
+    Mat Xc = R.t() * (Xw - t);
+    double x = Xc.at<double>(0,0), y = Xc.at<double>(1,0), z = Xc.at<double>(2,0);
+    if(z <= 0) return std::numeric_limits<double>::max();
+    double u = fx * (x / z) + cx;
+    double v = fy * (y / z) + cy;
+    double du = u - observed.x, dv = v - observed.y;
+    // jacobianPose is currently left at zero: only the point jacobian is
+    // consumed by the point-only Gauss-Newton step above
+    jacobianPose = Mat::zeros(2,6,CV_64F);
+    jacobianPoint = Mat::zeros(2,3,CV_64F);
+    double invZ = 1.0 / z; double invZ2 = invZ * invZ;
+    // d(u,v)/dXc for the pinhole projection, evaluated in the camera frame
+    jacobianPoint.at<double>(0,0) = fx * invZ;
+    jacobianPoint.at<double>(0,1) = 0;
+    jacobianPoint.at<double>(0,2) = -fx * x * invZ2;
+    jacobianPoint.at<double>(1,0) = 0;
+    jacobianPoint.at<double>(1,1) = fy * invZ;
+    jacobianPoint.at<double>(1,2) = -fy * y * invZ2;
+    // chain rule: Xc = R^T (Xw - t), so d(u,v)/dXw = d(u,v)/dXc * R^T
+    jacobianPoint = jacobianPoint * R.t();
+    return std::sqrt(du*du + dv*dv);
+}
+
+Point2f Optimizer::project(
+    const Point3d &point3D,
+    const Mat &R, const Mat &t,
+    double fx, double fy, double cx, double cy) {
+
+    Mat Xw = (Mat_<double>(3,1) << point3D.x, point3D.y, point3D.z);
+    Mat Xc = R.t() * (Xw - t);
+    double z = Xc.at<double>(2,0);
+    if(z <= 0) return Point2f(-1,-1);
+    float u = static_cast<float>(fx * (Xc.at<double>(0,0) / z) + cx);
+    float v = static_cast<float>(fy * (Xc.at<double>(1,0) / z) + cy);
+    return Point2f(u,v);
+}
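+
+// For reference, the projection used throughout this file is the standard
+// pinhole model with the keyframe pose stored as camera-to-world (R_w, t_w):
+//
+//   Xc = R_w^T * (Xw - t_w)
+//   u  = fx * Xc.x / Xc.z + cx
+//   v  = fy * Xc.y / Xc.z + cy
+//
+// e.g. with fx = fy = 500, cx = 320, cy = 240 and Xc = (0.1, -0.2, 2.0), the
+// point projects to u = 500*0.05 + 320 = 345 and v = 500*(-0.1) + 240 = 190.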
+
+} // namespace vo
+} // namespace cv
\ No newline at end of file
diff --git a/modules/slam/src/pose.cpp b/modules/slam/src/pose.cpp
new file mode 100644
index 0000000000..728e27f313
--- /dev/null
+++ b/modules/slam/src/pose.cpp
@@ -0,0 +1,43 @@
+#include "opencv2/slam/pose.hpp"
+#include <limits>
+
+namespace cv {
+namespace vo {
+
+bool PoseEstimator::estimate(const std::vector<Point2f> &pts1,
+                             const std::vector<Point2f> &pts2,
+                             double fx, double fy, double cx, double cy,
+                             Mat &R, Mat &t, Mat &mask, int &inliers)
+{
+    if(pts1.size() < 8 || pts2.size() < 8) { inliers = 0; return false; }
+    double focal = (fx + fy) * 0.5;
+    Point2d pp(cx, cy);
+    // If the provided principal point looks invalid (e.g. zeros or tiny values),
+    // fall back to the approximate center of the matched points' bounding box.
+    // This is better than leaving (0,0), which can break essential-matrix estimation.
+    if((pp.x <= 2.0 || pp.y <= 2.0) && !pts1.empty()){
+        float minx = std::numeric_limits<float>::max();
+        float miny = std::numeric_limits<float>::max();
+        float maxx = std::numeric_limits<float>::lowest();
+        float maxy = std::numeric_limits<float>::lowest();
+        for(size_t i = 0; i < pts1.size(); ++i){
+            minx = std::min(minx, pts1[i].x); maxx = std::max(maxx, pts1[i].x);
+            miny = std::min(miny, pts1[i].y); maxy = std::max(maxy, pts1[i].y);
+        }
+        pp = Point2d((minx + maxx) * 0.5, (miny + maxy) * 0.5);
+    }
+    // Essential matrix with RANSAC, then cheirality-checked pose recovery
+    Mat E = findEssentialMat(pts1, pts2, focal, pp, RANSAC, 0.999, 1.0, mask);
+    if(E.empty()) { inliers = 0; return false; }
+    inliers = recoverPose(E, pts1, pts2, R, t, focal, pp, mask);
+    return inliers > 0;
+}
+
+} // namespace vo
+} // namespace cv
\ No newline at end of file
diff --git a/modules/slam/src/tracker.cpp b/modules/slam/src/tracker.cpp
new file mode 100644
--- /dev/null
+++ b/modules/slam/src/tracker.cpp
+#include "opencv2/slam/visualizer.hpp"
+#include "opencv2/slam/vo.hpp"
+#include <opencv2/features2d.hpp>
+#include <opencv2/highgui.hpp>
+#include <opencv2/imgproc.hpp>
+#include <cmath>
+
+namespace cv {
+namespace vo {
+
+Tracker::Tracker()
+    : feat_(), matcher_(), poseEst_(), frame_id_(0)
+{
+}
+
+bool Tracker::processFrame(const Mat &gray, const std::string &imagePath, Mat &imgOut, Mat &R_out, Mat &t_out, std::string &info)
+{
+    if(gray.empty()) return false;
+    // detect
+    std::vector<KeyPoint> kps;
+    Mat desc;
+    feat_.detectAndCompute(gray, kps, desc);
+
+    if(!prevGray_.empty() && !prevDesc_.empty() && !desc.empty()){
+        // match
+        std::vector<DMatch> goodMatches;
+        matcher_.knnMatch(prevDesc_, desc, goodMatches);
+
+        // draw matches for visualization
+        drawMatches(prevGray_, prevKp_, gray, kps, goodMatches, imgOut,
+                    Scalar::all(-1), Scalar::all(-1), std::vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS);
+
+        // prepare points
+        std::vector<Point2f> pts1, pts2;
+        for(const auto &m: goodMatches){
+            pts1.push_back(prevKp_[m.queryIdx].pt);
+            pts2.push_back(kps[m.trainIdx].pt);
+        }
+
+        if(pts1.size() >= 8){
+            Mat R, t, mask;
+            int inliers = 0;
+            // Note: intrinsics are not available here; the caller should invoke
+            // PoseEstimator directly if a pose is needed. To keep the interface
+            // stable, R_out/t_out are left empty and only info is set.
+            info = "matches=" + std::to_string(goodMatches.size()) + ", inliers=" + std::to_string(inliers);
+            // update prev buffers below
+        } else {
+            info = "matches=" + std::to_string(goodMatches.size()) + ", inliers=0";
+        }
+    } else {
+        // first frame: draw keypoints
+        drawKeypoints(gray, kps, imgOut, Scalar::all(-1), DrawMatchesFlags::DRAW_RICH_KEYPOINTS);
+        info = "first_frame";
+    }
+
+    // update prev
+    prevGray_ = gray.clone();
+    prevKp_ = kps;
+    prevDesc_ = desc.clone();
+    frame_id_++;
+    return true;
+}
+
+Visualizer::Visualizer(int W, int H, double meters_per_pixel)
+    : W_(W), H_(H), meters_per_pixel_(meters_per_pixel), mapSize_(W,H)
+{
+    map_ = Mat::zeros(mapSize_, CV_8UC3);
+}
+
+Point Visualizer::worldToPixel(const Point2d &p) const {
+    Point2d origin(mapSize_.width/2.0, mapSize_.height/2.0);
+    int x = int(origin.x + p.x / meters_per_pixel_);
+    int y = int(origin.y - p.y / meters_per_pixel_);
+    return Point(x,y);
+}
+
+void Visualizer::addPose(double x, double z){
+    traj_.emplace_back(x,z);
+}
+
+void Visualizer::showFrame(const Mat &frame){
+    if(frame.empty()) return;
+    // Do not draw the heading overlay on video frames; show the raw frame only.
+    imshow("frame", frame);
+}
+
+void Visualizer::showTopdown(){
+    map_ = Mat::zeros(mapSize_, CV_8UC3);
+    for (int gx = 0; gx < mapSize_.width; gx += 50) line(map_, Point(gx,0), Point(gx,mapSize_.height), Scalar(30,30,30), 1);
+    for (int gy = 0; gy < mapSize_.height; gy += 50) line(map_, Point(0,gy), Point(mapSize_.width,gy), Scalar(30,30,30), 1);
+    for(size_t i = 1; i < traj_.size(); ++i)
+        line(map_, worldToPixel(traj_[i-1]), worldToPixel(traj_[i]), Scalar(0,255,0), 2);
+    if(!traj_.empty()){
+        Point p = worldToPixel(traj_.back());
+        circle(map_, p, 4, Scalar(0,0,255), -1);
+        // heading arrow from the average of the last few displacements
+        if(traj_.size() >= 2){
+            int K = static_cast<int>(std::min<size_t>(5, traj_.size() - 1));
+            double dx = 0.0, dz = 0.0;
+            for(int i = 0; i < K; ++i){
+                dx += traj_[traj_.size()-1-i].x - traj_[traj_.size()-2-i].x;
+                dz += traj_[traj_.size()-1-i].y - traj_[traj_.size()-2-i].y;
+            }
+            double norm = std::sqrt(dx*dx + dz*dz);
+            if(norm > 1e-6){
+                dx /= norm; dz /= norm;
+                // arrow length in world meters
+                double arrow_m = 0.5; // 0.5 meters
+                // tail sits arrow_m behind the current position; the tip is at the current position
+                Point2d tail_world(traj_.back().x - dx * arrow_m, traj_.back().y - dz * arrow_m);
+                Point tail_px = worldToPixel(tail_world);
+                arrowedLine(map_, tail_px, p, Scalar(0,0,255), 2, LINE_AA, 0, 0.3);
+            }
+        }
+        // label near the current position
+        putText(map_, "Robot", p + Point(10,-10), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(255,255,255), 1);
+    }
+    imshow("topdown", map_);
+}
+
+bool Visualizer::saveTrajectory(const std::string &path){
+    if(map_.empty()) showTopdown();
+    return imwrite(path, map_);
+}
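+
+// Illustrative sketch: feeding the visualizer from a pose loop. The window
+// size and the 0.05 m-per-pixel scale are assumptions, not module defaults:
+//
+//   cv::vo::Visualizer vis(800, 800, 0.05);
+//   // inside the tracking loop, with t_g the current global translation:
+//   //   x right, z forward; z is negated so "forward" points up in the map
+//   vis.addPose(t_g.at<double>(0), -t_g.at<double>(2));
+//   vis.showTopdown();
+//   // once at shutdown:
+//   vis.saveTrajectory("trajectory.png");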
+
+} // namespace vo
+} // namespace cv
\ No newline at end of file
diff --git a/modules/slam/src/vo.cpp b/modules/slam/src/vo.cpp
new file mode 100644
index 0000000000..c1ac1e09a9
--- /dev/null
+++ b/modules/slam/src/vo.cpp
@@ -0,0 +1,513 @@
+#include "opencv2/slam/data_loader.hpp"
+#include "opencv2/slam/feature.hpp"
+#include "opencv2/slam/initializer.hpp"
+#include "opencv2/slam/keyframe.hpp"
+#include "opencv2/slam/localizer.hpp"
+#include "opencv2/slam/map.hpp"
+#include "opencv2/slam/matcher.hpp"
+#include "opencv2/slam/optimizer.hpp"
+#include "opencv2/slam/pose.hpp"
+#include "opencv2/slam/visualizer.hpp"
+#include "opencv2/slam/vo.hpp"
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <cmath>
+#include <condition_variable>
+#include <ctime>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <mutex>
+#include <sstream>
+#include <string>
+#include <thread>
+#include <unordered_map>
+#include <vector>
+
+namespace cv {
+namespace vo {
+
+VisualOdometry::VisualOdometry(Ptr<Feature2D> feature, Ptr<DescriptorMatcher> matcher)
+    : feature_(std::move(feature)), matcher_(std::move(matcher)) {
+}
+
+int VisualOdometry::run(const std::string &imageDir, double scale_m, const VisualOdometryOptions &options){
+    DataLoader loader(imageDir);
+    std::cout << "VisualOdometry: loaded " << loader.size() << " images from " << imageDir << std::endl;
+    if(loader.size() == 0){
+        std::cerr << "VisualOdometry: no images found in " << imageDir << std::endl;
+        return -1;
+    }
+
+    if(!feature_){
+        feature_ = ORB::create(2000);
+    }
+    if(!matcher_){
+        matcher_ = DescriptorMatcher::create(DescriptorMatcher::BRUTEFORCE_HAMMING);
+    }
+    // Injected OpenCV components replace the internal FeatureExtractor/Matcher here
+    PoseEstimator poseEst;
+    Visualizer vis;
+    MapManager map;
+    // two-view initializer
+    Initializer initializer;
+    // configure the Localizer with a slightly stricter Lowe ratio (0.7)
+    Localizer localizer(0.7f);
+
+    // prepare per-run CSV diagnostics
+    std::string runTimestamp;
+    {
+        auto now = std::chrono::system_clock::now();
+        std::time_t t = std::chrono::system_clock::to_time_t(now);
+        std::tm tm = *std::localtime(&t);
+        std::ostringstream ss; ss << std::put_time(&tm, "%Y%m%d_%H%M%S");
+        runTimestamp = ss.str();
+    }
+    // create a 'result' folder inside the provided imageDir (portable, avoids requiring <filesystem>)
+    std::string resultDirStr = imageDir;
+    if(resultDirStr.empty()) resultDirStr = std::string(".");
+    if(resultDirStr.back() == '/') resultDirStr.pop_back();
+    resultDirStr += "/result";
+    ensureDirectoryExists(resultDirStr);
+    // create a per-run folder under result/ named by timestamp
+    std::string runDirStr = resultDirStr + "/" + runTimestamp;
+    ensureDirectoryExists(runDirStr);
+    std::string csvPath = runDirStr + "/run.csv";
+    std::ofstream csv(csvPath);
+    if(csv){
+        csv << "frame_id,mean_diff,median_flow,pre_matches,post_matches,inliers,inlier_ratio,integrated\n";
+        csv.flush();
+        std::cout << "Writing diagnostics to " << csvPath << std::endl;
+    } else {
+        std::cerr << "Failed to open diagnostics CSV " << csvPath << std::endl;
+    }
+
+    Mat R_g = Mat::eye(3,3,CV_64F);
+    Mat t_g = Mat::zeros(3,1,CV_64F);
+
+    // simple map structures
+    std::vector<KeyFrame> keyframes;
+    std::vector<MapPoint> mappoints;
+    std::unordered_map<int,int> keyframeIdToIndex;
+
+    // Backend (BA) thread primitives
+    std::mutex mapMutex; // protects map/keyframe modifications and write-back
+    std::condition_variable backendCv;
+    std::atomic<bool> backendStop(false);
+    std::atomic<int> backendRequests(0);
+    const int LOCAL_BA_WINDOW = 5; // window size for local BA (adjustable)
+
+    // Start the backend thread: it waits for notifications and runs BA on a snapshot
+    std::thread backendThread([&]() {
+        while(!backendStop.load()){
+            std::unique_lock<std::mutex> lk(mapMutex);
+            backendCv.wait(lk, [&]{ return backendStop.load() || backendRequests.load() > 0; });
+            if(backendStop.load()) break;
+            // snapshot the map and keyframes
+            auto kfs_snapshot = map.keyframes();
+            auto mps_snapshot = map.mappoints();
+            // reset requests
+            backendRequests.store(0);
+            lk.unlock();
+
+            // determine the local window
+            int K = static_cast<int>(kfs_snapshot.size());
+            if(K <= 0) continue;
+            int start = std::max(0, K - LOCAL_BA_WINDOW);
+            std::vector<int> localKfIndices;
+            for(int ii = start; ii < K; ++ii) localKfIndices.push_back(ii);
+            std::vector<int> fixedKfIndices;
+            if(start > 0) fixedKfIndices.push_back(0);
+            #if defined(HAVE_SFM)
+            // Run BA on the snapshot (may take time); Optimizer uses g2o if enabled
+            Optimizer::localBundleAdjustmentSFM(kfs_snapshot, mps_snapshot, localKfIndices, fixedKfIndices,
+                                                loader.fx(), loader.fy(), loader.cx(), loader.cy(), 10);
+            #endif
+            // write back optimized poses/points into the main map under lock, using id-based lookup
+            std::lock_guard<std::mutex> lk2(mapMutex);
+            auto &kfs_ref = const_cast<std::vector<KeyFrame>&>(map.keyframes());
+            auto &mps_ref = const_cast<std::vector<MapPoint>&>(map.mappoints());
+            // copy back poses by id so the authoritative containers are updated
+            for(const auto &kf_opt : kfs_snapshot){
+                int idx = map.keyframeIndex(kf_opt.id);
+                if(idx >= 0 && idx < static_cast<int>(kfs_ref.size())){
+                    kfs_ref[idx].R_w = kf_opt.R_w.clone();
+                    kfs_ref[idx].t_w = kf_opt.t_w.clone();
+                }
+            }
+            // copy back map-point positions by id
+            for(const auto &mp_opt : mps_snapshot){
+                if(mp_opt.id <= 0) continue;
+                int idx = map.mapPointIndex(mp_opt.id);
+                if(idx >= 0 && idx < static_cast<int>(mps_ref.size())){
+                    mps_ref[idx].p = mp_opt.p;
+                }
+            }
+        }
+    });
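+
+    // Design note: the backend operates on value snapshots of the keyframe and
+    // map-point containers, so bundle adjustment never blocks the tracking
+    // loop; results are merged back by id under mapMutex. The trade-off is
+    // that an id that disappeared in the meantime (e.g. a culled map point) is
+    // simply skipped, since keyframeIndex/mapPointIndex return -1 for unknown ids.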
+
+    Mat frame;
+    std::string imgPath;
+    int frame_id = 0;
+
+    // persistent previous-frame storage, declared outside the loop so each
+    // iteration can reuse it (plain locals are sufficient; `static` would
+    // wrongly share this state across VisualOdometry instances)
+    std::vector<KeyPoint> prevKp;
+    Mat prevGray, prevDesc;
+    while(loader.getNextImage(frame, imgPath)){
+        Mat gray = frame;
+        if(gray.channels() > 1) cvtColor(gray, gray, COLOR_BGR2GRAY);
+
+        std::vector<KeyPoint> kps;
+        Mat desc;
+        // Detect keypoints and compute descriptors using the injected feature_ (e.g. ORB)
+        feature_->detect(gray, kps);
+        feature_->compute(gray, kps, desc);
+
+        if(!prevGray.empty() && !prevDesc.empty() && !desc.empty()){
+            // KNN match with ratio test and mutual cross-check
+            std::vector<std::vector<DMatch>> knn12, knn21;
+            matcher_->knnMatch(prevDesc, desc, knn12, 2);
+            matcher_->knnMatch(desc, prevDesc, knn21, 2);
+            auto ratioKeep = [&](const std::vector<std::vector<DMatch>>& knn, bool forward) {
+                std::vector<DMatch> filtered;
+                const float ratio = 0.75f;
+                for(size_t qi = 0; qi < knn.size(); ++qi){
+                    if(knn[qi].empty()) continue;
+                    DMatch best = knn[qi][0];
+                    if(knn[qi].size() >= 2){
+                        if(knn[qi][1].distance > 0) {
+                            if(best.distance / knn[qi][1].distance > ratio) continue;
+                        }
+                    }
+                    // mutual check: find the reverse match for the train index
+                    int t = forward ? best.trainIdx : (int)qi;
+                    const auto &rev = forward ? knn21 : knn12;
+                    if(t < 0 || t >= (int)rev.size() || rev[t].empty()) continue;
+                    DMatch rbest = rev[t][0];
+                    if((forward && rbest.trainIdx == (int)qi) || (!forward && rbest.trainIdx == best.queryIdx)){
+                        filtered.push_back(best);
+                    }
+                }
+                return filtered;
+            };
+            std::vector<DMatch> goodMatches = ratioKeep(knn12, true);
+
+            Mat imgMatches;
+            drawMatches(prevGray, prevKp, gray, kps, goodMatches, imgMatches,
+                        Scalar::all(-1), Scalar::all(-1), std::vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS);
+
+            std::vector<Point2f> pts1, pts2;
+            pts1.reserve(goodMatches.size()); pts2.reserve(goodMatches.size());
+            for(const auto &m: goodMatches){
+                pts1.push_back(prevKp[m.queryIdx].pt);
+                pts2.push_back(kps[m.trainIdx].pt);
+            }
+
+            // quick frame-diff to detect near-static frames
+            double meanDiff = 0.0;
+            if(!prevGray.empty()){
+                Mat diff; absdiff(gray, prevGray, diff);
+                meanDiff = mean(diff)[0];
+            }
+
+            // compute per-match flow magnitudes and the median flow
+            std::vector<double> flows; flows.reserve(pts1.size());
+            double median_flow = 0.0;
+            for(size_t i = 0; i < pts1.size(); ++i){
+                Point2f d = pts2[i] - pts1[i];
+                flows.push_back(std::sqrt((double)d.x * d.x + (double)d.y * d.y));
+            }
+            if(!flows.empty()){
+                std::vector<double> tmp = flows;
+                size_t mid = tmp.size()/2;
+                std::nth_element(tmp.begin(), tmp.begin()+mid, tmp.end());
+                median_flow = tmp[mid];
+            }
+
+            int pre_matches = static_cast<int>(goodMatches.size());
+            int post_matches = pre_matches;
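+
+            // Bootstrapping note: the block below runs once, on the second
+            // frame, while the map is still empty. The initializer estimates
+            // the relative pose of frame 1 w.r.t. frame 0 from the 2D-2D
+            // matches and triangulates an initial point cloud; frame 0 is
+            // taken as the world origin, so the triangulated points can be
+            // inserted as world-coordinate map points unchanged.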
")" << std::endl; + // build two keyframes: previous (id = frame_id-1) and current (id = frame_id) + KeyFrame kf0, kf1; + kf0.id = frame_id - 1; + // prevGray/prevKp/prevDesc refer to previous frame + if(!prevGray.empty()){ + if(prevGray.channels() == 1){ cvtColor(prevGray, kf0.image, COLOR_GRAY2BGR); } + else kf0.image = prevGray.clone(); + } + kf0.kps = prevKp; + kf0.desc = prevDesc.clone(); + kf0.R_w = Mat::eye(3,3,CV_64F); + kf0.t_w = Mat::zeros(3,1,CV_64F); + + kf1.id = frame_id; + kf1.image = frame.clone(); + kf1.kps = kps; + kf1.desc = desc.clone(); + // initializer returns pose of second camera relative to first (world = first) + kf1.R_w = R_init.clone(); + kf1.t_w = t_init.clone(); + + // convert initializer 3D points (in first camera frame) to MapPoints in world coords (world==first) + std::vector newMps; + newMps.reserve(pts3D.size()); + for(size_t i=0;i lk(mapMutex); + keyframes.push_back(std::move(kf0)); + map.addKeyFrame(keyframes.back()); + keyframes.push_back(std::move(kf1)); + map.addKeyFrame(keyframes.back()); + if(!newMps.empty()) map.addMapPoints(newMps); + } + + // set global pose to second keyframe (apply scale) + Mat t_d; kf1.t_w.convertTo(t_d, CV_64F); + t_g = t_d * scale_m; + R_g = kf1.R_w.clone(); + double x = t_g.at(0); + double z = t_g.at(2); + vis.addPose(x,-z); + + // write CSV entry for initialization frame + if(csv){ + csv << frame_id << ",init,0,0,0,0,0,1\n"; + csv.flush(); + } + + // notify backend to run BA on initial map + backendRequests.fetch_add(1); + backendCv.notify_one(); + // skip the usual PnP / poseEst path for this frame since we've initialized + prevGray = gray.clone(); prevKp = kps; prevDesc = desc.clone(); + frame_id++; + continue; + } else { + std::cout << "Initializer: failed to initialize from first two frames" << std::endl; + } + } + + // Try PnP against map points first (via Localizer) + bool solvedByPnP = false; + Mat R_pnp, t_pnp; int inliers_pnp = 0; + int preMatches_pnp = 0, postMatches_pnp = 0; double meanReproj_pnp = 0.0; + if(localizer.tryPnP(map, desc, kps, loader.fx(), loader.fy(), loader.cx(), loader.cy(), gray.cols, gray.rows, + options.min_inliers, R_pnp, t_pnp, inliers_pnp, frame_id, &frame, runDirStr, + &preMatches_pnp, &postMatches_pnp, &meanReproj_pnp)){ + solvedByPnP = true; + std::cout << "PnP solved: preMatches="< pose estimation failed, skipping integration." << std::endl; + } else if(inliers < MIN_INLIERS || matchCount < MIN_MATCHES){ + // Not enough geometric support -> skip (unless absolute inliers pass) + integrate = false; + // std::cout << " -> insufficient matches/inliers (by both absolute and relative metrics), skipping integration." << std::endl; + } else { + // We have sufficient geometric support. Only skip if motion is truly negligible + // (both translation and rotation tiny) AND the image/flow indicate near-identical frames. + const double MIN_TRANSLATION_NORM = 1e-4; + const double MIN_ROTATION_RAD = (0.5 * CV_PI / 180.0); // 0.5 degree + const double DIFF_ZERO_THRESH = 2.0; // nearly identical image + const double FLOW_ZERO_THRESH = 0.3; // nearly zero flow in pixels + + if(t_norm < MIN_TRANSLATION_NORM && std::abs(rot_angle) < MIN_ROTATION_RAD + && meanDiff < DIFF_ZERO_THRESH && median_flow < FLOW_ZERO_THRESH){ + integrate = false; // truly static + // std::cout << " -> negligible motion and near-identical frames, skipping integration." 
+                if (inliers >= options.min_inliers || (inliers >= 2 && matchCount > 50 && median_flow > 2.0)) {
+                    integrate = true;
+                }
+
+                // integrate the transform if allowed
+                if(integrate){
+                    Mat t_d; t.convertTo(t_d, CV_64F);
+                    Mat t_scaled = t_d * scale_m;
+                    Mat R_d; R.convertTo(R_d, CV_64F);
+                    t_g = t_g + R_g * t_scaled;
+                    R_g = R_g * R_d;
+                    double x = t_g.at<double>(0);
+                    double z = t_g.at<double>(2);
+                    vis.addPose(x,-z);
+                }
+
+                // if we integrated, create a keyframe and optionally triangulate new map points
+                if(integrate){
+                    KeyFrame kf;
+                    kf.id = frame_id;
+                    kf.image = frame.clone();
+                    kf.kps = kps;
+                    kf.desc = desc.clone();
+                    kf.R_w = R_g.clone(); kf.t_w = t_g.clone();
+
+                    bool didTriangulate = false;
+                    if(!map.keyframes().empty() && map.keyframes().back().id == frame_id - 1){
+                        // triangulate between the last keyframe and this frame using normalized coordinates
+                        const KeyFrame &last = map.keyframes().back();
+                        std::vector<Point2f> pts1n, pts2n; pts1n.reserve(pts1.size()); pts2n.reserve(pts2.size());
+                        double fx = loader.fx(), fy = loader.fy(), cx = loader.cx(), cy = loader.cy();
+                        for(size_t i = 0; i < pts1.size(); ++i){
+                            pts1n.emplace_back((float)((pts1[i].x - cx) / fx), (float)((pts1[i].y - cy) / fy));
+                            pts2n.emplace_back((float)((pts2[i].x - cx) / fx), (float)((pts2[i].y - cy) / fy));
+                        }
+                        std::vector<int> pts1_kp_idx; pts1_kp_idx.reserve(goodMatches.size());
+                        std::vector<int> pts2_kp_idx; pts2_kp_idx.reserve(goodMatches.size());
+                        for(const auto &m: goodMatches){ pts1_kp_idx.push_back(m.queryIdx); pts2_kp_idx.push_back(m.trainIdx); }
+                        // pass the not-yet-inserted current keyframe `kf` as the second view,
+                        // so the new observations carry the current frame id
+                        auto newPts = map.triangulateBetweenLastTwo(pts1n, pts2n, pts1_kp_idx, pts2_kp_idx, last, kf, fx, fy, cx, cy);
+                        if(!newPts.empty()){
+                            didTriangulate = true;
+                            // the points were already appended inside MapManager
+                        }
+                    }
+
+                    {
+                        // insert the keyframe under lock to keep a consistent state
+                        std::lock_guard<std::mutex> lk(mapMutex);
+                        keyframes.push_back(std::move(kf));
+                        map.addKeyFrame(keyframes.back());
+                    }
+                    if(didTriangulate){
+                        std::cout << "Created keyframe " << frame_id << " and triangulated new map points (total=" << map.mappoints().size() << ")" << std::endl;
+                    } else {
+                        std::cout << "Created keyframe " << frame_id << " (no triangulation)" << std::endl;
+                    }
+                    // Notify the backend thread to run local BA asynchronously
+                    backendRequests.fetch_add(1);
+                    backendCv.notify_one();
+                }
+
+                // write the CSV line
+                if(csv){
+                    csv << frame_id << "," << meanDiff << "," << median_flow << "," << pre_matches << "," << post_matches << "," << inliers << "," << inlierRatio << "," << (integrate?1:0) << "\n";
+                    csv.flush();
+                }
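+
+                // Example row (illustrative values) matching the header written above:
+                //   frame_id,mean_diff,median_flow,pre_matches,post_matches,inliers,inlier_ratio,integrated
+                //   42,6.1,3.8,210,210,145,0.69,1
+                // i.e. frame 42 moved visibly (median flow 3.8 px), 145 of the
+                // 210 ratio-test matches survived the essential-matrix RANSAC
+                // (ratio 145/210 = 0.69), and the pose was integrated.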
+
+                // Always show a single image; if we have matches, draw small boxes around matched keypoints
+                Mat visImg;
+                if(frame.channels() > 1) visImg = frame.clone();
+                else cvtColor(gray, visImg, COLOR_GRAY2BGR);
+                std::string info = std::string("Frame ") + std::to_string(frame_id) + " matches=" + std::to_string(matchCount) + " inliers=" + std::to_string(inliers);
+                if(!goodMatches.empty()){
+                    for(size_t mi = 0; mi < goodMatches.size(); ++mi){
+                        const Point2f &p2 = kps[goodMatches[mi].trainIdx].pt;
+                        bool isInlier = false;
+                        if(!mask.empty()){
+                            if(mask.rows == static_cast<int>(goodMatches.size())){
+                                isInlier = mask.at<uchar>(static_cast<int>(mi), 0) != 0;
+                            } else if(mask.cols == static_cast<int>(goodMatches.size())){
+                                isInlier = mask.at<uchar>(0, static_cast<int>(mi)) != 0;
+                            }
+                        }
+                        Scalar col = isInlier ? Scalar(0,255,0) : Scalar(0,0,255);
+                        Point ip(cvRound(p2.x), cvRound(p2.y));
+                        Rect r(ip - Point(4,4), Size(8,8));
+                        rectangle(visImg, r, col, 2, LINE_AA);
+                    }
+                }
+                putText(visImg, info, Point(10,20), FONT_HERSHEY_SIMPLEX, 0.6, Scalar(0,255,0), 2);
+                vis.showFrame(visImg);
+
+            } else {
+                vis.showFrame(gray);
+            }
+        } else {
+            Mat visFrame; drawKeypoints(gray, kps, visFrame, Scalar::all(-1), DrawMatchesFlags::DRAW_RICH_KEYPOINTS);
+            vis.showFrame(visFrame);
+        }
+
+        vis.showTopdown();
+        // update prev
+        prevGray = gray.clone(); prevKp = kps; prevDesc = desc.clone();
+        frame_id++;
+        char key = (char)waitKey(1);
+        if(key == 27) break;
+    }
+
+    // save the trajectory into the per-run folder (runDirStr was created above)
+    try{
+        std::string outPath = runDirStr + "/trajectory.png";
+        if(vis.saveTrajectory(outPath)){
+            std::cout << "Saved trajectory to " << outPath << std::endl;
+        } else {
+            std::cerr << "Failed to save trajectory to " << outPath << std::endl;
+        }
+    } catch(const std::exception &e){
+        std::cerr << "Error saving trajectory: " << e.what() << std::endl;
+    }
+
+    // Shut down the backend thread gracefully
+    backendStop.store(true);
+    backendCv.notify_one();
+    if(backendThread.joinable()) backendThread.join();
+
+    return 0;
+}
+
+} // namespace vo
+} // namespace cv
\ No newline at end of file