#!/usr/bin/env python3
"""
Visual Odometry - Camera-based position estimation without GPS.

ORB/SIFT feature matching with essential matrix decomposition.
"""

import time

import cv2
import numpy as np
from scipy.spatial.transform import Rotation
class VisualOdometry:
    """Monocular visual odometry from sequential camera frames.

    Tracks an accumulated camera pose (position + orientation) by matching
    ORB or SIFT features between consecutive frames and decomposing the
    essential matrix into a relative rotation and translation.

    NOTE: monocular VO cannot observe metric scale; each inter-frame
    translation is accumulated with a fixed unit scale (see
    ``_estimate_motion``), so positions are in arbitrary units.
    """

    def __init__(self, camera_matrix=None, detector_type="ORB", min_features=100):
        """Set up the feature detector, matcher, and pose state.

        Args:
            camera_matrix: 3x3 intrinsics as any array-like with 9 values.
                Defaults to a generic 640x480 pinhole model (f=500,
                principal point at (320, 240)).
            detector_type: "ORB" or "SIFT"; any other value silently
                falls back to ORB (original behavior, preserved).
            min_features: minimum keypoint and match count required
                before a motion estimate is attempted.
        """
        self.detector_type = detector_type
        self.min_features = min_features

        if camera_matrix is not None:
            # Accept lists/tuples/flat arrays; coerce to float64 so cv2
            # geometry functions don't choke on integer input.
            self.camera_matrix = np.asarray(camera_matrix, dtype=np.float64).reshape(3, 3)
        else:
            self.camera_matrix = np.array(
                [[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]]
            )
        self.dist_coeffs = np.zeros(5)  # input assumed undistorted

        # SIFT descriptors are float vectors (L2 metric); ORB descriptors
        # are binary strings (Hamming metric). crossCheck stays off because
        # knnMatch(k=2) + Lowe's ratio test is used instead.
        if detector_type == "SIFT":
            self.detector = cv2.SIFT_create(nfeatures=500)
            self.matcher = cv2.BFMatcher(cv2.NORM_L2, crossCheck=False)
        else:
            self.detector = cv2.ORB_create(nfeatures=500)
            self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False)

        # Data retained from the previously processed frame.
        self.prev_frame = None
        self.prev_keypoints = None
        self.prev_descriptors = None
        self.prev_time = None

        # Accumulated pose: world-frame position and rotation matrix,
        # starting at the origin with identity orientation.
        self.position = np.zeros(3)
        self.orientation = np.eye(3)
        self.velocity = np.zeros(3)

        # Optional callback invoked as on_pose(position, quaternion_xyzw)
        # after each successful motion estimate.
        self.on_pose = None

        print(f"[VO] Visual Odometry initialized ({detector_type})")

    def process_frame(self, camera_name, frame):
        """Ingest one camera frame and update the pose estimate.

        Args:
            camera_name: identifier of the source camera (currently unused).
            frame: BGR image (HxWx3) or an already-grayscale image (HxW).
        """
        # Accept grayscale input directly instead of failing in cvtColor.
        gray = frame if frame.ndim == 2 else cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        keypoints, descriptors = self.detector.detectAndCompute(gray, None)
        current_time = time.time()

        if (
            self.prev_frame is not None
            and self.prev_descriptors is not None
            and len(keypoints) >= self.min_features
        ):
            matches = self._match_features(self.prev_descriptors, descriptors)
            if len(matches) >= self.min_features:
                self._estimate_motion(
                    self.prev_keypoints, keypoints, matches, current_time
                )

        # Always roll state forward, even when estimation was skipped.
        self.prev_frame = gray
        self.prev_keypoints = keypoints
        self.prev_descriptors = descriptors
        self.prev_time = current_time

    def _match_features(self, desc1, desc2):
        """Return Lowe-ratio-filtered matches from desc1 into desc2."""
        # knnMatch with k=2 needs at least two candidates on each side;
        # degenerate inputs would yield no usable pairs anyway.
        if desc1 is None or desc2 is None or len(desc1) < 2 or len(desc2) < 2:
            return []

        matches = self.matcher.knnMatch(desc1, desc2, k=2)
        good = []
        for pair in matches:
            # knnMatch may return fewer than k neighbors for a query.
            if len(pair) == 2:
                best, second = pair
                # Lowe's ratio test: keep only clearly unambiguous matches.
                if best.distance < 0.7 * second.distance:
                    good.append(best)
        return good

    def _estimate_motion(self, prev_kp, curr_kp, matches, current_time):
        """Recover relative R, t from matched keypoints and integrate pose."""
        if len(matches) < 5:
            # The five-point algorithm needs at least 5 correspondences.
            return

        pts1 = np.float32([prev_kp[m.queryIdx].pt for m in matches])
        pts2 = np.float32([curr_kp[m.trainIdx].pt for m in matches])

        E, inlier_mask = cv2.findEssentialMat(
            pts1, pts2, self.camera_matrix, method=cv2.RANSAC, prob=0.999, threshold=1.0
        )
        if E is None:
            # Degenerate geometry (e.g. too few inliers, planar ambiguity).
            return

        # The refined inlier mask returned by recoverPose is not needed.
        _, R, t, _ = cv2.recoverPose(E, pts1, pts2, self.camera_matrix, mask=inlier_mask)

        # Monocular scale is unobservable; assume unit displacement per step.
        scale = 1.0
        translation = scale * t.flatten()

        if self.prev_time is not None:
            dt = current_time - self.prev_time
            if dt > 0:
                # NOTE(review): this velocity is expressed in the camera
                # frame, while position below is accumulated in the world
                # frame — confirm which frame downstream consumers expect.
                self.velocity = translation / dt

        # Compose the incremental motion into the accumulated world pose
        # (classic monocular-VO update: p += C·t, C = R·C).
        self.position += self.orientation @ translation
        self.orientation = R @ self.orientation

        if self.on_pose:
            r = Rotation.from_matrix(self.orientation)
            self.on_pose(self.position.copy(), r.as_quat())

    def get_pose(self):
        """Return (position copy, orientation quaternion as [x, y, z, w])."""
        r = Rotation.from_matrix(self.orientation)
        return self.position.copy(), r.as_quat()

    def get_velocity(self):
        """Return a copy of the most recent velocity estimate."""
        return self.velocity.copy()