diff --git a/.gitignore b/.gitignore index 4a671fe..935c47d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ /.DS_Store -detect.py +detect-car.py diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/edge-ai-car-detect.iml b/.idea/edge-ai-car-detect.iml new file mode 100644 index 0000000..8388dbc --- /dev/null +++ b/.idea/edge-ai-car-detect.iml @@ -0,0 +1,8 @@ [XML content lost in extraction] \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ [XML content lost in extraction] \ No newline at end of file diff --git a/.idea/material_theme_project_new.xml b/.idea/material_theme_project_new.xml new file mode 100644 index 0000000..f0be38b --- /dev/null +++ b/.idea/material_theme_project_new.xml @@ -0,0 +1,12 @@ [XML content lost in extraction] \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..812ab5a --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ [XML content lost in extraction] \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..befe336 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ [XML content lost in extraction] \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ [XML content lost in extraction] \ No newline at end of file diff --git a/__pycache__/camera_list.cpython-38.pyc b/__pycache__/camera_list.cpython-38.pyc new file mode 100644 index 0000000..85ad382 Binary files /dev/null and b/__pycache__/camera_list.cpython-38.pyc differ diff --git a/__pycache__/camera_list_test_20.cpython-38.pyc b/__pycache__/camera_list_test_20.cpython-38.pyc new file mode 100644 index 0000000..c0f0df3 Binary files /dev/null and b/__pycache__/camera_list_test_20.cpython-38.pyc differ diff --git a/__pycache__/onnx_to_tensorrt.cpython-38.pyc b/__pycache__/onnx_to_tensorrt.cpython-38.pyc new file mode 100644 index 0000000..efc5fb6 Binary files /dev/null and b/__pycache__/onnx_to_tensorrt.cpython-38.pyc differ diff --git a/camera_list.py b/camera_list.py new file mode 100644 index 0000000..073c3c6 --- /dev/null +++ b/camera_list.py @@ -0,0 +1,22 @@ +cameras = [ + { + 'url': 'rtsp://admin:L2EC70CF@d5030edfff7a.sn.mynetname.net:554/cam/realmonitor?channel=1&subtype=1', + 'name': 'UVK Gate', + 'roi': (250, 0, 450, 400) + }, + { + 'url': 'rtsp://admin:L268C6B7@d5030edfff7a.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1', + 'name': 'UVK Parking', + 'roi': (100, 50, 400, 400) + }, + { + 'url': 'rtsp://admin:L201353B@hcr086zs3b5.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1', + 'name': 'LBB Rooftop', + 'roi': (150, 20, 500, 500) + }, + { + 'url': 'rtsp://user1:1234abcd@115.79.213.124:10554/streaming/channels/502', + 'name': '(HIK) PNA Tennis', + 'roi': (50, 200, 800, 250) + } + ] \ No newline at end of file diff --git a/camera_list_test_20.py b/camera_list_test_20.py new file mode 100644 index 0000000..cdccaf9 --- /dev/null +++ b/camera_list_test_20.py @@ -0,0 +1,102 @@ +cameras = [ + { + 'url': 
'rtsp://admin:L2EC70CF@d5030edfff7a.sn.mynetname.net:554/cam/realmonitor?channel=1&subtype=1', + 'name': 'UVK Gate', + 'roi': (250, 0, 450, 400) + }, + { + 'url': 'rtsp://admin:L268C6B7@d5030edfff7a.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1', + 'name': 'UVK Parking', + 'roi': (100, 50, 400, 400) + }, + { + 'url': 'rtsp://admin:L201353B@hcr086zs3b5.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1', + 'name': 'LBB Rooftop', + 'roi': (150, 20, 500, 500) + }, + { + 'url': 'rtsp://user1:1234abcd@115.79.213.124:10554/streaming/channels/502', + 'name': '(HIK) PNA Tennis', + 'roi': (50, 200, 800, 250) + }, + { + 'url': 'rtsp://admin:L2EC70CF@d5030edfff7a.sn.mynetname.net:554/cam/realmonitor?channel=1&subtype=1', + 'name': 'UVK Gate 2', + 'roi': (250, 0, 450, 400) + }, + { + 'url': 'rtsp://admin:L268C6B7@d5030edfff7a.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1', + 'name': 'UVK Parking 2', + 'roi': (100, 50, 400, 400) + }, + { + 'url': 'rtsp://admin:L201353B@hcr086zs3b5.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1', + 'name': 'LBB Rooftop 2', + 'roi': (150, 20, 500, 500) + }, + { + 'url': 'rtsp://user1:1234abcd@115.79.213.124:10554/streaming/channels/502', + 'name': '(HIK) PNA Tennis 2', + 'roi': (50, 200, 800, 250) + }, + { + 'url': 'rtsp://admin:L2EC70CF@d5030edfff7a.sn.mynetname.net:554/cam/realmonitor?channel=1&subtype=1', + 'name': 'UVK Gate 3', + 'roi': (250, 0, 450, 400) + }, + { + 'url': 'rtsp://admin:L268C6B7@d5030edfff7a.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1', + 'name': 'UVK Parking 3', + 'roi': (100, 50, 400, 400) + }, + { + 'url': 'rtsp://admin:L201353B@hcr086zs3b5.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1', + 'name': 'LBB Rooftop 3', + 'roi': (150, 20, 500, 500) + }, + { + 'url': 'rtsp://user1:1234abcd@115.79.213.124:10554/streaming/channels/502', + 'name': '(HIK) PNA Tennis 3', + 'roi': (50, 200, 800, 250) + }, + { + 'url': 'rtsp://admin:L2EC70CF@d5030edfff7a.sn.mynetname.net:554/cam/realmonitor?channel=1&subtype=1', + 'name': 'UVK Gate 4', + 'roi': (250, 0, 450, 400) + }, + { + 'url': 'rtsp://admin:L268C6B7@d5030edfff7a.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1', + 'name': 'UVK Parking 4', + 'roi': (100, 50, 400, 400) + }, + { + 'url': 'rtsp://admin:L201353B@hcr086zs3b5.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1', + 'name': 'LBB Rooftop 4', + 'roi': (150, 20, 500, 500) + }, + { + 'url': 'rtsp://user1:1234abcd@115.79.213.124:10554/streaming/channels/502', + 'name': '(HIK) PNA Tennis 4', + 'roi': (50, 200, 800, 250) + }, + { + 'url': 'rtsp://admin:L2EC70CF@d5030edfff7a.sn.mynetname.net:554/cam/realmonitor?channel=1&subtype=1', + 'name': 'UVK Gate 5', + 'roi': (250, 0, 450, 400) + }, + { + 'url': 'rtsp://admin:L268C6B7@d5030edfff7a.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1', + 'name': 'UVK Parking 5', + 'roi': (100, 50, 400, 400) + }, + { + 'url': 'rtsp://admin:L201353B@hcr086zs3b5.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1', + 'name': 'LBB Rooftop 5', + 'roi': (150, 20, 500, 500) + }, + { + 'url': 'rtsp://user1:1234abcd@115.79.213.124:10554/streaming/channels/502', + 'name': '(HIK) PNA Tennis 5', + 'roi': (50, 200, 800, 250) + } + ] \ No newline at end of file diff --git a/detect.py b/detect.py deleted file mode 100755 index c6dd4a1..0000000 --- a/detect.py +++ /dev/null @@ -1,197 +0,0 @@ -import torch -from ultralytics import YOLO -import cv2 -import numpy as np -from collections import defaultdict -import time -import subprocess - -class Notifier: 
- def __init__(self): - pass - - def speak(self, text): - subprocess.run(['say', text]) - -class VehicleTracker: - def __init__(self, confidence_threshold=0.4, max_disappeared=30*10): - # Initialize YOLO model with GPU support - self.notifier = Notifier() - self.device = torch.device("mps" if torch.backends.mps.is_available() else "cpu") - print(f"Using device: {self.device}") - self.notifier.speak("Detection Initiated") - # Load YOLO model - self.model = YOLO('yolov8s.pt') # or 'yolov8n.pt' for less accuracy but faster inference - self.model.to(self.device) - - # Tracking parameters - self.confidence_threshold = confidence_threshold - self.max_disappeared = max_disappeared - self.next_vehicle_id = 0 - self.vehicles = {} - self.vehicle_history = defaultdict(list) - - # Valid vehicle classes in YOLO v8 - self.vehicle_classes = [2, 5, 7] # car, bus, truck in YOLOv8 - - def process_frame(self, frame, target_fps=10): - # Convert frame to RGB for YOLO - frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) - - # Run inference - results = self.model(frame_rgb, verbose=False) - - # Process detections - current_vehicles = [] - - for result in results: - boxes = result.boxes - for box in boxes: - cls = int(box.cls[0]) - conf = float(box.conf[0]) - - if conf > self.confidence_threshold and cls in self.vehicle_classes: - # Get coordinates - x1, y1, x2, y2 = box.xyxy[0].cpu().numpy() - w = x2 - x1 - h = y2 - y1 - current_vehicles.append((int(x1), int(y1), int(w), int(h))) - - # Update tracking - self.update_tracking(current_vehicles) - - # Draw results - for vehicle_id, vehicle_info in self.vehicles.items(): - if vehicle_info["disappeared"] == 0: - x, y, w, h = vehicle_info["box"] - cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2) - - # Add status text - status = self.get_vehicle_status(vehicle_id) - cv2.putText(frame, f"ID: {vehicle_id} ({status})", - (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, - 0.5, (0, 255, 0), 2) - - return frame - - def update_tracking(self, current_vehicles): - # Mark all existing vehicles as disappeared initially - for vehicle_id in self.vehicles: - self.vehicles[vehicle_id]["disappeared"] += 1 - - # Update or add new vehicles - for box in current_vehicles: - matched = False - for vehicle_id, vehicle_info in self.vehicles.items(): - if self.calculate_overlap(box, vehicle_info["box"]) > 0.3: - self.vehicles[vehicle_id]["box"] = box - self.vehicles[vehicle_id]["disappeared"] = 0 - self.vehicle_history[vehicle_id].append(time.time()) - matched = True - break - - if not matched: - self.notifier.speak("Vehicle Arriving") - self.vehicles[self.next_vehicle_id] = { - "box": box, - "disappeared": 0 - } - self.vehicle_history[self.next_vehicle_id].append(time.time()) - self.next_vehicle_id += 1 - - # Remove vehicles that have disappeared for too long - for vehicle_id in list(self.vehicles.keys()): - if self.vehicles[vehicle_id]["disappeared"] > self.max_disappeared: - self.notifier.speak("Vehicle Leaving") - del self.vehicles[vehicle_id] - - # Calculate IoU between two boxes (Intersection over Union) - - def calculate_overlap(self, box1, box2): - x1, y1, w1, h1 = box1 - x2, y2, w2, h2 = box2 - - # Calculate intersection - x_left = max(x1, x2) - y_top = max(y1, y2) - x_right = min(x1 + w1, x2 + w2) - y_bottom = min(y1 + h1, y2 + h2) - - if x_right < x_left or y_bottom < y_top: - return 0.0 - - intersection = (x_right - x_left) * (y_bottom - y_top) - - # Calculate union - area1 = w1 * h1 - area2 = w2 * h2 - union = area1 + area2 - intersection - - return intersection / union if union 
> 0 else 0 - - # Get vehicle status based on time present - - def get_vehicle_status(self, vehicle_id): - if vehicle_id not in self.vehicle_history: - return "Unknown" - - timestamps = self.vehicle_history[vehicle_id] - if len(timestamps) < 2: - return "Arriving" - - time_present = timestamps[-1] - timestamps[0] - if time_present < 3: - return "Arriving" - else: - return "Present" - -def main(): - # Initialize tracker - tracker = VehicleTracker() - - # Access RTSP stream - cap = cv2.VideoCapture('rtsp://192.168.1.1:7447/5EPTINH0aTXqTqC3') - - # Set buffer size - cap.set(cv2.CAP_PROP_BUFFERSIZE, 1) - - # Target FPS and frame timing - target_fps = 30 # Increased since we're using GPU - frame_time = 1/target_fps - - # Performance monitoring - frame_count = 0 - start_time = time.time() - - while True: - loop_start = time.time() - - ret, frame = cap.read() - if not ret: - break - - # Process frame - processed_frame = tracker.process_frame(frame) - - # Display the output - cv2.imshow('Vehicle Detection', processed_frame) - - # Calculate and display FPS - frame_count += 1 - if frame_count % 30 == 0: - elapsed = time.time() - start_time - fps = frame_count / elapsed - print(f"FPS: {fps:.2f}") - - # Maintain target FPS - processing_time = time.time() - loop_start - delay = max(1, int((frame_time - processing_time) * 1000)) - - if cv2.waitKey(delay) & 0xFF == ord('q'): - break - - cap.release() - cv2.destroyAllWindows() - -if __name__ == "__main__": - main() diff --git a/detect_person.py b/detect_person.py new file mode 100755 index 0000000..6a2809d --- /dev/null +++ b/detect_person.py @@ -0,0 +1,249 @@ +import os +import threading +import torch +import cv2 +import numpy as np +import time +import datetime +import subprocess +import math + +from ultralytics import YOLO +from camera_list import cameras + +# Global dictionary and lock for frames +display_frames = {} +display_lock = threading.Lock() +stop_event = threading.Event() + +def get_device(preferred: str = None) -> torch.device: + """ + Returns the torch device based on a preferred device override, + auto-detection of CUDA/MPS, or falls back to CPU. 
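+ Set the TORCH_DEVICE environment variable (e.g. "cuda", "mps" or "cpu") to force a specific device instead of auto-detection.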
+ """ + if preferred is None: + preferred = os.getenv("TORCH_DEVICE", "auto") + if preferred != "auto": + return torch.device(preferred) + if torch.cuda.is_available(): + return torch.device("cuda") + elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): + return torch.device("mps") + else: + return torch.device("cpu") + +class Notifier: + def __init__(self, cooldown: float = 5) -> None: + self.cooldown = cooldown # seconds between notifications + self.last_spoken = 0 + + def speak(self, message: str) -> None: + current_time = time.time() + if current_time - self.last_spoken >= self.cooldown: + subprocess.Popen(['say', message]) + self.last_spoken = current_time + +class PersonDetection: + def __init__(self, + confidence_threshold: float = 0.4, + roi: tuple = None, + movement_threshold: float = 5.0, + history_size: int = 3) -> None: + self.device = get_device() + print(f"Using device: {self.device}") + self.model = YOLO('models/yolov8n.pt') + self.model.to(self.device) + self.confidence_threshold = confidence_threshold + self.person_class = 0 # Person class in YOLOv8 + self.notifier = Notifier() + self.roi = roi # tuple (x, y, w, h); if None, process full frame + self.movement_threshold = movement_threshold + self.previous_frame = None + self.frame_history = [] + self.history_size = history_size + self.consecutive_detections = {} + + def _inside_roi(self, box: tuple) -> bool: + if self.roi is None: + return True + x, y, w, h = box + cx = x + w / 2 + cy = y + h / 2 + rx, ry, rw, rh = self.roi + return (rx <= cx <= rx + rw) and (ry <= cy <= ry + rh) + + def detect_persons(self, frame: np.ndarray) -> list: + frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + results = self.model(frame_rgb, verbose=False) + if self.previous_frame is None: + self.previous_frame = frame.copy() + return [] + + # Calculate difference between current and previous frame for motion detection + diff = cv2.absdiff(self.previous_frame, frame) + gray_diff = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY) + blur_diff = cv2.GaussianBlur(gray_diff, (5, 5), 0) + _, thresh_diff = cv2.threshold(blur_diff, 20, 255, cv2.THRESH_BINARY) + + current_time = time.time() + outdated_keys = [pos for pos, (last_time, _) in self.consecutive_detections.items() + if current_time - last_time > 5.0] + for key in outdated_keys: + del self.consecutive_detections[key] + + persons = [] + for result in results: + boxes = result.boxes + for box in boxes: + cls = int(box.cls[0]) + conf = float(box.conf[0]) + if conf > self.confidence_threshold and cls == self.person_class: + xyxy = box.xyxy[0].cpu().numpy().astype(int) + x1, y1, x2, y2 = xyxy + w = x2 - x1 + h = y2 - y1 + candidate = (x1, y1, w, h) + if self._inside_roi(candidate): + mask = np.zeros_like(thresh_diff) + mask[y1:y2, x1:x2] = 1 + motion_pixels = cv2.countNonZero(thresh_diff * mask) + area = w * h + motion_percentage = (motion_pixels / area * 100) if area > 0 else 0 + grid_x, grid_y = x1 // 50, y1 // 50 + pos_key = (grid_x, grid_y) + if motion_percentage > self.movement_threshold: + if pos_key in self.consecutive_detections: + _, count = self.consecutive_detections[pos_key] + self.consecutive_detections[pos_key] = (current_time, count + 1) + else: + self.consecutive_detections[pos_key] = (current_time, 1) + if self.consecutive_detections[pos_key][1] >= 2 or motion_percentage > self.movement_threshold * 2: + persons.append(candidate) + else: + if pos_key in self.consecutive_detections: + _, count = self.consecutive_detections[pos_key] + self.consecutive_detections[pos_key] 
= (current_time, max(0, count - 1)) + self.frame_history.append(frame.copy()) + if len(self.frame_history) > self.history_size: + self.frame_history.pop(0) + self.previous_frame = frame.copy() + return persons + + def draw_persons(self, frame: np.ndarray, persons: list) -> np.ndarray: + for (x, y, w, h) in persons: + cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2) + cv2.putText(frame, "Person", (x, y - 10), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) + if self.roi is not None: + rx, ry, rw, rh = self.roi + cv2.rectangle(frame, (rx, ry), (rx + rw, ry + rh), (255, 0, 0), 2) + return frame + + def export_image(self, frame: np.ndarray, camera_name: str) -> None: + export_dir = "exports" + if not os.path.exists(export_dir): + os.makedirs(export_dir) + timestamp = int(time.time()) + filename = f"{export_dir}/{camera_name}_person_{timestamp}.png" + if cv2.imwrite(filename, frame): + print(f"Exported positive result to {filename}") + else: + print(f"Failed to export image to {filename}") + +def open_stream(rtsp_url: str, width: int = 640, height: int = 480) -> cv2.VideoCapture: + os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "rtsp_transport;udp" + cap = cv2.VideoCapture(rtsp_url) + cap.set(cv2.CAP_PROP_FRAME_WIDTH, width) + cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height) + time.sleep(2) + return cap + +def monitor_camera(camera_url: str, window_name: str, roi: tuple, + stop_event: threading.Event, + display_frames: dict, + display_lock: threading.Lock) -> None: + detection = PersonDetection(roi=roi) + if "(HIK)" in window_name: + print(f"{window_name}: Using HIK Vision capture method") + cap = open_stream(camera_url) + else: + cap = cv2.VideoCapture(camera_url) + cap.set(cv2.CAP_PROP_BUFFERSIZE, 1) + target_fps = 30 + cap.set(cv2.CAP_PROP_FPS, target_fps) + target_fps = 30 + frame_time = 1 / target_fps + frame_count = 0 + start_time = time.time() + while not stop_event.is_set(): + loop_start = time.time() + ret, frame = cap.read() + if not ret: + print(f"{window_name}: Unable to capture frame.") + break + persons = detection.detect_persons(frame) + if persons: + detection.notifier.speak("There is a person") + processed_frame = detection.draw_persons(frame, persons) + cv2.putText(processed_frame, window_name, (10, 30), + cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2) + with display_lock: + display_frames[window_name] = processed_frame + frame_count += 1 + if frame_count % 30 == 0: + elapsed = time.time() - start_time + fps = frame_count / elapsed + print(f"{window_name} FPS: {fps:.2f}") + processing_time = time.time() - loop_start + delay = max(0, frame_time - processing_time) + time.sleep(delay) + cap.release() + +def combine_frames(frames: list) -> np.ndarray: + if not frames: + return None + n = len(frames) + cols = math.ceil(math.sqrt(n)) + rows = math.ceil(n / cols) + h, w, channels = frames[0].shape + resized_frames = [cv2.resize(frame, (w, h)) for frame in frames] + grid_rows = [] + for i in range(rows): + row_frames = [] + for j in range(cols): + idx = i * cols + j + if idx < len(resized_frames): + row_frames.append(resized_frames[idx]) + else: + row_frames.append(np.zeros((h, w, channels), dtype=np.uint8)) + row = cv2.hconcat(row_frames) + grid_rows.append(row) + combined_frame = cv2.vconcat(grid_rows) + return combined_frame + +def main() -> None: + threads = [] + for cam in cameras: + t = threading.Thread( + target=monitor_camera, + args=(cam['url'], cam['name'], cam['roi'], + stop_event, display_frames, display_lock) + ) + t.start() + threads.append(t) + while not 
stop_event.is_set(): + with display_lock: + frames = list(display_frames.values()) + if frames: + combined_frame = combine_frames(frames) + if combined_frame is not None: + cv2.imshow("Combined Cameras", combined_frame) + if cv2.waitKey(1) & 0xFF == ord('q'): + stop_event.set() + break + for t in threads: + t.join() + cv2.destroyAllWindows() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/detect_person_nvidia.py b/detect_person_nvidia.py new file mode 100644 index 0000000..5ad347a --- /dev/null +++ b/detect_person_nvidia.py @@ -0,0 +1,267 @@ +# python +import os +import sys +import threading +import torch +import cv2 +import numpy as np +import time +import subprocess +import math +from ultralytics import YOLO +from camera_list import cameras + +# Optional: Import the TensorRT conversion function +try: + from onnx_to_tensorrt import build_engine +except ImportError: + build_engine = None + +# Global dictionary and lock for frames +display_frames = {} +display_lock = threading.Lock() +stop_event = threading.Event() + +def get_device(preferred: str = None) -> torch.device: + """ + Returns the torch device based on a preferred device override, + auto-detection of CUDA/MPS, or falls back to CPU. + """ + if preferred is None: + preferred = os.getenv("TORCH_DEVICE", "auto") + if preferred != "auto": + return torch.device(preferred) + if torch.cuda.is_available(): + return torch.device("cuda") + elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): + return torch.device("mps") + else: + return torch.device("cpu") + +class Notifier: + def __init__(self, cooldown: float = 5) -> None: + self.cooldown = cooldown # seconds between notifications + self.last_spoken = 0 + + def speak(self, message: str) -> None: + current_time = time.time() + if current_time - self.last_spoken >= self.cooldown: + subprocess.Popen(['say', message]) + self.last_spoken = current_time + +class PersonDetection: + def __init__(self, + confidence_threshold: float = 0.4, + roi: tuple = None, + movement_threshold: float = 5.0, + history_size: int = 3) -> None: + self.device = get_device() + print(f"Using device: {self.device}") + self.model = YOLO("models/yolov8n.pt") + self.model.to(self.device) + self.confidence_threshold = confidence_threshold + self.person_class = 0 # Person class in YOLOv8 + self.notifier = Notifier() + self.roi = roi # tuple (x, y, w, h); if None, full frame is processed + self.movement_threshold = movement_threshold + self.previous_frame = None + self.frame_history = [] + self.history_size = history_size + self.consecutive_detections = {} + + def _inside_roi(self, box: tuple) -> bool: + if self.roi is None: + return True + x, y, w, h = box + cx = x + w / 2 + cy = y + h / 2 + rx, ry, rw, rh = self.roi + return (rx <= cx <= rx + rw) and (ry <= cy <= ry + rh) + + def detect_persons(self, frame: np.ndarray) -> list: + frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + results = self.model(frame_rgb, verbose=False) + if self.previous_frame is None: + self.previous_frame = frame.copy() + return [] + diff = cv2.absdiff(self.previous_frame, frame) + gray_diff = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY) + blur_diff = cv2.GaussianBlur(gray_diff, (5, 5), 0) + _, thresh_diff = cv2.threshold(blur_diff, 20, 255, cv2.THRESH_BINARY) + + current_time = time.time() + outdated_keys = [key for key, (last_time, _) in self.consecutive_detections.items() + if current_time - last_time > 5.0] + for key in outdated_keys: + del self.consecutive_detections[key] + + persons = [] + 
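+ # Keep a detection only if it is a person above the confidence threshold, its box centre lies inside the ROI, and the box shows enough frame-difference motion (tracked per 50x50 grid cell over consecutive frames), so purely static detections are suppressed.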
for result in results: + boxes = result.boxes + for box in boxes: + cls = int(box.cls[0]) + conf = float(box.conf[0]) + if conf > self.confidence_threshold and cls == self.person_class: + xyxy = box.xyxy[0].cpu().numpy().astype(int) + x1, y1, x2, y2 = xyxy + w = x2 - x1 + h = y2 - y1 + candidate = (x1, y1, w, h) + if self._inside_roi(candidate): + mask = np.zeros_like(thresh_diff) + mask[y1:y2, x1:x2] = 1 + motion_pixels = cv2.countNonZero(thresh_diff * mask) + area = w * h + motion_percentage = (motion_pixels / area * 100) if area > 0 else 0 + grid_x, grid_y = x1 // 50, y1 // 50 + pos_key = (grid_x, grid_y) + if motion_percentage > self.movement_threshold: + if pos_key in self.consecutive_detections: + _, count = self.consecutive_detections[pos_key] + self.consecutive_detections[pos_key] = (current_time, count + 1) + else: + self.consecutive_detections[pos_key] = (current_time, 1) + if (self.consecutive_detections[pos_key][1] >= 2 or + motion_percentage > self.movement_threshold * 2): + persons.append(candidate) + else: + if pos_key in self.consecutive_detections: + _, count = self.consecutive_detections[pos_key] + self.consecutive_detections[pos_key] = (current_time, max(0, count - 1)) + self.frame_history.append(frame.copy()) + if len(self.frame_history) > self.history_size: + self.frame_history.pop(0) + self.previous_frame = frame.copy() + return persons + + def draw_persons(self, frame: np.ndarray, persons: list) -> np.ndarray: + for (x, y, w, h) in persons: + cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2) + cv2.putText(frame, "Person", (x, y - 10), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) + if self.roi is not None: + rx, ry, rw, rh = self.roi + cv2.rectangle(frame, (rx, ry), (rx + rw, ry + rh), (255, 0, 0), 2) + return frame + + def export_image(self, frame: np.ndarray, camera_name: str) -> None: + export_dir = "exports" + if not os.path.exists(export_dir): + os.makedirs(export_dir) + timestamp = int(time.time()) + filename = f"{export_dir}/{camera_name}_person_{timestamp}.png" + if cv2.imwrite(filename, frame): + print(f"Exported positive result to {filename}") + else: + print(f"Failed to export image to {filename}") + +def open_stream(rtsp_url: str, width: int = 640, height: int = 480) -> cv2.VideoCapture: + os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "rtsp_transport;udp" + cap = cv2.VideoCapture(rtsp_url) + cap.set(cv2.CAP_PROP_FRAME_WIDTH, width) + cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height) + time.sleep(2) + return cap + +def monitor_camera(camera_url: str, window_name: str, roi: tuple, + stop_event: threading.Event, + display_frames: dict, + display_lock: threading.Lock) -> None: + detection = PersonDetection(roi=roi) + if "(HIK)" in window_name: + print(f"{window_name}: Using HIK Vision capture method") + cap = open_stream(camera_url) + else: + cap = cv2.VideoCapture(camera_url) + cap.set(cv2.CAP_PROP_BUFFERSIZE, 1) + target_fps = 30 + cap.set(cv2.CAP_PROP_FPS, target_fps) + target_fps = 30 + frame_time = 1 / target_fps + frame_count = 0 + start_time = time.time() + while not stop_event.is_set(): + loop_start = time.time() + ret, frame = cap.read() + if not ret: + print(f"{window_name}: Unable to capture frame.") + break + persons = detection.detect_persons(frame) + if persons: + detection.notifier.speak("There is a person") + processed_frame = detection.draw_persons(frame, persons) + cv2.putText(processed_frame, window_name, (10, 30), + cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2) + with display_lock: + display_frames[window_name] = processed_frame + 
frame_count += 1 + if frame_count % 30 == 0: + elapsed = time.time() - start_time + fps = frame_count / elapsed + print(f"{window_name} FPS: {fps:.2f}") + processing_time = time.time() - loop_start + delay = max(0, frame_time - processing_time) + time.sleep(delay) + cap.release() + +def combine_frames(frames: list) -> np.ndarray: + if not frames: + return None + n = len(frames) + cols = math.ceil(math.sqrt(n)) + rows = math.ceil(n / cols) + h, w, channels = frames[0].shape + resized_frames = [cv2.resize(frame, (w, h)) for frame in frames] + grid_rows = [] + for i in range(rows): + row_frames = [] + for j in range(cols): + idx = i * cols + j + if idx < len(resized_frames): + row_frames.append(resized_frames[idx]) + else: + row_frames.append(np.zeros((h, w, channels), dtype=np.uint8)) + row = cv2.hconcat(row_frames) + grid_rows.append(row) + combined_frame = cv2.vconcat(grid_rows) + return combined_frame + +def main() -> None: + # Optional conversion: if '--convert' flag is set and conversion function exists, + # build the TensorRT engine from the ONNX model. + if "--convert" in sys.argv and build_engine is not None: + onnx_model_path = "models/model.onnx" + engine_path = "models/model.trt" + try: + engine = build_engine(onnx_model_path, engine_path) + print("TensorRT engine built and saved.") + except Exception as e: + print(f"Failed to build TensorRT engine: {e}") + # Exit after conversion if desired. + sys.exit(0) + + threads = [] + for cam in cameras: + t = threading.Thread( + target=monitor_camera, + args=(cam["url"], cam["name"], cam["roi"], + stop_event, display_frames, display_lock) + ) + t.start() + threads.append(t) + while not stop_event.is_set(): + with display_lock: + frames = list(display_frames.values()) + if frames: + combined_frame = combine_frames(frames) + if combined_frame is not None: + cv2.imshow("Combined Cameras", combined_frame) + if cv2.waitKey(1) & 0xFF == ord("q"): + stop_event.set() + break + for t in threads: + t.join() + cv2.destroyAllWindows() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/exports/.DS_Store b/exports/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/exports/.DS_Store differ diff --git a/models/model_viz.py b/models/model_viz.py new file mode 100644 index 0000000..0a057b9 --- /dev/null +++ b/models/model_viz.py @@ -0,0 +1,32 @@ +import torch +from ultralytics import YOLO +from torchviz import make_dot + +# Load the YOLOv8n model +model = YOLO('yolov8n.pt') + +# Create a dummy input tensor with the appropriate shape +dummy_input = torch.randn(1, 3, 640, 640) + +# Forward pass through the model to get the output +output = model.model(dummy_input) + +# Ensure the output is a tensor by checking if it's a list or tuple +if isinstance(output, (list, tuple)): + output_tensor = output[0] +else: + output_tensor = output + +# If the output tensor is still a list, convert it to a tensor +if isinstance(output_tensor, list): + output_tensor = torch.stack(output_tensor) + +# Convert the output tensor into a scalar by summing +scalar_output = output_tensor.sum() + +# Generate the graph from the scalar output +dot = make_dot(scalar_output, params=dict(model.model.named_parameters())) + +# Save the graph to a file and display it +dot.format = 'png' +dot.render('yolov8n_model_architecture') \ No newline at end of file diff --git a/yolov8n.pt b/models/yolov8n.pt similarity index 100% rename from yolov8n.pt rename to models/yolov8n.pt diff --git a/yolov8s.pt b/models/yolov8s.pt similarity index 100% 
rename from yolov8s.pt rename to models/yolov8s.pt diff --git a/onnx_to_tensorrt.py b/onnx_to_tensorrt.py new file mode 100644 index 0000000..efa972b --- /dev/null +++ b/onnx_to_tensorrt.py @@ -0,0 +1,38 @@ +# python +import tensorrt as trt + + +def build_engine(onnx_model_path: str, engine_path: str = None, max_batch_size: int = 1, + max_workspace_size: int = 1 << 28) -> trt.ICudaEngine: + """ + Build a TensorRT engine from an ONNX model. + + Parameters: + onnx_model_path: Path to the ONNX model. + engine_path: Optional path to save the serialized engine. + max_batch_size: Maximum batch size. + max_workspace_size: Maximum GPU workspace size in bytes. + + Returns: + A TensorRT ICudaEngine. + """ + TRT_LOGGER = trt.Logger(trt.Logger.WARNING) + builder = trt.Builder(TRT_LOGGER) + network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) + parser = trt.OnnxParser(network, TRT_LOGGER) + + with open(onnx_model_path, 'rb') as model: + if not parser.parse(model.read()): + errors = [parser.get_error(i) for i in range(parser.num_errors)] + error_messages = "\n".join(str(e) for e in errors) + raise RuntimeError(f"Failed to parse ONNX model:\n{error_messages}") + + builder.max_batch_size = max_batch_size + builder.max_workspace_size = max_workspace_size + engine = builder.build_cuda_engine(network) + + if engine_path: + with open(engine_path, 'wb') as f: + f.write(engine.serialize()) + + return engine \ No newline at end of file diff --git a/open_rtsp.py b/open_rtsp.py new file mode 100644 index 0000000..52e72f4 --- /dev/null +++ b/open_rtsp.py @@ -0,0 +1,45 @@ +import cv2 +import os +import time + + +def open_stream(rtsp_url): + # Force FFmpeg to use UDP transport for RTSP + os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "rtsp_transport;udp" + cap = cv2.VideoCapture(rtsp_url) + time.sleep(2) # Give the stream time to initialize + return cap + + +def main(): + rtsp_url = "rtsp://user1:1234abcd@115.79.213.124:10554/streaming/channels/501" + cap = open_stream(rtsp_url) + + if not cap.isOpened(): + print("Error: Unable to open the video stream initially.") + + print("Press 'q' to exit the stream.") + + while True: + ret, frame = cap.read() + + # If no frame is received, attempt to reconnect + if not ret: + print("No frame received. Attempting to reconnect...") + cap.release() + time.sleep(1) + cap = open_stream(rtsp_url) + continue + + cv2.imshow("RTSP Stream", frame) + + if cv2.waitKey(1) & 0xFF == ord('q'): + print("Exiting the stream...") + break + + cap.release() + cv2.destroyAllWindows() + + +if __name__ == "__main__": + main() \ No newline at end of file
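Note on onnx_to_tensorrt.py: builder.max_batch_size, builder.max_workspace_size and builder.build_cuda_engine() come from the pre-8.x TensorRT builder API; they are deprecated and have been removed in newer releases, so build_engine() may fail with an AttributeError depending on the installed version, and max_batch_size has no effect on an explicit-batch network in any case. build_cuda_engine() can also return None on failure, which the current code would then try to serialize. Below is a minimal sketch of the same conversion against the TensorRT 8.4+ builder-config API; the name build_engine_trt8, the 1 << 28 workspace size and the version cut-off are illustrative assumptions, not part of this diff.

import tensorrt as trt


def build_engine_trt8(onnx_model_path: str, engine_path: str = None,
                      workspace_size: int = 1 << 28) -> trt.ICudaEngine:
    """Sketch: build (and optionally save) a TensorRT engine from an ONNX model on TensorRT 8.4+."""
    logger = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(logger)
    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    parser = trt.OnnxParser(network, logger)

    with open(onnx_model_path, "rb") as f:
        if not parser.parse(f.read()):
            errors = "\n".join(str(parser.get_error(i)) for i in range(parser.num_errors))
            raise RuntimeError(f"Failed to parse ONNX model:\n{errors}")

    config = builder.create_builder_config()
    # Workspace memory is configured on the builder config in TensorRT 8.4+, not on the builder.
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_size)

    # build_serialized_network() replaces build_cuda_engine(); it returns None on failure.
    serialized = builder.build_serialized_network(network, config)
    if serialized is None:
        raise RuntimeError("TensorRT engine build failed.")

    if engine_path:
        with open(engine_path, "wb") as f:
            f.write(serialized)

    return trt.Runtime(logger).deserialize_cuda_engine(serialized)

Since the --convert path in detect_person_nvidia.py passes only the ONNX and engine paths, this reduced signature could be swapped in for build_engine without touching the caller.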