diff --git a/.gitignore b/.gitignore
index 4a671fe..935c47d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,2 @@
/.DS_Store
-detect.py
+detect-car.py
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/edge-ai-car-detect.iml b/.idea/edge-ai-car-detect.iml
new file mode 100644
index 0000000..8388dbc
--- /dev/null
+++ b/.idea/edge-ai-car-detect.iml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/material_theme_project_new.xml b/.idea/material_theme_project_new.xml
new file mode 100644
index 0000000..f0be38b
--- /dev/null
+++ b/.idea/material_theme_project_new.xml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..812ab5a
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..befe336
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..35eb1dd
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/__pycache__/camera_list.cpython-38.pyc b/__pycache__/camera_list.cpython-38.pyc
new file mode 100644
index 0000000..85ad382
Binary files /dev/null and b/__pycache__/camera_list.cpython-38.pyc differ
diff --git a/__pycache__/camera_list_test_20.cpython-38.pyc b/__pycache__/camera_list_test_20.cpython-38.pyc
new file mode 100644
index 0000000..c0f0df3
Binary files /dev/null and b/__pycache__/camera_list_test_20.cpython-38.pyc differ
diff --git a/__pycache__/onnx_to_tensorrt.cpython-38.pyc b/__pycache__/onnx_to_tensorrt.cpython-38.pyc
new file mode 100644
index 0000000..efc5fb6
Binary files /dev/null and b/__pycache__/onnx_to_tensorrt.cpython-38.pyc differ
diff --git a/camera_list.py b/camera_list.py
new file mode 100644
index 0000000..073c3c6
--- /dev/null
+++ b/camera_list.py
@@ -0,0 +1,22 @@
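+# Each entry: RTSP 'url', human-readable 'name', and 'roi' as an (x, y, width, height)
+# region of interest used by the detection scripts to filter detections.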
+cameras = [
+ {
+ 'url': 'rtsp://admin:L2EC70CF@d5030edfff7a.sn.mynetname.net:554/cam/realmonitor?channel=1&subtype=1',
+ 'name': 'UVK Gate',
+ 'roi': (250, 0, 450, 400)
+ },
+ {
+ 'url': 'rtsp://admin:L268C6B7@d5030edfff7a.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1',
+ 'name': 'UVK Parking',
+ 'roi': (100, 50, 400, 400)
+ },
+ {
+ 'url': 'rtsp://admin:L201353B@hcr086zs3b5.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1',
+ 'name': 'LBB Rooftop',
+ 'roi': (150, 20, 500, 500)
+ },
+ {
+ 'url': 'rtsp://user1:1234abcd@115.79.213.124:10554/streaming/channels/502',
+ 'name': '(HIK) PNA Tennis',
+ 'roi': (50, 200, 800, 250)
+ }
+ ]
\ No newline at end of file
diff --git a/camera_list_test_20.py b/camera_list_test_20.py
new file mode 100644
index 0000000..cdccaf9
--- /dev/null
+++ b/camera_list_test_20.py
@@ -0,0 +1,102 @@
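+# Load-test variant: the four cameras from camera_list.py repeated five times (20 streams in total).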
+cameras = [
+ {
+ 'url': 'rtsp://admin:L2EC70CF@d5030edfff7a.sn.mynetname.net:554/cam/realmonitor?channel=1&subtype=1',
+ 'name': 'UVK Gate',
+ 'roi': (250, 0, 450, 400)
+ },
+ {
+ 'url': 'rtsp://admin:L268C6B7@d5030edfff7a.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1',
+ 'name': 'UVK Parking',
+ 'roi': (100, 50, 400, 400)
+ },
+ {
+ 'url': 'rtsp://admin:L201353B@hcr086zs3b5.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1',
+ 'name': 'LBB Rooftop',
+ 'roi': (150, 20, 500, 500)
+ },
+ {
+ 'url': 'rtsp://user1:1234abcd@115.79.213.124:10554/streaming/channels/502',
+ 'name': '(HIK) PNA Tennis',
+ 'roi': (50, 200, 800, 250)
+ },
+ {
+ 'url': 'rtsp://admin:L2EC70CF@d5030edfff7a.sn.mynetname.net:554/cam/realmonitor?channel=1&subtype=1',
+ 'name': 'UVK Gate 2',
+ 'roi': (250, 0, 450, 400)
+ },
+ {
+ 'url': 'rtsp://admin:L268C6B7@d5030edfff7a.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1',
+ 'name': 'UVK Parking 2',
+ 'roi': (100, 50, 400, 400)
+ },
+ {
+ 'url': 'rtsp://admin:L201353B@hcr086zs3b5.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1',
+ 'name': 'LBB Rooftop 2',
+ 'roi': (150, 20, 500, 500)
+ },
+ {
+ 'url': 'rtsp://user1:1234abcd@115.79.213.124:10554/streaming/channels/502',
+ 'name': '(HIK) PNA Tennis 2',
+ 'roi': (50, 200, 800, 250)
+ },
+ {
+ 'url': 'rtsp://admin:L2EC70CF@d5030edfff7a.sn.mynetname.net:554/cam/realmonitor?channel=1&subtype=1',
+ 'name': 'UVK Gate 3',
+ 'roi': (250, 0, 450, 400)
+ },
+ {
+ 'url': 'rtsp://admin:L268C6B7@d5030edfff7a.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1',
+ 'name': 'UVK Parking 3',
+ 'roi': (100, 50, 400, 400)
+ },
+ {
+ 'url': 'rtsp://admin:L201353B@hcr086zs3b5.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1',
+ 'name': 'LBB Rooftop 3',
+ 'roi': (150, 20, 500, 500)
+ },
+ {
+ 'url': 'rtsp://user1:1234abcd@115.79.213.124:10554/streaming/channels/502',
+ 'name': '(HIK) PNA Tennis 3',
+ 'roi': (50, 200, 800, 250)
+ },
+ {
+ 'url': 'rtsp://admin:L2EC70CF@d5030edfff7a.sn.mynetname.net:554/cam/realmonitor?channel=1&subtype=1',
+ 'name': 'UVK Gate 4',
+ 'roi': (250, 0, 450, 400)
+ },
+ {
+ 'url': 'rtsp://admin:L268C6B7@d5030edfff7a.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1',
+ 'name': 'UVK Parking 4',
+ 'roi': (100, 50, 400, 400)
+ },
+ {
+ 'url': 'rtsp://admin:L201353B@hcr086zs3b5.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1',
+ 'name': 'LBB Rooftop 4',
+ 'roi': (150, 20, 500, 500)
+ },
+ {
+ 'url': 'rtsp://user1:1234abcd@115.79.213.124:10554/streaming/channels/502',
+ 'name': '(HIK) PNA Tennis 4',
+ 'roi': (50, 200, 800, 250)
+ },
+ {
+ 'url': 'rtsp://admin:L2EC70CF@d5030edfff7a.sn.mynetname.net:554/cam/realmonitor?channel=1&subtype=1',
+ 'name': 'UVK Gate 5',
+ 'roi': (250, 0, 450, 400)
+ },
+ {
+ 'url': 'rtsp://admin:L268C6B7@d5030edfff7a.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1',
+ 'name': 'UVK Parking 5',
+ 'roi': (100, 50, 400, 400)
+ },
+ {
+ 'url': 'rtsp://admin:L201353B@hcr086zs3b5.sn.mynetname.net:556/cam/realmonitor?channel=1&subtype=1',
+ 'name': 'LBB Rooftop 5',
+ 'roi': (150, 20, 500, 500)
+ },
+ {
+ 'url': 'rtsp://user1:1234abcd@115.79.213.124:10554/streaming/channels/502',
+ 'name': '(HIK) PNA Tennis 5',
+ 'roi': (50, 200, 800, 250)
+ }
+ ]
\ No newline at end of file
diff --git a/detect.py b/detect.py
deleted file mode 100755
index c6dd4a1..0000000
--- a/detect.py
+++ /dev/null
@@ -1,197 +0,0 @@
-import torch
-from ultralytics import YOLO
-import cv2
-import numpy as np
-from collections import defaultdict
-import time
-import subprocess
-
-class Notifier:
- def __init__(self):
- pass
-
- def speak(self, text):
- subprocess.run(['say', text])
-
-class VehicleTracker:
- def __init__(self, confidence_threshold=0.4, max_disappeared=30*10):
- # Initialize YOLO model with GPU support
- self.notifier = Notifier()
- self.device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
- print(f"Using device: {self.device}")
- self.notifier.speak("Detection Initiated")
- # Load YOLO model
- self.model = YOLO('yolov8s.pt') # or 'yolov8n.pt' for less accuracy but faster inference
- self.model.to(self.device)
-
- # Tracking parameters
- self.confidence_threshold = confidence_threshold
- self.max_disappeared = max_disappeared
- self.next_vehicle_id = 0
- self.vehicles = {}
- self.vehicle_history = defaultdict(list)
-
- # Valid vehicle classes in YOLO v8
- self.vehicle_classes = [2, 5, 7] # car, bus, truck in YOLOv8
-
- def process_frame(self, frame, target_fps=10):
- # Convert frame to RGB for YOLO
- frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
- # Run inference
- results = self.model(frame_rgb, verbose=False)
-
- # Process detections
- current_vehicles = []
-
- for result in results:
- boxes = result.boxes
- for box in boxes:
- cls = int(box.cls[0])
- conf = float(box.conf[0])
-
- if conf > self.confidence_threshold and cls in self.vehicle_classes:
- # Get coordinates
- x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
- w = x2 - x1
- h = y2 - y1
- current_vehicles.append((int(x1), int(y1), int(w), int(h)))
-
- # Update tracking
- self.update_tracking(current_vehicles)
-
- # Draw results
- for vehicle_id, vehicle_info in self.vehicles.items():
- if vehicle_info["disappeared"] == 0:
- x, y, w, h = vehicle_info["box"]
- cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
-
- # Add status text
- status = self.get_vehicle_status(vehicle_id)
- cv2.putText(frame, f"ID: {vehicle_id} ({status})",
- (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX,
- 0.5, (0, 255, 0), 2)
-
- return frame
-
- def update_tracking(self, current_vehicles):
- # Mark all existing vehicles as disappeared initially
- for vehicle_id in self.vehicles:
- self.vehicles[vehicle_id]["disappeared"] += 1
-
- # Update or add new vehicles
- for box in current_vehicles:
- matched = False
- for vehicle_id, vehicle_info in self.vehicles.items():
- if self.calculate_overlap(box, vehicle_info["box"]) > 0.3:
- self.vehicles[vehicle_id]["box"] = box
- self.vehicles[vehicle_id]["disappeared"] = 0
- self.vehicle_history[vehicle_id].append(time.time())
- matched = True
- break
-
- if not matched:
- self.notifier.speak("Vehicle Arriving")
- self.vehicles[self.next_vehicle_id] = {
- "box": box,
- "disappeared": 0
- }
- self.vehicle_history[self.next_vehicle_id].append(time.time())
- self.next_vehicle_id += 1
-
- # Remove vehicles that have disappeared for too long
- for vehicle_id in list(self.vehicles.keys()):
- if self.vehicles[vehicle_id]["disappeared"] > self.max_disappeared:
- self.notifier.speak("Vehicle Leaving")
- del self.vehicles[vehicle_id]
-
- # Calculate IoU between two boxes (Intersection over Union)
-
- def calculate_overlap(self, box1, box2):
- x1, y1, w1, h1 = box1
- x2, y2, w2, h2 = box2
-
- # Calculate intersection
- x_left = max(x1, x2)
- y_top = max(y1, y2)
- x_right = min(x1 + w1, x2 + w2)
- y_bottom = min(y1 + h1, y2 + h2)
-
- if x_right < x_left or y_bottom < y_top:
- return 0.0
-
- intersection = (x_right - x_left) * (y_bottom - y_top)
-
- # Calculate union
- area1 = w1 * h1
- area2 = w2 * h2
- union = area1 + area2 - intersection
-
- return intersection / union if union > 0 else 0
-
- # Get vehicle status based on time present
-
- def get_vehicle_status(self, vehicle_id):
- if vehicle_id not in self.vehicle_history:
- return "Unknown"
-
- timestamps = self.vehicle_history[vehicle_id]
- if len(timestamps) < 2:
- return "Arriving"
-
- time_present = timestamps[-1] - timestamps[0]
- if time_present < 3:
- return "Arriving"
- else:
- return "Present"
-
-def main():
- # Initialize tracker
- tracker = VehicleTracker()
-
- # Access RTSP stream
- cap = cv2.VideoCapture('rtsp://192.168.1.1:7447/5EPTINH0aTXqTqC3')
-
- # Set buffer size
- cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
-
- # Target FPS and frame timing
- target_fps = 30 # Increased since we're using GPU
- frame_time = 1/target_fps
-
- # Performance monitoring
- frame_count = 0
- start_time = time.time()
-
- while True:
- loop_start = time.time()
-
- ret, frame = cap.read()
- if not ret:
- break
-
- # Process frame
- processed_frame = tracker.process_frame(frame)
-
- # Display the output
- cv2.imshow('Vehicle Detection', processed_frame)
-
- # Calculate and display FPS
- frame_count += 1
- if frame_count % 30 == 0:
- elapsed = time.time() - start_time
- fps = frame_count / elapsed
- print(f"FPS: {fps:.2f}")
-
- # Maintain target FPS
- processing_time = time.time() - loop_start
- delay = max(1, int((frame_time - processing_time) * 1000))
-
- if cv2.waitKey(delay) & 0xFF == ord('q'):
- break
-
- cap.release()
- cv2.destroyAllWindows()
-
-if __name__ == "__main__":
- main()
diff --git a/detect_person.py b/detect_person.py
new file mode 100755
index 0000000..6a2809d
--- /dev/null
+++ b/detect_person.py
@@ -0,0 +1,249 @@
+import os
+import threading
+import torch
+import cv2
+import numpy as np
+import time
+import subprocess
+import math
+
+from ultralytics import YOLO
+from camera_list import cameras
+
+# Global dictionary and lock for frames
+display_frames = {}
+display_lock = threading.Lock()
+stop_event = threading.Event()
+
+def get_device(preferred: str = None) -> torch.device:
+ """
+    Return the torch device to use: an explicit override (argument or the TORCH_DEVICE
+    environment variable) wins; otherwise CUDA/MPS are auto-detected, falling back to CPU.
+ """
+ if preferred is None:
+ preferred = os.getenv("TORCH_DEVICE", "auto")
+ if preferred != "auto":
+ return torch.device(preferred)
+ if torch.cuda.is_available():
+ return torch.device("cuda")
+ elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+ return torch.device("mps")
+ else:
+ return torch.device("cpu")
+
+class Notifier:
+ def __init__(self, cooldown: float = 5) -> None:
+ self.cooldown = cooldown # seconds between notifications
+ self.last_spoken = 0
+
+ def speak(self, message: str) -> None:
+ current_time = time.time()
+ if current_time - self.last_spoken >= self.cooldown:
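+            # 'say' is the macOS text-to-speech command; on other platforms this call
+            # would need to be swapped for a local TTS tool (e.g. espeak on Linux).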
+ subprocess.Popen(['say', message])
+ self.last_spoken = current_time
+
+class PersonDetection:
+ def __init__(self,
+ confidence_threshold: float = 0.4,
+ roi: tuple = None,
+ movement_threshold: float = 5.0,
+ history_size: int = 3) -> None:
+ self.device = get_device()
+ print(f"Using device: {self.device}")
+ self.model = YOLO('models/yolov8n.pt')
+ self.model.to(self.device)
+ self.confidence_threshold = confidence_threshold
+ self.person_class = 0 # Person class in YOLOv8
+ self.notifier = Notifier()
+ self.roi = roi # tuple (x, y, w, h); if None, process full frame
+ self.movement_threshold = movement_threshold
+ self.previous_frame = None
+ self.frame_history = []
+ self.history_size = history_size
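+        # Maps a coarse (grid_x, grid_y) cell to (last_detection_time, consecutive_hit_count);
+        # populated in detect_persons() to debounce detections.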
+ self.consecutive_detections = {}
+
+ def _inside_roi(self, box: tuple) -> bool:
+ if self.roi is None:
+ return True
+ x, y, w, h = box
+ cx = x + w / 2
+ cy = y + h / 2
+ rx, ry, rw, rh = self.roi
+ return (rx <= cx <= rx + rw) and (ry <= cy <= ry + rh)
+
+ def detect_persons(self, frame: np.ndarray) -> list:
+ frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+ results = self.model(frame_rgb, verbose=False)
+ if self.previous_frame is None:
+ self.previous_frame = frame.copy()
+ return []
+
+ # Calculate difference between current and previous frame for motion detection
+ diff = cv2.absdiff(self.previous_frame, frame)
+ gray_diff = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
+ blur_diff = cv2.GaussianBlur(gray_diff, (5, 5), 0)
+ _, thresh_diff = cv2.threshold(blur_diff, 20, 255, cv2.THRESH_BINARY)
+
+ current_time = time.time()
+ outdated_keys = [pos for pos, (last_time, _) in self.consecutive_detections.items()
+ if current_time - last_time > 5.0]
+ for key in outdated_keys:
+ del self.consecutive_detections[key]
+
+ persons = []
+ for result in results:
+ boxes = result.boxes
+ for box in boxes:
+ cls = int(box.cls[0])
+ conf = float(box.conf[0])
+ if conf > self.confidence_threshold and cls == self.person_class:
+ xyxy = box.xyxy[0].cpu().numpy().astype(int)
+ x1, y1, x2, y2 = xyxy
+ w = x2 - x1
+ h = y2 - y1
+ candidate = (x1, y1, w, h)
+ if self._inside_roi(candidate):
+ mask = np.zeros_like(thresh_diff)
+ mask[y1:y2, x1:x2] = 1
+ motion_pixels = cv2.countNonZero(thresh_diff * mask)
+ area = w * h
+ motion_percentage = (motion_pixels / area * 100) if area > 0 else 0
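+                        # Debounce: bucket the box into a 50x50-pixel grid cell and only report it
+                        # after two consecutive hits in that cell, or immediately if motion is strong.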
+ grid_x, grid_y = x1 // 50, y1 // 50
+ pos_key = (grid_x, grid_y)
+ if motion_percentage > self.movement_threshold:
+ if pos_key in self.consecutive_detections:
+ _, count = self.consecutive_detections[pos_key]
+ self.consecutive_detections[pos_key] = (current_time, count + 1)
+ else:
+ self.consecutive_detections[pos_key] = (current_time, 1)
+ if self.consecutive_detections[pos_key][1] >= 2 or motion_percentage > self.movement_threshold * 2:
+ persons.append(candidate)
+ else:
+ if pos_key in self.consecutive_detections:
+ _, count = self.consecutive_detections[pos_key]
+ self.consecutive_detections[pos_key] = (current_time, max(0, count - 1))
+ self.frame_history.append(frame.copy())
+ if len(self.frame_history) > self.history_size:
+ self.frame_history.pop(0)
+ self.previous_frame = frame.copy()
+ return persons
+
+ def draw_persons(self, frame: np.ndarray, persons: list) -> np.ndarray:
+ for (x, y, w, h) in persons:
+ cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
+ cv2.putText(frame, "Person", (x, y - 10),
+ cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+ if self.roi is not None:
+ rx, ry, rw, rh = self.roi
+ cv2.rectangle(frame, (rx, ry), (rx + rw, ry + rh), (255, 0, 0), 2)
+ return frame
+
+ def export_image(self, frame: np.ndarray, camera_name: str) -> None:
+ export_dir = "exports"
+ if not os.path.exists(export_dir):
+ os.makedirs(export_dir)
+ timestamp = int(time.time())
+ filename = f"{export_dir}/{camera_name}_person_{timestamp}.png"
+ if cv2.imwrite(filename, frame):
+ print(f"Exported positive result to {filename}")
+ else:
+ print(f"Failed to export image to {filename}")
+
+def open_stream(rtsp_url: str, width: int = 640, height: int = 480) -> cv2.VideoCapture:
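+    # Force OpenCV's FFmpeg backend to use UDP for RTSP (same trick as open_rtsp.py),
+    # then give the camera a moment to start delivering frames.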
+ os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "rtsp_transport;udp"
+ cap = cv2.VideoCapture(rtsp_url)
+ cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
+ cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
+ time.sleep(2)
+ return cap
+
+def monitor_camera(camera_url: str, window_name: str, roi: tuple,
+ stop_event: threading.Event,
+ display_frames: dict,
+ display_lock: threading.Lock) -> None:
+ detection = PersonDetection(roi=roi)
+ if "(HIK)" in window_name:
+ print(f"{window_name}: Using HIK Vision capture method")
+ cap = open_stream(camera_url)
+ else:
+ cap = cv2.VideoCapture(camera_url)
+ cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
+        cap.set(cv2.CAP_PROP_FPS, 30)
+    target_fps = 30
+    frame_time = 1 / target_fps
+ frame_count = 0
+ start_time = time.time()
+ while not stop_event.is_set():
+ loop_start = time.time()
+ ret, frame = cap.read()
+ if not ret:
+ print(f"{window_name}: Unable to capture frame.")
+ break
+ persons = detection.detect_persons(frame)
+ if persons:
+ detection.notifier.speak("There is a person")
+ processed_frame = detection.draw_persons(frame, persons)
+ cv2.putText(processed_frame, window_name, (10, 30),
+ cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+ with display_lock:
+ display_frames[window_name] = processed_frame
+ frame_count += 1
+ if frame_count % 30 == 0:
+ elapsed = time.time() - start_time
+ fps = frame_count / elapsed
+ print(f"{window_name} FPS: {fps:.2f}")
+ processing_time = time.time() - loop_start
+ delay = max(0, frame_time - processing_time)
+ time.sleep(delay)
+ cap.release()
+
+def combine_frames(frames: list) -> np.ndarray:
+ if not frames:
+ return None
+ n = len(frames)
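+    # Lay the frames out in a near-square grid (ceil(sqrt(n)) columns); unused cells
+    # are padded with black frames so every row concatenates to the same width.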
+ cols = math.ceil(math.sqrt(n))
+ rows = math.ceil(n / cols)
+ h, w, channels = frames[0].shape
+ resized_frames = [cv2.resize(frame, (w, h)) for frame in frames]
+ grid_rows = []
+ for i in range(rows):
+ row_frames = []
+ for j in range(cols):
+ idx = i * cols + j
+ if idx < len(resized_frames):
+ row_frames.append(resized_frames[idx])
+ else:
+ row_frames.append(np.zeros((h, w, channels), dtype=np.uint8))
+ row = cv2.hconcat(row_frames)
+ grid_rows.append(row)
+ combined_frame = cv2.vconcat(grid_rows)
+ return combined_frame
+
+def main() -> None:
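+    # One capture/detection thread per camera; the main thread only aggregates the latest
+    # frames and handles the OpenCV GUI calls.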
+ threads = []
+ for cam in cameras:
+ t = threading.Thread(
+ target=monitor_camera,
+ args=(cam['url'], cam['name'], cam['roi'],
+ stop_event, display_frames, display_lock)
+ )
+ t.start()
+ threads.append(t)
+ while not stop_event.is_set():
+ with display_lock:
+ frames = list(display_frames.values())
+ if frames:
+ combined_frame = combine_frames(frames)
+ if combined_frame is not None:
+ cv2.imshow("Combined Cameras", combined_frame)
+ if cv2.waitKey(1) & 0xFF == ord('q'):
+ stop_event.set()
+ break
+ for t in threads:
+ t.join()
+ cv2.destroyAllWindows()
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/detect_person_nvidia.py b/detect_person_nvidia.py
new file mode 100644
index 0000000..5ad347a
--- /dev/null
+++ b/detect_person_nvidia.py
@@ -0,0 +1,267 @@
+# python
+import os
+import sys
+import threading
+import torch
+import cv2
+import numpy as np
+import time
+import subprocess
+import math
+from ultralytics import YOLO
+from camera_list import cameras
+
+# Optional: Import the TensorRT conversion function
+try:
+ from onnx_to_tensorrt import build_engine
+except ImportError:
+ build_engine = None
+
+# Global dictionary and lock for frames
+display_frames = {}
+display_lock = threading.Lock()
+stop_event = threading.Event()
+
+def get_device(preferred: str = None) -> torch.device:
+ """
+    Return the torch device to use: an explicit override (argument or the TORCH_DEVICE
+    environment variable) wins; otherwise CUDA/MPS are auto-detected, falling back to CPU.
+ """
+ if preferred is None:
+ preferred = os.getenv("TORCH_DEVICE", "auto")
+ if preferred != "auto":
+ return torch.device(preferred)
+ if torch.cuda.is_available():
+ return torch.device("cuda")
+ elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+ return torch.device("mps")
+ else:
+ return torch.device("cpu")
+
+class Notifier:
+ def __init__(self, cooldown: float = 5) -> None:
+ self.cooldown = cooldown # seconds between notifications
+ self.last_spoken = 0
+
+ def speak(self, message: str) -> None:
+ current_time = time.time()
+ if current_time - self.last_spoken >= self.cooldown:
+ subprocess.Popen(['say', message])
+ self.last_spoken = current_time
+
+class PersonDetection:
+ def __init__(self,
+ confidence_threshold: float = 0.4,
+ roi: tuple = None,
+ movement_threshold: float = 5.0,
+ history_size: int = 3) -> None:
+ self.device = get_device()
+ print(f"Using device: {self.device}")
+ self.model = YOLO("models/yolov8n.pt")
+ self.model.to(self.device)
+ self.confidence_threshold = confidence_threshold
+ self.person_class = 0 # Person class in YOLOv8
+ self.notifier = Notifier()
+ self.roi = roi # tuple (x, y, w, h); if None, full frame is processed
+ self.movement_threshold = movement_threshold
+ self.previous_frame = None
+ self.frame_history = []
+ self.history_size = history_size
+ self.consecutive_detections = {}
+
+ def _inside_roi(self, box: tuple) -> bool:
+ if self.roi is None:
+ return True
+ x, y, w, h = box
+ cx = x + w / 2
+ cy = y + h / 2
+ rx, ry, rw, rh = self.roi
+ return (rx <= cx <= rx + rw) and (ry <= cy <= ry + rh)
+
+ def detect_persons(self, frame: np.ndarray) -> list:
+ frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+ results = self.model(frame_rgb, verbose=False)
+ if self.previous_frame is None:
+ self.previous_frame = frame.copy()
+ return []
+ diff = cv2.absdiff(self.previous_frame, frame)
+ gray_diff = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
+ blur_diff = cv2.GaussianBlur(gray_diff, (5, 5), 0)
+ _, thresh_diff = cv2.threshold(blur_diff, 20, 255, cv2.THRESH_BINARY)
+
+ current_time = time.time()
+ outdated_keys = [key for key, (last_time, _) in self.consecutive_detections.items()
+ if current_time - last_time > 5.0]
+ for key in outdated_keys:
+ del self.consecutive_detections[key]
+
+ persons = []
+ for result in results:
+ boxes = result.boxes
+ for box in boxes:
+ cls = int(box.cls[0])
+ conf = float(box.conf[0])
+ if conf > self.confidence_threshold and cls == self.person_class:
+ xyxy = box.xyxy[0].cpu().numpy().astype(int)
+ x1, y1, x2, y2 = xyxy
+ w = x2 - x1
+ h = y2 - y1
+ candidate = (x1, y1, w, h)
+ if self._inside_roi(candidate):
+ mask = np.zeros_like(thresh_diff)
+ mask[y1:y2, x1:x2] = 1
+ motion_pixels = cv2.countNonZero(thresh_diff * mask)
+ area = w * h
+ motion_percentage = (motion_pixels / area * 100) if area > 0 else 0
+ grid_x, grid_y = x1 // 50, y1 // 50
+ pos_key = (grid_x, grid_y)
+ if motion_percentage > self.movement_threshold:
+ if pos_key in self.consecutive_detections:
+ _, count = self.consecutive_detections[pos_key]
+ self.consecutive_detections[pos_key] = (current_time, count + 1)
+ else:
+ self.consecutive_detections[pos_key] = (current_time, 1)
+ if (self.consecutive_detections[pos_key][1] >= 2 or
+ motion_percentage > self.movement_threshold * 2):
+ persons.append(candidate)
+ else:
+ if pos_key in self.consecutive_detections:
+ _, count = self.consecutive_detections[pos_key]
+ self.consecutive_detections[pos_key] = (current_time, max(0, count - 1))
+ self.frame_history.append(frame.copy())
+ if len(self.frame_history) > self.history_size:
+ self.frame_history.pop(0)
+ self.previous_frame = frame.copy()
+ return persons
+
+ def draw_persons(self, frame: np.ndarray, persons: list) -> np.ndarray:
+ for (x, y, w, h) in persons:
+ cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
+ cv2.putText(frame, "Person", (x, y - 10),
+ cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+ if self.roi is not None:
+ rx, ry, rw, rh = self.roi
+ cv2.rectangle(frame, (rx, ry), (rx + rw, ry + rh), (255, 0, 0), 2)
+ return frame
+
+ def export_image(self, frame: np.ndarray, camera_name: str) -> None:
+ export_dir = "exports"
+ if not os.path.exists(export_dir):
+ os.makedirs(export_dir)
+ timestamp = int(time.time())
+ filename = f"{export_dir}/{camera_name}_person_{timestamp}.png"
+ if cv2.imwrite(filename, frame):
+ print(f"Exported positive result to {filename}")
+ else:
+ print(f"Failed to export image to {filename}")
+
+def open_stream(rtsp_url: str, width: int = 640, height: int = 480) -> cv2.VideoCapture:
+ os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "rtsp_transport;udp"
+ cap = cv2.VideoCapture(rtsp_url)
+ cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
+ cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
+ time.sleep(2)
+ return cap
+
+def monitor_camera(camera_url: str, window_name: str, roi: tuple,
+ stop_event: threading.Event,
+ display_frames: dict,
+ display_lock: threading.Lock) -> None:
+ detection = PersonDetection(roi=roi)
+ if "(HIK)" in window_name:
+ print(f"{window_name}: Using HIK Vision capture method")
+ cap = open_stream(camera_url)
+ else:
+ cap = cv2.VideoCapture(camera_url)
+ cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
+        cap.set(cv2.CAP_PROP_FPS, 30)
+    target_fps = 30
+    frame_time = 1 / target_fps
+ frame_count = 0
+ start_time = time.time()
+ while not stop_event.is_set():
+ loop_start = time.time()
+ ret, frame = cap.read()
+ if not ret:
+ print(f"{window_name}: Unable to capture frame.")
+ break
+ persons = detection.detect_persons(frame)
+ if persons:
+ detection.notifier.speak("There is a person")
+ processed_frame = detection.draw_persons(frame, persons)
+ cv2.putText(processed_frame, window_name, (10, 30),
+ cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+ with display_lock:
+ display_frames[window_name] = processed_frame
+ frame_count += 1
+ if frame_count % 30 == 0:
+ elapsed = time.time() - start_time
+ fps = frame_count / elapsed
+ print(f"{window_name} FPS: {fps:.2f}")
+ processing_time = time.time() - loop_start
+ delay = max(0, frame_time - processing_time)
+ time.sleep(delay)
+ cap.release()
+
+def combine_frames(frames: list) -> np.ndarray:
+ if not frames:
+ return None
+ n = len(frames)
+ cols = math.ceil(math.sqrt(n))
+ rows = math.ceil(n / cols)
+ h, w, channels = frames[0].shape
+ resized_frames = [cv2.resize(frame, (w, h)) for frame in frames]
+ grid_rows = []
+ for i in range(rows):
+ row_frames = []
+ for j in range(cols):
+ idx = i * cols + j
+ if idx < len(resized_frames):
+ row_frames.append(resized_frames[idx])
+ else:
+ row_frames.append(np.zeros((h, w, channels), dtype=np.uint8))
+ row = cv2.hconcat(row_frames)
+ grid_rows.append(row)
+ combined_frame = cv2.vconcat(grid_rows)
+ return combined_frame
+
+def main() -> None:
+    # Optional conversion: if the '--convert' flag is passed and the conversion helper was
+    # imported successfully, build the TensorRT engine from the ONNX model.
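+    # Example (assuming models/model.onnx exists): python detect_person_nvidia.py --convert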
+ if "--convert" in sys.argv and build_engine is not None:
+ onnx_model_path = "models/model.onnx"
+ engine_path = "models/model.trt"
+ try:
+ engine = build_engine(onnx_model_path, engine_path)
+ print("TensorRT engine built and saved.")
+ except Exception as e:
+ print(f"Failed to build TensorRT engine: {e}")
+        # Exit after the conversion step rather than starting detection.
+ sys.exit(0)
+
+ threads = []
+ for cam in cameras:
+ t = threading.Thread(
+ target=monitor_camera,
+ args=(cam["url"], cam["name"], cam["roi"],
+ stop_event, display_frames, display_lock)
+ )
+ t.start()
+ threads.append(t)
+ while not stop_event.is_set():
+ with display_lock:
+ frames = list(display_frames.values())
+ if frames:
+ combined_frame = combine_frames(frames)
+ if combined_frame is not None:
+ cv2.imshow("Combined Cameras", combined_frame)
+ if cv2.waitKey(1) & 0xFF == ord("q"):
+ stop_event.set()
+ break
+ for t in threads:
+ t.join()
+ cv2.destroyAllWindows()
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/exports/.DS_Store b/exports/.DS_Store
new file mode 100644
index 0000000..5008ddf
Binary files /dev/null and b/exports/.DS_Store differ
diff --git a/models/model_viz.py b/models/model_viz.py
new file mode 100644
index 0000000..0a057b9
--- /dev/null
+++ b/models/model_viz.py
@@ -0,0 +1,32 @@
+import torch
+from ultralytics import YOLO
+from torchviz import make_dot
+
+# Load the YOLOv8n model
+model = YOLO('yolov8n.pt')
+
+# Create a dummy input tensor with the appropriate shape
+dummy_input = torch.randn(1, 3, 640, 640)
+
+# Forward pass through the model to get the output
+output = model.model(dummy_input)
+
+# Ensure the output is a tensor by checking if it's a list or tuple
+if isinstance(output, (list, tuple)):
+ output_tensor = output[0]
+else:
+ output_tensor = output
+
+# If the output tensor is still a list, convert it to a tensor
+if isinstance(output_tensor, list):
+ output_tensor = torch.stack(output_tensor)
+
+# Convert the output tensor into a scalar by summing
+scalar_output = output_tensor.sum()
+
+# Generate the graph from the scalar output
+dot = make_dot(scalar_output, params=dict(model.model.named_parameters()))
+
+# Save the graph to a file and display it
+dot.format = 'png'
+dot.render('yolov8n_model_architecture')
\ No newline at end of file
diff --git a/yolov8n.pt b/models/yolov8n.pt
similarity index 100%
rename from yolov8n.pt
rename to models/yolov8n.pt
diff --git a/yolov8s.pt b/models/yolov8s.pt
similarity index 100%
rename from yolov8s.pt
rename to models/yolov8s.pt
diff --git a/onnx_to_tensorrt.py b/onnx_to_tensorrt.py
new file mode 100644
index 0000000..efa972b
--- /dev/null
+++ b/onnx_to_tensorrt.py
@@ -0,0 +1,38 @@
+# python
+import tensorrt as trt
+
+
+def build_engine(onnx_model_path: str, engine_path: str = None, max_batch_size: int = 1,
+ max_workspace_size: int = 1 << 28) -> trt.ICudaEngine:
+ """
+ Build a TensorRT engine from an ONNX model.
+
+ Parameters:
+ onnx_model_path: Path to the ONNX model.
+ engine_path: Optional path to save the serialized engine.
+ max_batch_size: Maximum batch size.
+ max_workspace_size: Maximum GPU workspace size in bytes.
+
+ Returns:
+ A TensorRT ICudaEngine.
+ """
+ TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
+ builder = trt.Builder(TRT_LOGGER)
+ network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
+ parser = trt.OnnxParser(network, TRT_LOGGER)
+
+ with open(onnx_model_path, 'rb') as model:
+ if not parser.parse(model.read()):
+ errors = [parser.get_error(i) for i in range(parser.num_errors)]
+ error_messages = "\n".join(str(e) for e in errors)
+ raise RuntimeError(f"Failed to parse ONNX model:\n{error_messages}")
+
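+    # These builder attributes follow the TensorRT 7.x API; TensorRT 8+ replaces them with an
+    # IBuilderConfig (builder.create_builder_config()) and builder.build_serialized_network().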
+ builder.max_batch_size = max_batch_size
+ builder.max_workspace_size = max_workspace_size
+    engine = builder.build_cuda_engine(network)
+    if engine is None:
+        raise RuntimeError("TensorRT engine build failed (build_cuda_engine returned None).")
+
+ if engine_path:
+ with open(engine_path, 'wb') as f:
+ f.write(engine.serialize())
+
+ return engine
\ No newline at end of file
diff --git a/open_rtsp.py b/open_rtsp.py
new file mode 100644
index 0000000..52e72f4
--- /dev/null
+++ b/open_rtsp.py
@@ -0,0 +1,45 @@
+import cv2
+import os
+import time
+
+
+def open_stream(rtsp_url):
+ # Force FFmpeg to use UDP transport for RTSP
+ os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "rtsp_transport;udp"
+ cap = cv2.VideoCapture(rtsp_url)
+ time.sleep(2) # Give the stream time to initialize
+ return cap
+
+
+def main():
+ rtsp_url = "rtsp://user1:1234abcd@115.79.213.124:10554/streaming/channels/501"
+ cap = open_stream(rtsp_url)
+
+ if not cap.isOpened():
+ print("Error: Unable to open the video stream initially.")
+
+ print("Press 'q' to exit the stream.")
+
+ while True:
+ ret, frame = cap.read()
+
+ # If no frame is received, attempt to reconnect
+ if not ret:
+ print("No frame received. Attempting to reconnect...")
+ cap.release()
+ time.sleep(1)
+ cap = open_stream(rtsp_url)
+ continue
+
+ cv2.imshow("RTSP Stream", frame)
+
+ if cv2.waitKey(1) & 0xFF == ord('q'):
+ print("Exiting the stream...")
+ break
+
+ cap.release()
+ cv2.destroyAllWindows()
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file