diff --git a/README.md b/README.md index e23a2884..3df5cbab 100644 --- a/README.md +++ b/README.md @@ -41,19 +41,27 @@ Detect face and predict gaze from webcam ```python from l2cs import Pipeline, render import cv2 +import pathlib +import torch + +CWD = pathlib.Path.cwd() gaze_pipeline = Pipeline( weights=CWD / 'models' / 'L2CSNet_gaze360.pkl', arch='ResNet50', - device=torch.device('cpu') # or 'gpu' + device=torch.device('cpu') # or 'cuda', 'opengl', ... ) -cap = cv2.VideoCapture(cam) +cap = cv2.VideoCapture(0) _, frame = cap.read() # Process frame and visualize results = gaze_pipeline.step(frame) frame = render(frame, results) + +cv2.imshow("Detected face", frame) +cv2.waitKey(0) +cv2.destroyAllWindows() ``` ## Demo diff --git a/l2cs/pipeline.py b/l2cs/pipeline.py index 90c9d19c..1cb7bb0f 100644 --- a/l2cs/pipeline.py +++ b/l2cs/pipeline.py @@ -8,7 +8,7 @@ from dataclasses import dataclass from face_detection import RetinaFace -from .utils import prep_input_numpy, getArch +from .utils import prep_input_numpy, getArch, stackSave from .results import GazeResultContainer @@ -47,7 +47,7 @@ def __init__( self.idx_tensor = [idx for idx in range(90)] self.idx_tensor = torch.FloatTensor(self.idx_tensor).to(self.device) - def step(self, frame: np.ndarray) -> GazeResultContainer: + def step(self, frame: np.ndarray, single_face: bool = False) -> GazeResultContainer: # Creating containers face_imgs = [] @@ -58,13 +58,15 @@ def step(self, frame: np.ndarray) -> GazeResultContainer: if self.include_detector: faces = self.detector(frame) - if faces is not None: + if faces is not None: for box, landmark, score in faces: # Apply threshold if score < self.confidence_threshold: continue + accepted_scores.append(score) + # Extract safe min and max of x,y x_min=int(box[0]) if x_min < 0: @@ -86,8 +88,17 @@ def step(self, frame: np.ndarray) -> GazeResultContainer: landmarks.append(landmark) scores.append(score) + # if single_face, only take the face with the highest score + if 
single_face and len(face_imgs) > 1: + max_score_index = accepted_scores.index(max(accepted_scores)) + face_imgs = [face_imgs[max_score_index]] + # Predict gaze - pitch, yaw = self.predict_gaze(np.stack(face_imgs)) + if len(face_imgs) != 0: + pitch, yaw = self.predict_gaze(np.stack(face_imgs)) + else: + pitch = np.empty((0,1)) # empty placeholder arrays, always assert successful detection of face/gaze with GazeResultContainer.detection + yaw = np.empty((0,1)) else: @@ -97,13 +108,15 @@ def step(self, frame: np.ndarray) -> GazeResultContainer: else: pitch, yaw = self.predict_gaze(frame) + detection = len(face_imgs) > 0 # test whether there are any detected faces/eyes # Save data results = GazeResultContainer( pitch=pitch, yaw=yaw, - bboxes=np.stack(bboxes), - landmarks=np.stack(landmarks), - scores=np.stack(scores) + bboxes=stackSave(bboxes), + landmarks=stackSave(landmarks), + scores=stackSave(scores), + detection=detection ) return results diff --git a/l2cs/results.py b/l2cs/results.py index 68e0bba8..c94acf29 100644 --- a/l2cs/results.py +++ b/l2cs/results.py @@ -6,6 +6,7 @@ class GazeResultContainer: pitch: np.ndarray yaw: np.ndarray - bboxes: np.ndarray - landmarks: np.ndarray - scores: np.ndarray + bboxes: np.ndarray|None + landmarks: np.ndarray|None + scores: np.ndarray|None + detection: bool \ No newline at end of file diff --git a/l2cs/utils.py b/l2cs/utils.py index 43b3e708..7f40980e 100644 --- a/l2cs/utils.py +++ b/l2cs/utils.py @@ -68,7 +68,6 @@ def angular(gaze, label): def select_device(device='', batch_size=None): # device = 'cpu' or '0' or '0,1,2,3' - s = f'YOLOv3 🚀 {git_describe() or date_modified()} torch {torch.__version__} ' # string cpu = device.lower() == 'cpu' if cpu: os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False @@ -85,9 +84,8 @@ def select_device(device='', batch_size=None): space = ' ' * len(s) for i, d in enumerate(devices): p = torch.cuda.get_device_properties(i) - s += f"{'' if i == 0 else space}CUDA:{d} 
({p.name}, {p.total_memory / 1024 ** 2}MB)\n" # bytes to MB else: - s += 'CPU\n' + pass return torch.device('cuda:0' if cuda else 'cpu') @@ -143,3 +141,8 @@ def getArch(arch,bins): 'The default value of ResNet50 will be used instead!') model = L2CS( torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], bins) return model + +def stackSave(ar:list): + """ Wrapper for np.stack with error handling for empty lists. If the length of the passed list == 0, returns None; otherwise returns np.stack(ar) as expected + """ + return np.stack(ar) if len(ar) > 0 else None \ No newline at end of file diff --git a/l2cs/vis.py b/l2cs/vis.py index b741d2f5..44133325 100644 --- a/l2cs/vis.py +++ b/l2cs/vis.py @@ -33,7 +33,12 @@ def draw_bbox(frame: np.ndarray, bbox: np.ndarray): return frame def render(frame: np.ndarray, results: GazeResultContainer): - + + # Check if there is a detection in the frame/results object. If not, return an image with annotation "No detection". + if not results.detection: + frame = cv2.putText(frame, "No detection", (10,40), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0,255,0), 2, cv2.LINE_AA) + return frame + # Draw bounding boxes for bbox in results.bboxes: frame = draw_bbox(frame, bbox)