Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,19 +41,27 @@ Detect face and predict gaze from webcam
```python
from l2cs import Pipeline, render
import cv2
import pathlib
import torch

CWD = pathlib.Path.cwd()

gaze_pipeline = Pipeline(
weights=CWD / 'models' / 'L2CSNet_gaze360.pkl',
arch='ResNet50',
device=torch.device('cpu') # or 'gpu'
device=torch.device('cpu') # or 'cuda', 'opengl', ...
)

cap = cv2.VideoCapture(cam)
cap = cv2.VideoCapture(0)
_, frame = cap.read()

# Process frame and visualize
results = gaze_pipeline.step(frame)
frame = render(frame, results)

cv2.imshow("Detected face", frame)
cv2.waitKey(0)
cv2.destroyAllWindows()
```

## Demo
Expand Down
27 changes: 20 additions & 7 deletions l2cs/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from dataclasses import dataclass
from face_detection import RetinaFace

from .utils import prep_input_numpy, getArch
from .utils import prep_input_numpy, getArch, stackSave
from .results import GazeResultContainer


Expand Down Expand Up @@ -47,7 +47,7 @@ def __init__(
self.idx_tensor = [idx for idx in range(90)]
self.idx_tensor = torch.FloatTensor(self.idx_tensor).to(self.device)

def step(self, frame: np.ndarray) -> GazeResultContainer:
def step(self, frame: np.ndarray, single_face: bool = False) -> GazeResultContainer:

# Creating containers
face_imgs = []
Expand All @@ -58,13 +58,15 @@ def step(self, frame: np.ndarray) -> GazeResultContainer:
if self.include_detector:
faces = self.detector(frame)

if faces is not None:
if faces is not None:
for box, landmark, score in faces:

# Apply threshold
if score < self.confidence_threshold:
continue

accepted_scores.append(score)

# Extract safe min and max of x,y
x_min=int(box[0])
if x_min < 0:
Expand All @@ -86,8 +88,17 @@ def step(self, frame: np.ndarray) -> GazeResultContainer:
landmarks.append(landmark)
scores.append(score)

# if single_face, only take the face with the highest score
if single_face and len(face_imgs) > 1:
max_score_index = accepted_scores.index(max(accepted_scores))
face_imgs = [face_imgs[max_score_index]]

# Predict gaze
pitch, yaw = self.predict_gaze(np.stack(face_imgs))
if len(face_imgs) != 0:
pitch, yaw = self.predict_gaze(np.stack(face_imgs))
else:
pitch = np.empty((0,1)) # empty placeholder; always check GazeResultContainer.detection to confirm that a face/gaze was successfully detected
yaw = np.empty((0,1))

else:

Expand All @@ -97,13 +108,15 @@ def step(self, frame: np.ndarray) -> GazeResultContainer:
else:
pitch, yaw = self.predict_gaze(frame)

detection = len(face_imgs) > 0 # test, if there are any detected faces/eyes
# Save data
results = GazeResultContainer(
pitch=pitch,
yaw=yaw,
bboxes=np.stack(bboxes),
landmarks=np.stack(landmarks),
scores=np.stack(scores)
bboxes=stackSave(bboxes),
landmarks=stackSave(landmarks),
scores=stackSave(scores),
detection=detection
)

return results
Expand Down
7 changes: 4 additions & 3 deletions l2cs/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class GazeResultContainer:

pitch: np.ndarray
yaw: np.ndarray
bboxes: np.ndarray
landmarks: np.ndarray
scores: np.ndarray
bboxes: np.ndarray|None
landmarks: np.ndarray|None
scores: np.ndarray|None
detection: bool
9 changes: 6 additions & 3 deletions l2cs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ def angular(gaze, label):

def select_device(device='', batch_size=None):
# device = 'cpu' or '0' or '0,1,2,3'
s = f'YOLOv3 🚀 {git_describe() or date_modified()} torch {torch.__version__} ' # string
cpu = device.lower() == 'cpu'
if cpu:
os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False
Expand All @@ -85,9 +84,8 @@ def select_device(device='', batch_size=None):
space = ' ' * len(s)
for i, d in enumerate(devices):
p = torch.cuda.get_device_properties(i)
s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / 1024 ** 2}MB)\n" # bytes to MB
else:
s += 'CPU\n'
pass

return torch.device('cuda:0' if cuda else 'cpu')

Expand Down Expand Up @@ -143,3 +141,8 @@ def getArch(arch,bins):
'The default value of ResNet50 will be used instead!')
model = L2CS( torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], bins)
return model

def stackSave(ar:list):
    """Stack a list of arrays with ``np.stack``, tolerating an empty list.

    ``np.stack`` raises when given an empty sequence, so an empty ``ar``
    short-circuits to ``None`` instead.

    Args:
        ar: The arrays (or array-likes) to stack along a new first axis.

    Returns:
        ``np.stack(ar)`` when ``ar`` holds at least one element,
        otherwise ``None``.
    """
    # Guard clause: nothing to stack, signal "no data" to the caller.
    if len(ar) == 0:
        return None
    return np.stack(ar)
7 changes: 6 additions & 1 deletion l2cs/vis.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,12 @@ def draw_bbox(frame: np.ndarray, bbox: np.ndarray):
return frame

def render(frame: np.ndarray, results: GazeResultContainer):


# Check if there is a detection in the frame/results object. If not, return an image with annotation "No detection".
if not results.detection:
frame = cv2.putText(frame, "No detection", (10,40), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0,255,0), 2, cv2.LINE_AA)
return frame

# Draw bounding boxes
for bbox in results.bboxes:
frame = draw_bbox(frame, bbox)
Expand Down