Video support: Adding models for video and object detection track (#51)

JonasWurst · michal-lightly · web-flow · commit d8f8f84a2007 · 2026-01-14T15:09:48.000+01:00
* Adding models: video and object detection track

* Apply suggestions from code review

Co-authored-by: michal-lightly <105644579+michal-lightly@users.noreply.github.com>

* fix naming

---------

Co-authored-by: michal-lightly <105644579+michal-lightly@users.noreply.github.com>
diff --git a/src/labelformat/model/object_detection_track.py b/src/labelformat/model/object_detection_track.py
@@ -0,0 +1,67 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from argparse import ArgumentParser
+from dataclasses import dataclass
+from typing import Iterable, List
+
+from labelformat.model.bounding_box import BoundingBox
+from labelformat.model.category import Category
+from labelformat.model.video import Video
+
+
+@dataclass(frozen=True)
+class SingleObjectDetectionTrack:
+    """The track of a single object: its category and its sequence of boxes.
+
+    VideoObjectDetectionTrack requires ``boxes`` to contain one entry per frame
+    of the video it is attached to.
+    """
+
+    # Category assigned to the tracked object.
+    category: Category
+    # Entries are either a BoundingBox or None; per the VideoObjectDetectionTrack
+    # docstring, None marks a frame on which the object is not present.
+    boxes: list[BoundingBox | None]
+    # TODO (Jonas, 01/2026): Add confidence
+
+
+@dataclass(frozen=True)
+class VideoObjectDetectionTrack:
+    """
+    A video together with the object detection tracks annotated on it.
+    The video has N frames and carries M objects. Every object holds exactly N
+    entries, one per frame; an entry is None when the object is not present on
+    that frame.
+    """
+
+    video: Video
+    objects: List[SingleObjectDetectionTrack]
+
+    def __post_init__(self) -> None:
+        """Raise ValueError unless every track has one box entry per video frame."""
+        expected_length = self.video.number_of_frames
+        # any() stops at the first mismatching track, like an explicit loop would.
+        if any(len(track.boxes) != expected_length for track in self.objects):
+            raise ValueError(
+                "Length of object detection track does not match the number of frames in the video."
+            )
+
+
+class ObjectDetectionTrackInput(ABC):
+    """Abstract interface for a source of video object detection track labels.
+
+    All four methods are abstract, so a concrete subclass must override each
+    of them before it can be instantiated.
+    """
+
+    @staticmethod
+    @abstractmethod
+    def add_cli_arguments(parser: ArgumentParser) -> None:
+        """Receive the argument parser for this input.
+
+        Presumably implementations register their command line options on
+        ``parser`` - confirm against the concrete inputs.
+        """
+        raise NotImplementedError()
+
+    @abstractmethod
+    def get_categories(self) -> Iterable[Category]:
+        """Return an iterable of the categories of this input."""
+        raise NotImplementedError()
+
+    @abstractmethod
+    def get_videos(self) -> Iterable[Video]:
+        """Return an iterable of the videos of this input."""
+        raise NotImplementedError()
+
+    @abstractmethod
+    def get_labels(self) -> Iterable[VideoObjectDetectionTrack]:
+        """Return an iterable of per-video object detection track labels."""
+        raise NotImplementedError()
+
+
+class ObjectDetectionTrackOutput(ABC):
+    """Abstract interface for a writer of video object detection track labels."""
+
+    @staticmethod
+    @abstractmethod
+    def add_cli_arguments(parser: ArgumentParser) -> None:
+        """Receive the argument parser for this output.
+
+        Presumably implementations register their command line options on
+        ``parser`` - confirm against the concrete outputs.
+        """
+        raise NotImplementedError()
+
+    # NOTE(review): unlike add_cli_arguments, save is not marked @abstractmethod,
+    # so a subclass that does not override it can still be instantiated and only
+    # fails once save is called - confirm this is intended.
+    def save(self, label_input: ObjectDetectionTrackInput) -> None:
+        """Persist the labels provided by ``label_input``; must be overridden.
+
+        Raises:
+            NotImplementedError: Always, in this base implementation.
+        """
+        raise NotImplementedError()
diff --git a/src/labelformat/model/video.py b/src/labelformat/model/video.py
@@ -0,0 +1,11 @@
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True)
+class Video:
+    """Immutable metadata record describing one video."""
+
+    # Integer identifier of the video; presumably unique per dataset - TODO confirm.
+    id: int
+    # File name of the video (the unit tests use "test.mov").
+    filename: str
+    # Presumably the frame size in pixels - TODO confirm.
+    width: int
+    height: int
+    # Frame count; VideoObjectDetectionTrack checks each track's box list against it.
+    number_of_frames: int
+    # TODO (Jonas, 01/2026): Add list of frames
diff --git a/tests/unit/model/test_object_detection_track.py b/tests/unit/model/test_object_detection_track.py
@@ -0,0 +1,55 @@
+from __future__ import annotations
+
+import pytest
+
+from labelformat.model.bounding_box import BoundingBox
+from labelformat.model.category import Category
+from labelformat.model.object_detection_track import (
+    SingleObjectDetectionTrack,
+    VideoObjectDetectionTrack,
+)
+from labelformat.model.video import Video
+
+
+class TestVideoObjectDetectionTrack:
+    """Covers the frame-count check performed by VideoObjectDetectionTrack."""
+
+    @staticmethod
+    def _make_track(
+        category_id: int, name: str, box_count: int
+    ) -> SingleObjectDetectionTrack:
+        """Build a track holding `box_count` identical unit boxes."""
+        return SingleObjectDetectionTrack(
+            category=Category(id=category_id, name=name),
+            boxes=[
+                BoundingBox(xmin=0, ymin=0, xmax=1, ymax=1) for _ in range(box_count)
+            ],
+        )
+
+    def test_post_init__frames_equal_boxes_length__valid(self) -> None:
+        """Two tracks with two boxes each are accepted for a two-frame video."""
+        cat_track = self._make_track(category_id=0, name="cat", box_count=2)
+        dog_track = self._make_track(category_id=1, name="dog", box_count=2)
+        two_frame_video = Video(
+            id=0, filename="test.mov", width=1, height=1, number_of_frames=2
+        )
+
+        labels = VideoObjectDetectionTrack(
+            video=two_frame_video, objects=[cat_track, dog_track]
+        )
+
+        assert len(labels.objects) == 2
+        assert len(labels.objects[0].boxes) == 2
+
+    def test_post_init__frames_equal_boxes_length___invalid(self) -> None:
+        """A track with three boxes is rejected for a two-frame video."""
+        matching_track = self._make_track(category_id=0, name="cat", box_count=2)
+        too_long_track = self._make_track(category_id=1, name="dog", box_count=3)
+        two_frame_video = Video(
+            id=0, filename="test.mov", width=1, height=1, number_of_frames=2
+        )
+        expected_message = "Length of object detection track does not match the number of frames in the video."
+
+        with pytest.raises(ValueError, match=expected_message):
+            VideoObjectDetectionTrack(
+                video=two_frame_video, objects=[matching_track, too_long_track]
+            )