From 7f96b971d2a9f8c03885898fcf9f12795dc21e1f Mon Sep 17 00:00:00 2001 From: David Stranava Date: Sun, 24 May 2026 23:28:43 +0200 Subject: [PATCH 1/2] Added magic algorithm for polygon mutations --- .../schema/training-external.schema.ts | 14 +++ .../services/training-external.service.ts | 105 +++++++++++++++- apps/ml/app/ml/dataset.py | 11 +- apps/ml/app/ml/train_model.py | 6 +- apps/ml/app/models/image.py | 22 +++- apps/ml/app/models/labels/polygon_label.py | 113 ++++++++++++++++-- apps/ml/app/models/polygon_split_config.py | 33 +++++ apps/ml/app/models/training_config.py | 5 + 8 files changed, 294 insertions(+), 15 deletions(-) create mode 100644 apps/ml/app/models/polygon_split_config.py diff --git a/apps/api/src/modules/training-external/schema/training-external.schema.ts b/apps/api/src/modules/training-external/schema/training-external.schema.ts index 4f9d178f..3360cdb8 100644 --- a/apps/api/src/modules/training-external/schema/training-external.schema.ts +++ b/apps/api/src/modules/training-external/schema/training-external.schema.ts @@ -97,6 +97,19 @@ export const trainingRectangleLabelSchema = z.object({ ...trainingSharedLabelSchema, }); +/** + * Optional dataset-prep tweak that splits "bridged" polygon annotations into + * one label per visually-disconnected piece. Resolved from human-readable + * names in customHyperparams (`split_classes`) to internal indices on the + * API side before being sent to the ML service. See + * training-external.service.ts for the resolution + validation logic. 
+ */ +export const trainingPolygonSplitSchema = z.object({ + enabled: z.boolean(), + class_indices: z.number().int().nonnegative().array(), + kernel_size: z.number().int().positive(), +}); + export const trainingConfigSchema = z.object({ output_types: z.enum(ModelOutputTypeEnum).array(), epochs: z.number(), @@ -123,6 +136,7 @@ export const trainingConfigSchema = z.object({ .array(), }), custom_hyperparams: z.record(z.string(), z.unknown()).default({}), + polygon_split: trainingPolygonSplitSchema.optional(), }); export const trainingOutputUploadSchema = z.object({ diff --git a/apps/api/src/modules/training-external/services/training-external.service.ts b/apps/api/src/modules/training-external/services/training-external.service.ts index 5b4f53f5..54906fa2 100644 --- a/apps/api/src/modules/training-external/services/training-external.service.ts +++ b/apps/api/src/modules/training-external/services/training-external.service.ts @@ -464,6 +464,16 @@ export class TrainingExternalService { } }) + // Extract the polygon-split feature flag (if any) from customHyperparams + // before forwarding the remaining hyperparams. The split keys are an + // ml-yolo / Luxonis dataset-prep concern, not trainer knobs, so they + // must NOT leak into the YAML config that gets deep-merged into the + // training runtime config. 
+ const { polygon_split, remainingHyperparams } = this.extractPolygonSplit( + model.customHyperparams, + model.labels, + ); + const basePayload: TrainingBasePayload = { id: model.id, output_config: outputUploads, @@ -491,7 +501,8 @@ export class TrainingExternalService { keep_original: pp.keepOriginal, })), }, - custom_hyperparams: model.customHyperparams, + custom_hyperparams: remainingHyperparams, + ...(polygon_split && { polygon_split }), }, }; @@ -560,4 +571,96 @@ export class TrainingExternalService { } } } + + /** + * Pull the bridged-polygon-split feature flag out of customHyperparams, + * resolve its `split_classes` (human-readable label names) into the + * 0-based class indices the ML service uses, and return the remaining + * hyperparams with the split keys removed so they don't pollute the + * downstream trainer config. + * + * The flag is fully optional: when `split_bridged_polygons` is absent + * or falsy, `polygon_split` is undefined and only the split_* keys are stripped. + * + * Throws BadRequestException with an actionable message on misconfiguration + * — empty class list, unknown names, non-positive / even kernel size, + * wrong types — to fail fast at the train-trigger step rather than + * silently disabling the feature mid-training. + */ + private extractPolygonSplit( + customHyperparams: Record, + labels: ModelEntity["labels"], + ): { + polygon_split: + | { enabled: true; class_indices: number[]; kernel_size: number } + | undefined; + remainingHyperparams: Record; + } { + const rest = { ...customHyperparams }; + const enabledRaw = rest.split_bridged_polygons; + const classesRaw = rest.split_classes; + const kernelRaw = rest.split_kernel_size; + + // Always strip the keys from what we pass to Luxonis, even if the flag + // is off — they're never valid trainer config. 
+ delete rest.split_bridged_polygons; + delete rest.split_classes; + delete rest.split_kernel_size; + + if (!enabledRaw) { + return { polygon_split: undefined, remainingHyperparams: rest }; + } + + if (!Array.isArray(classesRaw) || classesRaw.length === 0) { + throw new BadRequestException( + "split_bridged_polygons is enabled but split_classes is missing or empty. " + + "Provide a non-empty array of label names, e.g. [\"kapie\", \"sunkovy salam\"].", + ); + } + + const labelNameToIndex = new Map( + labels.map((l, i) => [l.name, i]), + ); + + const unknown: string[] = []; + const indices: number[] = []; + for (const name of classesRaw) { + if (typeof name !== "string") { + throw new BadRequestException( + `split_classes entries must be strings, got ${typeof name}: ${JSON.stringify(name)}`, + ); + } + const idx = labelNameToIndex.get(name); + if (idx === undefined) { + unknown.push(name); + } else { + indices.push(idx); + } + } + + if (unknown.length > 0) { + const available = labels.map((l) => l.name).join(", "); + throw new BadRequestException( + `split_classes contains label name(s) not in this model: [${unknown.join(", ")}]. ` + + `Available labels: [${available}]`, + ); + } + + const kernel = kernelRaw === undefined ? 
9 : kernelRaw; + if (typeof kernel !== "number" || !Number.isInteger(kernel) || kernel <= 0) { + throw new BadRequestException( + `split_kernel_size must be a positive integer, got ${JSON.stringify(kernelRaw)}`, + ); + } + if (kernel % 2 === 0) { + throw new BadRequestException( + `split_kernel_size must be odd (so the morphological kernel has a centred pixel), got ${kernel}`, + ); + } + + return { + polygon_split: { enabled: true, class_indices: indices, kernel_size: kernel }, + remainingHyperparams: rest, + }; + } } diff --git a/apps/ml/app/ml/dataset.py b/apps/ml/app/ml/dataset.py index ad266477..483eee28 100644 --- a/apps/ml/app/ml/dataset.py +++ b/apps/ml/app/ml/dataset.py @@ -8,6 +8,7 @@ from ..models.dataset_config import DatasetConfig from ..models.image import Image from ..models.model_type import ModelType +from ..models.polygon_split_config import PolygonSplitConfig DATASET_DIR = "dataset" DATASET_CONFIG = "dataset_config.yml" @@ -86,7 +87,11 @@ def prepare_classification_directory( def prepare_dataset( - dir: str, images: list[Image], config: DatasetConfig, task_type: ModelType + dir: str, + images: list[Image], + config: DatasetConfig, + task_type: ModelType, + polygon_split: PolygonSplitConfig | None = None, ): global VAL_DIR dir = f"{dir}/{DATASET_DIR}" @@ -112,4 +117,6 @@ def prepare_dataset( ) copy_image(image, f"{image_dir}/{curr_dir}") with open(f"{label_dir}/{curr_dir}/{label_filename}", "w") as f: - f.write("\n".join(image.labels_str(task_type))) + f.write( + "\n".join(image.labels_str(task_type, polygon_split=polygon_split)) + ) diff --git a/apps/ml/app/ml/train_model.py b/apps/ml/app/ml/train_model.py index 9d54475c..3e02848f 100644 --- a/apps/ml/app/ml/train_model.py +++ b/apps/ml/app/ml/train_model.py @@ -72,7 +72,11 @@ def run_training(config: ModelConfig): luxonis_config = generate_luxonis_config(config, dir) config_path = f"{dir}/config.yml" prepare_dataset( - dir, config.data, config.training_config.dataset_config, config.type + dir, + 
config.data, + config.training_config.dataset_config, + config.type, + polygon_split=config.training_config.polygon_split, ) preprocess_cfg = config.training_config.dataset_config if preprocess_cfg.preprocessings: diff --git a/apps/ml/app/models/image.py b/apps/ml/app/models/image.py index df19f6f7..915c37f3 100644 --- a/apps/ml/app/models/image.py +++ b/apps/ml/app/models/image.py @@ -7,6 +7,7 @@ from .labels.polygon_label import PolygonLabel from .labels.rectangle_label import RectangleLabel from .model_type import ModelType +from .polygon_split_config import PolygonSplitConfig class Image(BaseSchema): @@ -15,16 +16,31 @@ class Image(BaseSchema): height: int labels: list[Union[ClassificationLabel, PolygonLabel, RectangleLabel]] = [] - def labels_str(self, model_type: ModelType | None = None) -> list[str]: + def labels_str( + self, + model_type: ModelType | None = None, + polygon_split: PolygonSplitConfig | None = None, + ) -> list[str]: result = [] for label in self.labels: if isinstance(label, ClassificationLabel): result.append(label.to_str(self.width, self.height)) elif isinstance(label, PolygonLabel): + kernel = ( + polygon_split.kernel_size + if polygon_split + and polygon_split.enabled + and label.label.label_number in polygon_split.class_indices + else None + ) if model_type == ModelType.DETECTION: - result.append(label.to_bbox_str(self.width, self.height)) + result.extend( + label.to_bbox_str_lines(self.width, self.height, kernel) + ) else: - result.append(label.to_str(self.width, self.height)) + result.extend( + label.to_str_lines(self.width, self.height, kernel) + ) elif isinstance(label, RectangleLabel): result.append(label.to_str(self.width, self.height)) return result diff --git a/apps/ml/app/models/labels/polygon_label.py b/apps/ml/app/models/labels/polygon_label.py index 382b0478..5ea2db4f 100644 --- a/apps/ml/app/models/labels/polygon_label.py +++ b/apps/ml/app/models/labels/polygon_label.py @@ -1,25 +1,94 @@ +import cv2 +import numpy as np + from 
..base_schema import BaseSchema from .label import Label +# Components smaller than this fraction of the opened polygon's area are +# discarded — they're almost always rasterisation noise rather than real +# pieces of the object. +_MIN_COMPONENT_AREA_RATIO = 0.05 + class PolygonLabel(BaseSchema): label: Label points: list[tuple[float, float]] - def __get_normalized_points( - self, width: int, height: int - ) -> list[tuple[float, float]]: + # Points are stored in percentage space (0-100); YOLO label files want + # 0-1 normalised coords. The conversion is a plain division by 100, so + # it needs no image width/height (the old signature took them unused). + def __normalized_points(self) -> list[tuple[float, float]]: return [(x / 100, y / 100) for x, y in self.points] - def to_str(self, width: int, height: int) -> str: - normalized_points = self.__get_normalized_points(width, height) + def __split_components( + self, width: int, height: int, kernel_size: int + ) -> list[list[tuple[float, float]]]: + """Rasterise the polygon, apply a morphological opening to break thin + bridges between visually-disconnected pieces, and emit one polygon + (in normalised 0-1 coords) per surviving connected component. + + Falls back to the original polygon when the opening leaves a single + component, when the polygon is degenerate, or when any geometry step + would otherwise produce no usable contour. 
+ """ + if width <= 0 or height <= 0 or kernel_size <= 0: + return [self.__normalized_points()] + + if len(self.points) < 3: + return [self.__normalized_points()] + + pixel_pts = np.array( + [ + (int(round((x / 100) * width)), int(round((y / 100) * height))) + for x, y in self.points + ], + dtype=np.int32, + ) + + mask = np.zeros((height, width), dtype=np.uint8) + cv2.fillPoly(mask, [pixel_pts], 255) + + kernel = cv2.getStructuringElement( + cv2.MORPH_ELLIPSE, (kernel_size, kernel_size) + ) + opened = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel) + + num_labels, comp_labels = cv2.connectedComponents(opened) + # num_labels counts background as label 0, so <= 2 means at most one + # real component survived the opening — no split happened. + if num_labels <= 2: + return [self.__normalized_points()] + + total_area = int(np.count_nonzero(opened)) + if total_area == 0: + return [self.__normalized_points()] + + out: list[list[tuple[float, float]]] = [] + for cid in range(1, num_labels): + comp = np.where(comp_labels == cid, 255, 0).astype(np.uint8) + comp_area = int(np.count_nonzero(comp)) + if comp_area < _MIN_COMPONENT_AREA_RATIO * total_area: + continue + contours, _ = cv2.findContours( + comp, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_KCOS + ) + if not contours: + continue + biggest = max(contours, key=cv2.contourArea).squeeze() + if biggest.ndim != 2 or biggest.shape[0] < 3: + continue + out.append( + [(float(p[0]) / width, float(p[1]) / height) for p in biggest] + ) + + return out or [self.__normalized_points()] + + def __seg_line(self, normalized_points: list[tuple[float, float]]) -> str: points_str = " ".join(f"{x:.6f} {y:.6f}" for x, y in normalized_points) return f"{self.label.label_number} {points_str}" - def to_bbox_str(self, width: int, height: int) -> str: - """Convert polygon to YOLO bbox format (cx, cy, w, h) normalized.""" - normalized_points = self.__get_normalized_points(width, height) + def __bbox_line(self, normalized_points: list[tuple[float, 
float]]) -> str: xs = [x for x, y in normalized_points] ys = [y for x, y in normalized_points] x_min = max(0.0, min(xs)) @@ -31,3 +100,31 @@ def to_bbox_str(self, width: int, height: int) -> str: w = x_max - x_min h = y_max - y_min return f"{self.label.label_number} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}" + + def __resolve_polygons( + self, width: int, height: int, split_kernel_size: int | None + ) -> list[list[tuple[float, float]]]: + if split_kernel_size: + return self.__split_components(width, height, split_kernel_size) + return [self.__normalized_points()] + + def to_str_lines( + self, width: int, height: int, split_kernel_size: int | None = None + ) -> list[str]: + polygons = self.__resolve_polygons(width, height, split_kernel_size) + return [self.__seg_line(p) for p in polygons] + + def to_bbox_str_lines( + self, width: int, height: int, split_kernel_size: int | None = None + ) -> list[str]: + polygons = self.__resolve_polygons(width, height, split_kernel_size) + return [self.__bbox_line(p) for p in polygons] + + # Backwards-compatible single-string accessors. The split-aware caller + # (Image.labels_str) uses the *_lines variants directly; these remain + # for any other consumers that expect a single label string. 
+ def to_str(self, width: int, height: int) -> str: + return self.to_str_lines(width, height)[0] + + def to_bbox_str(self, width: int, height: int) -> str: + return self.to_bbox_str_lines(width, height)[0] diff --git a/apps/ml/app/models/polygon_split_config.py b/apps/ml/app/models/polygon_split_config.py new file mode 100644 index 00000000..2b76d344 --- /dev/null +++ b/apps/ml/app/models/polygon_split_config.py @@ -0,0 +1,33 @@ +from pydantic import field_validator + +from .base_schema import BaseSchema + + +class PolygonSplitConfig(BaseSchema): + """Optional dataset-prep step that splits a single polygon annotation into + multiple components when the polygon traces a thin "bridge" between two + visually-separate pieces (current annotation convention for kapie / + sunkovy salam). Each component becomes its own YOLO label line so the + model is trained on natural single-blob predictions instead of a single + instance that spans empty space. + + Disabled unless explicitly enabled on the model's customHyperparams. + """ + + enabled: bool = False + # Internal class indices (0-based) that should be subject to the split. + # Translated by the API from human-readable label names before sending. + class_indices: list[int] = [] + # Side length, in pixels of the original image, of the k x k elliptical + # structuring element used for the morphological opening. Must be a + # positive odd integer so the kernel has a well-defined centre. 
+ kernel_size: int = 9 + + @field_validator("kernel_size") + @classmethod + def _kernel_must_be_positive_odd(cls, v: int) -> int: + if v <= 0: + raise ValueError(f"kernel_size must be positive, got {v}") + if v % 2 == 0: + raise ValueError(f"kernel_size must be odd, got {v}") + return v diff --git a/apps/ml/app/models/training_config.py b/apps/ml/app/models/training_config.py index fb7d451a..9c32ed33 100644 --- a/apps/ml/app/models/training_config.py +++ b/apps/ml/app/models/training_config.py @@ -1,6 +1,7 @@ from .base_schema import BaseSchema from .dataset_config import DatasetConfig from .model_type import ModelOutputType +from .polygon_split_config import PolygonSplitConfig class OutputUpload(BaseSchema): @@ -15,3 +16,7 @@ class TrainingConfig(BaseSchema): dataset_config: DatasetConfig output_types: list[ModelOutputType] custom_hyperparams: dict = {} + # Optional dataset-prep tweak that splits "bridged" polygon annotations + # into one label entry per visually-disconnected piece. See + # PolygonSplitConfig for the full rationale. 
+ polygon_split: PolygonSplitConfig | None = None From 16031fdfeac1ea23759b718bc1544e1eb0156370 Mon Sep 17 00:00:00 2001 From: David Stranava Date: Mon, 25 May 2026 00:13:19 +0200 Subject: [PATCH 2/2] ML yolo magic algorithm --- apps/ml-yolo/app/ml/dataset.py | 11 +- apps/ml-yolo/app/ml/train_model.py | 1 + apps/ml-yolo/app/models/image.py | 22 +++- .../app/models/labels/polygon_label.py | 113 ++++++++++++++++-- .../app/models/polygon_split_config.py | 33 +++++ apps/ml-yolo/app/models/training_config.py | 5 + 6 files changed, 172 insertions(+), 13 deletions(-) create mode 100644 apps/ml-yolo/app/models/polygon_split_config.py diff --git a/apps/ml-yolo/app/ml/dataset.py b/apps/ml-yolo/app/ml/dataset.py index ad266477..483eee28 100644 --- a/apps/ml-yolo/app/ml/dataset.py +++ b/apps/ml-yolo/app/ml/dataset.py @@ -8,6 +8,7 @@ from ..models.dataset_config import DatasetConfig from ..models.image import Image from ..models.model_type import ModelType +from ..models.polygon_split_config import PolygonSplitConfig DATASET_DIR = "dataset" DATASET_CONFIG = "dataset_config.yml" @@ -86,7 +87,11 @@ def prepare_classification_directory( def prepare_dataset( - dir: str, images: list[Image], config: DatasetConfig, task_type: ModelType + dir: str, + images: list[Image], + config: DatasetConfig, + task_type: ModelType, + polygon_split: PolygonSplitConfig | None = None, ): global VAL_DIR dir = f"{dir}/{DATASET_DIR}" @@ -112,4 +117,6 @@ def prepare_dataset( ) copy_image(image, f"{image_dir}/{curr_dir}") with open(f"{label_dir}/{curr_dir}/{label_filename}", "w") as f: - f.write("\n".join(image.labels_str(task_type))) + f.write( + "\n".join(image.labels_str(task_type, polygon_split=polygon_split)) + ) diff --git a/apps/ml-yolo/app/ml/train_model.py b/apps/ml-yolo/app/ml/train_model.py index 98e04870..5aab628c 100644 --- a/apps/ml-yolo/app/ml/train_model.py +++ b/apps/ml-yolo/app/ml/train_model.py @@ -202,6 +202,7 @@ def run_training(config: ModelConfig) -> None: config.data, 
config.training_config.dataset_config, config.type, + polygon_split=config.training_config.polygon_split, ) # 2) Optional deterministic preprocessings. diff --git a/apps/ml-yolo/app/models/image.py b/apps/ml-yolo/app/models/image.py index df19f6f7..915c37f3 100644 --- a/apps/ml-yolo/app/models/image.py +++ b/apps/ml-yolo/app/models/image.py @@ -7,6 +7,7 @@ from .labels.polygon_label import PolygonLabel from .labels.rectangle_label import RectangleLabel from .model_type import ModelType +from .polygon_split_config import PolygonSplitConfig class Image(BaseSchema): @@ -15,16 +16,31 @@ class Image(BaseSchema): height: int labels: list[Union[ClassificationLabel, PolygonLabel, RectangleLabel]] = [] - def labels_str(self, model_type: ModelType | None = None) -> list[str]: + def labels_str( + self, + model_type: ModelType | None = None, + polygon_split: PolygonSplitConfig | None = None, + ) -> list[str]: result = [] for label in self.labels: if isinstance(label, ClassificationLabel): result.append(label.to_str(self.width, self.height)) elif isinstance(label, PolygonLabel): + kernel = ( + polygon_split.kernel_size + if polygon_split + and polygon_split.enabled + and label.label.label_number in polygon_split.class_indices + else None + ) if model_type == ModelType.DETECTION: - result.append(label.to_bbox_str(self.width, self.height)) + result.extend( + label.to_bbox_str_lines(self.width, self.height, kernel) + ) else: - result.append(label.to_str(self.width, self.height)) + result.extend( + label.to_str_lines(self.width, self.height, kernel) + ) elif isinstance(label, RectangleLabel): result.append(label.to_str(self.width, self.height)) return result diff --git a/apps/ml-yolo/app/models/labels/polygon_label.py b/apps/ml-yolo/app/models/labels/polygon_label.py index 382b0478..5ea2db4f 100644 --- a/apps/ml-yolo/app/models/labels/polygon_label.py +++ b/apps/ml-yolo/app/models/labels/polygon_label.py @@ -1,25 +1,94 @@ +import cv2 +import numpy as np + from ..base_schema import 
BaseSchema from .label import Label +# Components smaller than this fraction of the opened polygon's area are +# discarded — they're almost always rasterisation noise rather than real +# pieces of the object. +_MIN_COMPONENT_AREA_RATIO = 0.05 + class PolygonLabel(BaseSchema): label: Label points: list[tuple[float, float]] - def __get_normalized_points( - self, width: int, height: int - ) -> list[tuple[float, float]]: + # Points are stored in percentage space (0-100); YOLO label files want + # 0-1 normalised coords. The conversion is a plain division by 100, so + # it needs no image width/height (the old signature took them unused). + def __normalized_points(self) -> list[tuple[float, float]]: return [(x / 100, y / 100) for x, y in self.points] - def to_str(self, width: int, height: int) -> str: - normalized_points = self.__get_normalized_points(width, height) + def __split_components( + self, width: int, height: int, kernel_size: int + ) -> list[list[tuple[float, float]]]: + """Rasterise the polygon, apply a morphological opening to break thin + bridges between visually-disconnected pieces, and emit one polygon + (in normalised 0-1 coords) per surviving connected component. + + Falls back to the original polygon when the opening leaves a single + component, when the polygon is degenerate, or when any geometry step + would otherwise produce no usable contour. 
+ """ + if width <= 0 or height <= 0 or kernel_size <= 0: + return [self.__normalized_points()] + + if len(self.points) < 3: + return [self.__normalized_points()] + + pixel_pts = np.array( + [ + (int(round((x / 100) * width)), int(round((y / 100) * height))) + for x, y in self.points + ], + dtype=np.int32, + ) + + mask = np.zeros((height, width), dtype=np.uint8) + cv2.fillPoly(mask, [pixel_pts], 255) + + kernel = cv2.getStructuringElement( + cv2.MORPH_ELLIPSE, (kernel_size, kernel_size) + ) + opened = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel) + + num_labels, comp_labels = cv2.connectedComponents(opened) + # num_labels counts background as label 0, so <= 2 means at most one + # real component survived the opening — no split happened. + if num_labels <= 2: + return [self.__normalized_points()] + + total_area = int(np.count_nonzero(opened)) + if total_area == 0: + return [self.__normalized_points()] + + out: list[list[tuple[float, float]]] = [] + for cid in range(1, num_labels): + comp = np.where(comp_labels == cid, 255, 0).astype(np.uint8) + comp_area = int(np.count_nonzero(comp)) + if comp_area < _MIN_COMPONENT_AREA_RATIO * total_area: + continue + contours, _ = cv2.findContours( + comp, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_KCOS + ) + if not contours: + continue + biggest = max(contours, key=cv2.contourArea).squeeze() + if biggest.ndim != 2 or biggest.shape[0] < 3: + continue + out.append( + [(float(p[0]) / width, float(p[1]) / height) for p in biggest] + ) + + return out or [self.__normalized_points()] + + def __seg_line(self, normalized_points: list[tuple[float, float]]) -> str: points_str = " ".join(f"{x:.6f} {y:.6f}" for x, y in normalized_points) return f"{self.label.label_number} {points_str}" - def to_bbox_str(self, width: int, height: int) -> str: - """Convert polygon to YOLO bbox format (cx, cy, w, h) normalized.""" - normalized_points = self.__get_normalized_points(width, height) + def __bbox_line(self, normalized_points: list[tuple[float, 
float]]) -> str: xs = [x for x, y in normalized_points] ys = [y for x, y in normalized_points] x_min = max(0.0, min(xs)) @@ -31,3 +100,31 @@ def to_bbox_str(self, width: int, height: int) -> str: w = x_max - x_min h = y_max - y_min return f"{self.label.label_number} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}" + + def __resolve_polygons( + self, width: int, height: int, split_kernel_size: int | None + ) -> list[list[tuple[float, float]]]: + if split_kernel_size: + return self.__split_components(width, height, split_kernel_size) + return [self.__normalized_points()] + + def to_str_lines( + self, width: int, height: int, split_kernel_size: int | None = None + ) -> list[str]: + polygons = self.__resolve_polygons(width, height, split_kernel_size) + return [self.__seg_line(p) for p in polygons] + + def to_bbox_str_lines( + self, width: int, height: int, split_kernel_size: int | None = None + ) -> list[str]: + polygons = self.__resolve_polygons(width, height, split_kernel_size) + return [self.__bbox_line(p) for p in polygons] + + # Backwards-compatible single-string accessors. The split-aware caller + # (Image.labels_str) uses the *_lines variants directly; these remain + # for any other consumers that expect a single label string. 
+ def to_str(self, width: int, height: int) -> str: + return self.to_str_lines(width, height)[0] + + def to_bbox_str(self, width: int, height: int) -> str: + return self.to_bbox_str_lines(width, height)[0] diff --git a/apps/ml-yolo/app/models/polygon_split_config.py b/apps/ml-yolo/app/models/polygon_split_config.py new file mode 100644 index 00000000..2b76d344 --- /dev/null +++ b/apps/ml-yolo/app/models/polygon_split_config.py @@ -0,0 +1,33 @@ +from pydantic import field_validator + +from .base_schema import BaseSchema + + +class PolygonSplitConfig(BaseSchema): + """Optional dataset-prep step that splits a single polygon annotation into + multiple components when the polygon traces a thin "bridge" between two + visually-separate pieces (current annotation convention for kapie / + sunkovy salam). Each component becomes its own YOLO label line so the + model is trained on natural single-blob predictions instead of a single + instance that spans empty space. + + Disabled unless explicitly enabled on the model's customHyperparams. + """ + + enabled: bool = False + # Internal class indices (0-based) that should be subject to the split. + # Translated by the API from human-readable label names before sending. + class_indices: list[int] = [] + # Side length, in pixels of the original image, of the k x k elliptical + # structuring element used for the morphological opening. Must be a + # positive odd integer so the kernel has a well-defined centre. 
+ kernel_size: int = 9 + + @field_validator("kernel_size") + @classmethod + def _kernel_must_be_positive_odd(cls, v: int) -> int: + if v <= 0: + raise ValueError(f"kernel_size must be positive, got {v}") + if v % 2 == 0: + raise ValueError(f"kernel_size must be odd, got {v}") + return v diff --git a/apps/ml-yolo/app/models/training_config.py b/apps/ml-yolo/app/models/training_config.py index 1ea4e6d1..4349d6b1 100644 --- a/apps/ml-yolo/app/models/training_config.py +++ b/apps/ml-yolo/app/models/training_config.py @@ -1,6 +1,7 @@ from .base_schema import BaseSchema from .dataset_config import DatasetConfig from .model_type import ModelOutputType +from .polygon_split_config import PolygonSplitConfig class OutputUpload(BaseSchema): @@ -21,3 +22,7 @@ class TrainingConfig(BaseSchema): # we don't ship a calibration dataset). Optional/defaulted so the # field stays backward-compatible with older API builds. quantization: str = "FP16" + # Optional dataset-prep tweak that splits "bridged" polygon annotations + # into one label entry per visually-disconnected piece. See + # PolygonSplitConfig for the full rationale. + polygon_split: PolygonSplitConfig | None = None