From 3dedbae1b64b21b0988f70908c169a60e781c44e Mon Sep 17 00:00:00 2001 From: fcdl94 Date: Fri, 3 Oct 2025 12:36:37 +0000 Subject: [PATCH 1/7] feat(system): enhance archive extraction by removing __MACOSX directory and validating train/validation splits - Added functionality to remove the __MACOSX directory after extraction. - Implemented checks to ensure the presence of 'train' and 'validation' directories in the extracted content, raising errors if not found. --- focoos/utils/system.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/focoos/utils/system.py b/focoos/utils/system.py index 8217e6f8..fdfd1564 100644 --- a/focoos/utils/system.py +++ b/focoos/utils/system.py @@ -1,6 +1,7 @@ import importlib.metadata as metadata import os import platform +import shutil import subprocess import sys import tarfile @@ -340,9 +341,25 @@ def extract_archive( logger.info(f"[elapsed {t1 - t0:.3f} ] Extracted archive to: {extracted_dir}") comm.synchronize() + # Remove __MACOSX directory + if "__MACOSX" in os.listdir(extracted_dir): + shutil.rmtree(os.path.join(extracted_dir, "__MACOSX")) + if len(list_dir(extracted_dir)) == 1: extracted_dir = list_dir(extracted_dir)[0] + POSSIBLE_TRAIN_DIRS = ["train", "training"] + POSSIBLE_VAL_DIRS = ["valid", "val", "validation"] + if len(list_dir(extracted_dir)) > 1: + if not any(dir in POSSIBLE_TRAIN_DIRS for dir in os.listdir(extracted_dir)): + raise FileNotFoundError( + f"Train split not found in {extracted_dir}: {[str(x) for x in list_dir(extracted_dir)]}. You should provide a zip dataset with only a root folder or train and val subfolders." + ) + if not any(dir in POSSIBLE_VAL_DIRS for dir in os.listdir(extracted_dir)): + raise FileNotFoundError( + f"Validation split not found in {extracted_dir}: {[str(x) for x in list_dir(extracted_dir)]}. You should provide a zip dataset with only a root folder or train and val subfolders." + ) + # Optionally delete the original archive if delete_original: os.remove(archive_path) From 058b6b2c47e6a34a0adc902336d414d1ceba4991 Mon Sep 17 00:00:00 2001 From: fcdl94 Date: Fri, 3 Oct 2025 14:27:14 +0000 Subject: [PATCH 2/7] refactor(system): rename list_dir to list_directories and streamline directory validation - Renamed the function `list_dir` to `list_directories` for clarity. - Updated the extraction logic to use the new function name. - Improved validation checks for 'train' and 'validation' directories by using `Path` objects for better consistency and readability. --- focoos/utils/system.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/focoos/utils/system.py b/focoos/utils/system.py index fdfd1564..70ab5af8 100644 --- a/focoos/utils/system.py +++ b/focoos/utils/system.py @@ -267,7 +267,7 @@ def is_inside_sagemaker(): return res -def list_dir(base_directory: Union[str, Path]) -> List[Path]: +def list_directories(base_directory: Union[str, Path]) -> List[Path]: """ A function that lists directories within a base directory. @@ -311,10 +311,9 @@ def extract_archive( # Determine the extraction path t0 = time.time() base_dir = os.path.dirname(archive_path) + extracted_dir = base_dir if destination is not None: extracted_dir = os.path.join(base_dir, destination) - else: - extracted_dir = base_dir if comm.is_main_process(): logger.info(f"Extracting archive: {archive_path} to {extracted_dir}") @@ -345,20 +344,20 @@ def extract_archive( if "__MACOSX" in os.listdir(extracted_dir): shutil.rmtree(os.path.join(extracted_dir, "__MACOSX")) - if len(list_dir(extracted_dir)) == 1: - extracted_dir = list_dir(extracted_dir)[0] + if len(list_directories(extracted_dir)) == 1: + extracted_dir = list_directories(extracted_dir)[0] POSSIBLE_TRAIN_DIRS = ["train", "training"] POSSIBLE_VAL_DIRS = ["valid", "val", "validation"] - if len(list_dir(extracted_dir)) > 1: - if not any(dir in POSSIBLE_TRAIN_DIRS for dir in os.listdir(extracted_dir)): - raise FileNotFoundError( - f"Train split not found in {extracted_dir}: {[str(x) for x in list_dir(extracted_dir)]}. You should provide a zip dataset with only a root folder or train and val subfolders." - ) - if not any(dir in POSSIBLE_VAL_DIRS for dir in os.listdir(extracted_dir)): - raise FileNotFoundError( - f"Validation split not found in {extracted_dir}: {[str(x) for x in list_dir(extracted_dir)]}. You should provide a zip dataset with only a root folder or train and val subfolders." - ) + inner_dirs = list_directories(extracted_dir) + if not any(dir.name in POSSIBLE_TRAIN_DIRS for dir in inner_dirs): + raise FileNotFoundError( + f"Train split not found in {extracted_dir}: {[str(x) for x in inner_dirs]}. You should provide a zip dataset with only a root folder or train and val subfolders." + ) + if not any(dir.name in POSSIBLE_VAL_DIRS for dir in inner_dirs): + raise FileNotFoundError( + f"Validation split not found in {extracted_dir}: {[str(x) for x in inner_dirs]}. You should provide a zip dataset with only a root folder or train and val subfolders." + ) # Optionally delete the original archive if delete_original: From 1ebec241712b6dd1362781b656c89d0f4fa69821 Mon Sep 17 00:00:00 2001 From: fcdl94 Date: Fri, 3 Oct 2025 17:09:31 +0200 Subject: [PATCH 3/7] feat(device): enhance MPS support - Updated `get_gpus_count` to support MPS alongside CUDA. - Modified default value of `amp_enabled` in training and validation functions to False. - Improved model loading logic to handle MPS and CUDA availability with appropriate error handling. --- focoos/cli/cli.py | 4 ++-- focoos/models/focoos_model.py | 17 ++++++++++++----- focoos/ports.py | 11 ++++++++--- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/focoos/cli/cli.py b/focoos/cli/cli.py index 0b75139e..bb1bd543 100644 --- a/focoos/cli/cli.py +++ b/focoos/cli/cli.py @@ -259,7 +259,7 @@ def train( num_gpus: Annotated[int, typer.Option(help="Number of GPUs to use")] = get_gpus_count(), device: Annotated[str, typer.Option(help="Device to use")] = "cuda", workers: Annotated[int, typer.Option(help="Number of workers")] = 4, - amp_enabled: Annotated[bool, typer.Option(help="Enable automatic mixed precision")] = True, + amp_enabled: Annotated[bool, typer.Option(help="Enable automatic mixed precision")] = False, ddp_broadcast_buffers: Annotated[bool, typer.Option(help="Broadcast buffers in DDP")] = False, ddp_find_unused: Annotated[bool, typer.Option(help="Find unused parameters in DDP")] = True, checkpointer_period: Annotated[int, typer.Option(help="Checkpoint save period")] = 1000, @@ -502,7 +502,7 @@ def val( num_gpus: Annotated[int, typer.Option(help="Number of GPUs")] = get_gpus_count(), device: Annotated[str, typer.Option(help="Device")] = "cuda", workers: Annotated[int, typer.Option(help="Number of workers")] = 4, - amp_enabled: Annotated[bool, typer.Option(help="Enable AMP")] = True, + amp_enabled: Annotated[bool, typer.Option(help="Enable AMP")] = False, ddp_broadcast_buffers: Annotated[bool, typer.Option(help="DDP broadcast buffers")] = False, ddp_find_unused: Annotated[bool, typer.Option(help="DDP find unused")] = True, checkpointer_period: Annotated[int, typer.Option(help="Checkpointer period")] = 1000, diff --git a/focoos/models/focoos_model.py b/focoos/models/focoos_model.py index ad4ade7d..efcc1a29 100644 --- a/focoos/models/focoos_model.py +++ b/focoos/models/focoos_model.py @@ -114,10 +114,17 @@ def __init__(self, model: BaseModelNN, model_info: ModelInfo): self.processor.eval() self.model = model.eval() - try: - self.model = self.model.cuda() - except Exception: - logger.warning("Unable to use CUDA") + if torch.cuda.is_available(): + try: + self.model = self.model.cuda() + except Exception: + logger.warning("Unable to use CUDA") + + if torch.backends.mps.is_available(): + try: + self.model = self.model.to(device="mps") + except Exception: + logger.warning("Unable to use MPS") if self.model_info.weights_uri: self._load_weights() @@ -392,7 +399,7 @@ def export( runtime_type: RuntimeType = RuntimeType.TORCHSCRIPT_32, onnx_opset: int = 18, out_dir: Optional[str] = None, - device: Literal["cuda", "cpu", "auto"] = "auto", + device: Literal["cuda", "cpu", "mps", "auto"] = "auto", simplify_onnx: bool = True, overwrite: bool = True, image_size: Optional[Union[int, Tuple[int, int]]] = None, diff --git a/focoos/ports.py b/focoos/ports.py index 34164d37..73902fc5 100644 --- a/focoos/ports.py +++ b/focoos/ports.py @@ -950,16 +950,21 @@ class DatasetSplitType(str, Enum): def get_gpus_count(): try: - import torch.cuda + import torch - return torch.cuda.device_count() + if torch.backends.mps.is_available(): + return 1 + elif torch.cuda.is_available(): + return torch.cuda.device_count() + else: + return 0 except ImportError: return 0 SchedulerType = Literal["POLY", "FIXED", "COSINE", "MULTISTEP"] OptimizerType = Literal["ADAMW", "SGD", "RMSPROP"] -DeviceType = Literal["cuda", "cpu"] +DeviceType = Literal["cuda", "cpu", "mps"] @dataclass From 0c23ca05ec19b11227df50b8211a0867ec6f2f98 Mon Sep 17 00:00:00 2001 From: fcdl94 Date: Fri, 3 Oct 2025 17:32:33 +0200 Subject: [PATCH 4/7] fix(amp): enable automatic mixed precision by default and add CUDA availability check - Changed default value of `amp_enabled` to True in training and validation functions. - Added a check to ensure CUDA is available when using AMP, with a warning if not. --- focoos/cli/cli.py | 4 ++-- focoos/trainer/trainer.py | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/focoos/cli/cli.py b/focoos/cli/cli.py index bb1bd543..0b75139e 100644 --- a/focoos/cli/cli.py +++ b/focoos/cli/cli.py @@ -259,7 +259,7 @@ def train( num_gpus: Annotated[int, typer.Option(help="Number of GPUs to use")] = get_gpus_count(), device: Annotated[str, typer.Option(help="Device to use")] = "cuda", workers: Annotated[int, typer.Option(help="Number of workers")] = 4, - amp_enabled: Annotated[bool, typer.Option(help="Enable automatic mixed precision")] = False, + amp_enabled: Annotated[bool, typer.Option(help="Enable automatic mixed precision")] = True, ddp_broadcast_buffers: Annotated[bool, typer.Option(help="Broadcast buffers in DDP")] = False, ddp_find_unused: Annotated[bool, typer.Option(help="Find unused parameters in DDP")] = True, checkpointer_period: Annotated[int, typer.Option(help="Checkpoint save period")] = 1000, @@ -502,7 +502,7 @@ def val( num_gpus: Annotated[int, typer.Option(help="Number of GPUs")] = get_gpus_count(), device: Annotated[str, typer.Option(help="Device")] = "cuda", workers: Annotated[int, typer.Option(help="Number of workers")] = 4, - amp_enabled: Annotated[bool, typer.Option(help="Enable AMP")] = False, + amp_enabled: Annotated[bool, typer.Option(help="Enable AMP")] = True, ddp_broadcast_buffers: Annotated[bool, typer.Option(help="DDP broadcast buffers")] = False, ddp_find_unused: Annotated[bool, typer.Option(help="DDP find unused")] = True, checkpointer_period: Annotated[int, typer.Option(help="Checkpointer period")] = 1000, diff --git a/focoos/trainer/trainer.py b/focoos/trainer/trainer.py index 79c8e97f..3ce4ac5d 100644 --- a/focoos/trainer/trainer.py +++ b/focoos/trainer/trainer.py @@ -632,7 +632,7 @@ def __init__( self.zero_grad_before_forward = zero_grad_before_forward # AMP setup - if amp: + if amp and torch.cuda.is_available(): if grad_scaler is None: # the init_scale avoids the first step to be too large # and the scheduler.step() warning @@ -725,8 +725,10 @@ def run_step(self): if self.zero_grad_before_forward: self.optimizer.zero_grad() - if self.amp: - assert torch.cuda.is_available(), "[UnifiedTrainerLoop] CUDA is required for AMP training!" + if not torch.cuda.is_available(): + logger.warning("[UnifiedTrainerLoop] CUDA is not available, training without AMP!") + + if self.amp and torch.cuda.is_available(): with autocast(enabled=self.amp, dtype=self.precision, device_type="cuda"): # we need to have preprocess data here images, targets = self.processor.preprocess(data, dtype=self.precision, device=self.model.device) From b64f9999dafca21c6aa7f08e304c57f0c8679a65 Mon Sep 17 00:00:00 2001 From: fcdl94 Date: Fri, 3 Oct 2025 18:06:57 +0200 Subject: [PATCH 5/7] feat(device): improve device selection logic for training - Enhanced device selection to prioritize CUDA, MPS, or fallback to CPU based on availability. - Removed redundant CUDA availability check from the training loop. --- focoos/models/focoos_model.py | 11 ++++++++++- focoos/trainer/trainer.py | 9 ++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/focoos/models/focoos_model.py b/focoos/models/focoos_model.py index efcc1a29..09099cdb 100644 --- a/focoos/models/focoos_model.py +++ b/focoos/models/focoos_model.py @@ -160,8 +160,17 @@ def _setup_model_for_training(self, train_args: TrainerArgs, data_train: MapData """ device = get_cpu_name() system_info = get_system_info() - if system_info.gpu_info and system_info.gpu_info.devices and len(system_info.gpu_info.devices) > 0: + if ( + train_args.device == "cuda" + and system_info.gpu_info + and system_info.gpu_info.devices + and len(system_info.gpu_info.devices) > 0 + ): device = system_info.gpu_info.devices[0].gpu_name + elif train_args.device == "mps" and torch.backends.mps.is_available(): + device = "mps" + else: + device = "cpu" self.model_info.ref = None self.model_info.train_args = train_args # type: ignore diff --git a/focoos/trainer/trainer.py b/focoos/trainer/trainer.py index 3ce4ac5d..d0bef2bf 100644 --- a/focoos/trainer/trainer.py +++ b/focoos/trainer/trainer.py @@ -77,6 +77,8 @@ def train( self.resume = args.resume self.finished = False + self.model.to(self.args.device) + self.args.run_name = self.args.run_name.strip() # Setup logging and environment self.output_dir = os.path.join(self.args.output_dir, self.args.run_name) @@ -631,6 +633,10 @@ def __init__( self.gather_metric_period = gather_metric_period self.zero_grad_before_forward = zero_grad_before_forward + if not torch.cuda.is_available(): + logger.warning("[UnifiedTrainerLoop] CUDA is not available, training without AMP!") + amp = False + # AMP setup if amp and torch.cuda.is_available(): if grad_scaler is None: @@ -725,9 +731,6 @@ def run_step(self): if self.zero_grad_before_forward: self.optimizer.zero_grad() - if not torch.cuda.is_available(): - logger.warning("[UnifiedTrainerLoop] CUDA is not available, training without AMP!") - if self.amp and torch.cuda.is_available(): with autocast(enabled=self.amp, dtype=self.precision, device_type="cuda"): # we need to have preprocess data here From 6724fabd3468a93da7ef7c28f2dcc97427c17786 Mon Sep 17 00:00:00 2001 From: fcdl94 Date: Sat, 4 Oct 2025 09:26:37 +0200 Subject: [PATCH 6/7] feat(hub): add IMAGE_CLASSIFIER to supported model families - Expanded the list of supported model families in focoos_hub.py to include IMAGE_CLASSIFIER for enhanced functionality. --- focoos/hub/focoos_hub.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/focoos/hub/focoos_hub.py b/focoos/hub/focoos_hub.py index 5cbb3717..1d165c91 100644 --- a/focoos/hub/focoos_hub.py +++ b/focoos/hub/focoos_hub.py @@ -37,7 +37,12 @@ logger = get_logger("HUB") -SUPPORTED_MODEL_FAMILIES = [ModelFamily.BISENETFORMER, ModelFamily.DETR, ModelFamily.MASKFORMER] +SUPPORTED_MODEL_FAMILIES = [ + ModelFamily.BISENETFORMER, + ModelFamily.DETR, + ModelFamily.MASKFORMER, + ModelFamily.IMAGE_CLASSIFIER, +] class FocoosHUB: From a4412be7fb4daa0047c412b0f0de65ff7e723f19 Mon Sep 17 00:00:00 2001 From: Ivan Murabito Date: Tue, 21 Oct 2025 08:41:07 +0000 Subject: [PATCH 7/7] feat(gpu): add MPS availability to GPUInfo and update focoos version - Introduced a new field `mps_available` in the `GPUInfo` class to indicate MPS support. - Updated the `get_gpu_info` function to populate the `mps_available` field based on the availability of MPS. - Bumped focoos version to 0.22.0 in the lock file. --- focoos/ports.py | 2 ++ focoos/utils/system.py | 2 +- uv.lock | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/focoos/ports.py b/focoos/ports.py index 73902fc5..c319d9aa 100644 --- a/focoos/ports.py +++ b/focoos/ports.py @@ -676,6 +676,7 @@ class GPUInfo(PydanticBase): gpu_cuda_version: Optional[str] = None total_gpu_memory_gb: Optional[float] = None devices: Optional[list[GPUDevice]] = None + mps_available: Optional[bool] = None class SystemInfo(PydanticBase): @@ -743,6 +744,7 @@ def pprint(self, level: Literal["INFO", "DEBUG"] = "DEBUG"): output_lines.append(f" - total_memory_gb: {value.get('total_gpu_memory_gb')} GB") output_lines.append(f" - gpu_driver: {value.get('gpu_driver')}") output_lines.append(f" - gpu_cuda_version: {value.get('gpu_cuda_version')}") + output_lines.append(f" - mps_available: {value.get('mps_available')}") if value.get("devices"): output_lines.append(" - devices:") for device in value.get("devices", []): diff --git a/focoos/utils/system.py b/focoos/utils/system.py index 70ab5af8..295046c1 100644 --- a/focoos/utils/system.py +++ b/focoos/utils/system.py @@ -66,7 +66,7 @@ def get_gpu_info() -> GPUInfo: GPUInfo: An object containing comprehensive GPU information including devices list, driver version, CUDA version and GPU count. """ - gpu_info = GPUInfo() + gpu_info = GPUInfo(mps_available=torch.backends.mps.is_available()) gpus_device = [] try: # Get all GPU information in a single query diff --git a/uv.lock b/uv.lock index 852ee1bc..a0581cb9 100644 --- a/uv.lock +++ b/uv.lock @@ -677,7 +677,7 @@ wheels = [ [[package]] name = "focoos" -version = "0.20.2" +version = "0.22.0" source = { editable = "." } dependencies = [ { name = "colorama" },