From 3dedbae1b64b21b0988f70908c169a60e781c44e Mon Sep 17 00:00:00 2001
From: fcdl94 <f.cermelli94@gmail.com>
Date: Fri, 3 Oct 2025 12:36:37 +0000
Subject: [PATCH 1/7] feat(system): enhance archive extraction by removing
 __MACOSX directory and validating train/validation splits

- Added functionality to remove the __MACOSX directory after extraction.
- Implemented checks that, when the extracted content has more than one subdirectory, a train split (`train`/`training`) and a validation split (`valid`/`val`/`validation`) directory are present, raising `FileNotFoundError` if not found.
---
 focoos/utils/system.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/focoos/utils/system.py b/focoos/utils/system.py
index 8217e6f8..fdfd1564 100644
--- a/focoos/utils/system.py
+++ b/focoos/utils/system.py
@@ -1,6 +1,7 @@
 import importlib.metadata as metadata
 import os
 import platform
+import shutil
 import subprocess
 import sys
 import tarfile
@@ -340,9 +341,25 @@ def extract_archive(
         logger.info(f"[elapsed {t1 - t0:.3f} ] Extracted archive to: {extracted_dir}")
 
     comm.synchronize()
+    # Remove __MACOSX directory
+    if "__MACOSX" in os.listdir(extracted_dir):
+        shutil.rmtree(os.path.join(extracted_dir, "__MACOSX"))
+
     if len(list_dir(extracted_dir)) == 1:
         extracted_dir = list_dir(extracted_dir)[0]
 
+    POSSIBLE_TRAIN_DIRS = ["train", "training"]
+    POSSIBLE_VAL_DIRS = ["valid", "val", "validation"]
+    if len(list_dir(extracted_dir)) > 1:
+        if not any(dir in POSSIBLE_TRAIN_DIRS for dir in os.listdir(extracted_dir)):
+            raise FileNotFoundError(
+                f"Train split not found in {extracted_dir}: {[str(x) for x in list_dir(extracted_dir)]}. You should provide a zip dataset with only a root folder or train and val subfolders."
+            )
+        if not any(dir in POSSIBLE_VAL_DIRS for dir in os.listdir(extracted_dir)):
+            raise FileNotFoundError(
+                f"Validation split not found in {extracted_dir}: {[str(x) for x in list_dir(extracted_dir)]}. You should provide a zip dataset with only a root folder or train and val subfolders."
+            )
+
     # Optionally delete the original archive
     if delete_original:
         os.remove(archive_path)

From 058b6b2c47e6a34a0adc902336d414d1ceba4991 Mon Sep 17 00:00:00 2001
From: fcdl94 <f.cermelli94@gmail.com>
Date: Fri, 3 Oct 2025 14:27:14 +0000
Subject: [PATCH 2/7] refactor(system): rename list_dir to list_directories and
 streamline directory validation

- Renamed the function `list_dir` to `list_directories` for clarity.
- Updated the extraction logic to use the new function name.
- Reworked the train/validation split checks to compare the `Path.name` of the listed subdirectories; the checks now always run because the previous `len(...) > 1` guard was removed.
---
 focoos/utils/system.py | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/focoos/utils/system.py b/focoos/utils/system.py
index fdfd1564..70ab5af8 100644
--- a/focoos/utils/system.py
+++ b/focoos/utils/system.py
@@ -267,7 +267,7 @@ def is_inside_sagemaker():
     return res
 
 
-def list_dir(base_directory: Union[str, Path]) -> List[Path]:
+def list_directories(base_directory: Union[str, Path]) -> List[Path]:
     """
     A function that lists directories within a base directory.
 
@@ -311,10 +311,9 @@ def extract_archive(
     # Determine the extraction path
     t0 = time.time()
     base_dir = os.path.dirname(archive_path)
+    extracted_dir = base_dir
     if destination is not None:
         extracted_dir = os.path.join(base_dir, destination)
-    else:
-        extracted_dir = base_dir
 
     if comm.is_main_process():
         logger.info(f"Extracting archive: {archive_path} to {extracted_dir}")
@@ -345,20 +344,20 @@ def extract_archive(
     if "__MACOSX" in os.listdir(extracted_dir):
         shutil.rmtree(os.path.join(extracted_dir, "__MACOSX"))
 
-    if len(list_dir(extracted_dir)) == 1:
-        extracted_dir = list_dir(extracted_dir)[0]
+    if len(list_directories(extracted_dir)) == 1:
+        extracted_dir = list_directories(extracted_dir)[0]
 
     POSSIBLE_TRAIN_DIRS = ["train", "training"]
     POSSIBLE_VAL_DIRS = ["valid", "val", "validation"]
-    if len(list_dir(extracted_dir)) > 1:
-        if not any(dir in POSSIBLE_TRAIN_DIRS for dir in os.listdir(extracted_dir)):
-            raise FileNotFoundError(
-                f"Train split not found in {extracted_dir}: {[str(x) for x in list_dir(extracted_dir)]}. You should provide a zip dataset with only a root folder or train and val subfolders."
-            )
-        if not any(dir in POSSIBLE_VAL_DIRS for dir in os.listdir(extracted_dir)):
-            raise FileNotFoundError(
-                f"Validation split not found in {extracted_dir}: {[str(x) for x in list_dir(extracted_dir)]}. You should provide a zip dataset with only a root folder or train and val subfolders."
-            )
+    inner_dirs = list_directories(extracted_dir)
+    if not any(dir.name in POSSIBLE_TRAIN_DIRS for dir in inner_dirs):
+        raise FileNotFoundError(
+            f"Train split not found in {extracted_dir}: {[str(x) for x in inner_dirs]}. You should provide a zip dataset with only a root folder or train and val subfolders."
+        )
+    if not any(dir.name in POSSIBLE_VAL_DIRS for dir in inner_dirs):
+        raise FileNotFoundError(
+            f"Validation split not found in {extracted_dir}: {[str(x) for x in inner_dirs]}. You should provide a zip dataset with only a root folder or train and val subfolders."
+        )
 
     # Optionally delete the original archive
     if delete_original:

From 1ebec241712b6dd1362781b656c89d0f4fa69821 Mon Sep 17 00:00:00 2001
From: fcdl94 <f.cermelli94@gmail.com>
Date: Fri, 3 Oct 2025 17:09:31 +0200
Subject: [PATCH 3/7] feat(device): enhance MPS support

- Updated `get_gpus_count` to support MPS alongside CUDA.
- Modified default value of `amp_enabled` in training and validation functions to False.
- Improved model loading logic to handle MPS and CUDA availability with appropriate error handling.
---
 focoos/cli/cli.py             |  4 ++--
 focoos/models/focoos_model.py | 17 ++++++++++++-----
 focoos/ports.py               | 11 ++++++++---
 3 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/focoos/cli/cli.py b/focoos/cli/cli.py
index 0b75139e..bb1bd543 100644
--- a/focoos/cli/cli.py
+++ b/focoos/cli/cli.py
@@ -259,7 +259,7 @@ def train(
     num_gpus: Annotated[int, typer.Option(help="Number of GPUs to use")] = get_gpus_count(),
     device: Annotated[str, typer.Option(help="Device to use")] = "cuda",
     workers: Annotated[int, typer.Option(help="Number of workers")] = 4,
-    amp_enabled: Annotated[bool, typer.Option(help="Enable automatic mixed precision")] = True,
+    amp_enabled: Annotated[bool, typer.Option(help="Enable automatic mixed precision")] = False,
     ddp_broadcast_buffers: Annotated[bool, typer.Option(help="Broadcast buffers in DDP")] = False,
     ddp_find_unused: Annotated[bool, typer.Option(help="Find unused parameters in DDP")] = True,
     checkpointer_period: Annotated[int, typer.Option(help="Checkpoint save period")] = 1000,
@@ -502,7 +502,7 @@ def val(
     num_gpus: Annotated[int, typer.Option(help="Number of GPUs")] = get_gpus_count(),
     device: Annotated[str, typer.Option(help="Device")] = "cuda",
     workers: Annotated[int, typer.Option(help="Number of workers")] = 4,
-    amp_enabled: Annotated[bool, typer.Option(help="Enable AMP")] = True,
+    amp_enabled: Annotated[bool, typer.Option(help="Enable AMP")] = False,
     ddp_broadcast_buffers: Annotated[bool, typer.Option(help="DDP broadcast buffers")] = False,
     ddp_find_unused: Annotated[bool, typer.Option(help="DDP find unused")] = True,
     checkpointer_period: Annotated[int, typer.Option(help="Checkpointer period")] = 1000,
diff --git a/focoos/models/focoos_model.py b/focoos/models/focoos_model.py
index ad4ade7d..efcc1a29 100644
--- a/focoos/models/focoos_model.py
+++ b/focoos/models/focoos_model.py
@@ -114,10 +114,17 @@ def __init__(self, model: BaseModelNN, model_info: ModelInfo):
         self.processor.eval()
         self.model = model.eval()
 
-        try:
-            self.model = self.model.cuda()
-        except Exception:
-            logger.warning("Unable to use CUDA")
+        if torch.cuda.is_available():
+            try:
+                self.model = self.model.cuda()
+            except Exception:
+                logger.warning("Unable to use CUDA")
+
+        if torch.backends.mps.is_available():
+            try:
+                self.model = self.model.to(device="mps")
+            except Exception:
+                logger.warning("Unable to use MPS")
 
         if self.model_info.weights_uri:
             self._load_weights()
@@ -392,7 +399,7 @@ def export(
         runtime_type: RuntimeType = RuntimeType.TORCHSCRIPT_32,
         onnx_opset: int = 18,
         out_dir: Optional[str] = None,
-        device: Literal["cuda", "cpu", "auto"] = "auto",
+        device: Literal["cuda", "cpu", "mps", "auto"] = "auto",
         simplify_onnx: bool = True,
         overwrite: bool = True,
         image_size: Optional[Union[int, Tuple[int, int]]] = None,
diff --git a/focoos/ports.py b/focoos/ports.py
index 34164d37..73902fc5 100644
--- a/focoos/ports.py
+++ b/focoos/ports.py
@@ -950,16 +950,21 @@ class DatasetSplitType(str, Enum):
 
 def get_gpus_count():
     try:
-        import torch.cuda
+        import torch
 
-        return torch.cuda.device_count()
+        if torch.backends.mps.is_available():
+            return 1
+        elif torch.cuda.is_available():
+            return torch.cuda.device_count()
+        else:
+            return 0
     except ImportError:
         return 0
 
 
 SchedulerType = Literal["POLY", "FIXED", "COSINE", "MULTISTEP"]
 OptimizerType = Literal["ADAMW", "SGD", "RMSPROP"]
-DeviceType = Literal["cuda", "cpu"]
+DeviceType = Literal["cuda", "cpu", "mps"]
 
 
 @dataclass

From 0c23ca05ec19b11227df50b8211a0867ec6f2f98 Mon Sep 17 00:00:00 2001
From: fcdl94 <f.cermelli94@gmail.com>
Date: Fri, 3 Oct 2025 17:32:33 +0200
Subject: [PATCH 4/7] fix(amp): enable automatic mixed precision by default and
 add CUDA availability check

- Changed default value of `amp_enabled` to True in training and validation functions.
- Replaced the assertion that CUDA is available for AMP with a logged warning; the AMP setup and autocast path now run only when CUDA is available.
---
 focoos/cli/cli.py         | 4 ++--
 focoos/trainer/trainer.py | 8 +++++---
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/focoos/cli/cli.py b/focoos/cli/cli.py
index bb1bd543..0b75139e 100644
--- a/focoos/cli/cli.py
+++ b/focoos/cli/cli.py
@@ -259,7 +259,7 @@ def train(
     num_gpus: Annotated[int, typer.Option(help="Number of GPUs to use")] = get_gpus_count(),
     device: Annotated[str, typer.Option(help="Device to use")] = "cuda",
     workers: Annotated[int, typer.Option(help="Number of workers")] = 4,
-    amp_enabled: Annotated[bool, typer.Option(help="Enable automatic mixed precision")] = False,
+    amp_enabled: Annotated[bool, typer.Option(help="Enable automatic mixed precision")] = True,
     ddp_broadcast_buffers: Annotated[bool, typer.Option(help="Broadcast buffers in DDP")] = False,
     ddp_find_unused: Annotated[bool, typer.Option(help="Find unused parameters in DDP")] = True,
     checkpointer_period: Annotated[int, typer.Option(help="Checkpoint save period")] = 1000,
@@ -502,7 +502,7 @@ def val(
     num_gpus: Annotated[int, typer.Option(help="Number of GPUs")] = get_gpus_count(),
     device: Annotated[str, typer.Option(help="Device")] = "cuda",
     workers: Annotated[int, typer.Option(help="Number of workers")] = 4,
-    amp_enabled: Annotated[bool, typer.Option(help="Enable AMP")] = False,
+    amp_enabled: Annotated[bool, typer.Option(help="Enable AMP")] = True,
     ddp_broadcast_buffers: Annotated[bool, typer.Option(help="DDP broadcast buffers")] = False,
     ddp_find_unused: Annotated[bool, typer.Option(help="DDP find unused")] = True,
     checkpointer_period: Annotated[int, typer.Option(help="Checkpointer period")] = 1000,
diff --git a/focoos/trainer/trainer.py b/focoos/trainer/trainer.py
index 79c8e97f..3ce4ac5d 100644
--- a/focoos/trainer/trainer.py
+++ b/focoos/trainer/trainer.py
@@ -632,7 +632,7 @@ def __init__(
         self.zero_grad_before_forward = zero_grad_before_forward
 
         # AMP setup
-        if amp:
+        if amp and torch.cuda.is_available():
             if grad_scaler is None:
                 # the init_scale avoids the first step to be too large
                 # and the scheduler.step() warning
@@ -725,8 +725,10 @@ def run_step(self):
         if self.zero_grad_before_forward:
             self.optimizer.zero_grad()
 
-        if self.amp:
-            assert torch.cuda.is_available(), "[UnifiedTrainerLoop] CUDA is required for AMP training!"
+        if not torch.cuda.is_available():
+            logger.warning("[UnifiedTrainerLoop] CUDA is not available, training without AMP!")
+
+        if self.amp and torch.cuda.is_available():
             with autocast(enabled=self.amp, dtype=self.precision, device_type="cuda"):
                 # we need to have preprocess data here
                 images, targets = self.processor.preprocess(data, dtype=self.precision, device=self.model.device)

From b64f9999dafca21c6aa7f08e304c57f0c8679a65 Mon Sep 17 00:00:00 2001
From: fcdl94 <f.cermelli94@gmail.com>
Date: Fri, 3 Oct 2025 18:06:57 +0200
Subject: [PATCH 5/7] feat(device): improve device selection logic for training

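- Device selection now follows `train_args.device`: the first detected GPU's name for `cuda`, `mps` when MPS is available, and `cpu` otherwise.
- Moved the CUDA availability warning from `run_step` to `__init__`, which now also disables AMP when CUDA is unavailable.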
---
 focoos/models/focoos_model.py | 11 ++++++++++-
 focoos/trainer/trainer.py     |  9 ++++++---
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/focoos/models/focoos_model.py b/focoos/models/focoos_model.py
index efcc1a29..09099cdb 100644
--- a/focoos/models/focoos_model.py
+++ b/focoos/models/focoos_model.py
@@ -160,8 +160,17 @@ def _setup_model_for_training(self, train_args: TrainerArgs, data_train: MapData
         """
         device = get_cpu_name()
         system_info = get_system_info()
-        if system_info.gpu_info and system_info.gpu_info.devices and len(system_info.gpu_info.devices) > 0:
+        if (
+            train_args.device == "cuda"
+            and system_info.gpu_info
+            and system_info.gpu_info.devices
+            and len(system_info.gpu_info.devices) > 0
+        ):
             device = system_info.gpu_info.devices[0].gpu_name
+        elif train_args.device == "mps" and torch.backends.mps.is_available():
+            device = "mps"
+        else:
+            device = "cpu"
         self.model_info.ref = None
 
         self.model_info.train_args = train_args  # type: ignore
diff --git a/focoos/trainer/trainer.py b/focoos/trainer/trainer.py
index 3ce4ac5d..d0bef2bf 100644
--- a/focoos/trainer/trainer.py
+++ b/focoos/trainer/trainer.py
@@ -77,6 +77,8 @@ def train(
         self.resume = args.resume
         self.finished = False
 
+        self.model.to(self.args.device)
+
         self.args.run_name = self.args.run_name.strip()
         # Setup logging and environment
         self.output_dir = os.path.join(self.args.output_dir, self.args.run_name)
@@ -631,6 +633,10 @@ def __init__(
         self.gather_metric_period = gather_metric_period
         self.zero_grad_before_forward = zero_grad_before_forward
 
+        if not torch.cuda.is_available():
+            logger.warning("[UnifiedTrainerLoop] CUDA is not available, training without AMP!")
+            amp = False
+
         # AMP setup
         if amp and torch.cuda.is_available():
             if grad_scaler is None:
@@ -725,9 +731,6 @@ def run_step(self):
         if self.zero_grad_before_forward:
             self.optimizer.zero_grad()
 
-        if not torch.cuda.is_available():
-            logger.warning("[UnifiedTrainerLoop] CUDA is not available, training without AMP!")
-
         if self.amp and torch.cuda.is_available():
             with autocast(enabled=self.amp, dtype=self.precision, device_type="cuda"):
                 # we need to have preprocess data here

From 6724fabd3468a93da7ef7c28f2dcc97427c17786 Mon Sep 17 00:00:00 2001
From: fcdl94 <f.cermelli94@gmail.com>
Date: Sat, 4 Oct 2025 09:26:37 +0200
Subject: [PATCH 6/7] feat(hub): add IMAGE_CLASSIFIER to supported model
 families

- Expanded the list of supported model families in focoos_hub.py to include IMAGE_CLASSIFIER for enhanced functionality.
---
 focoos/hub/focoos_hub.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/focoos/hub/focoos_hub.py b/focoos/hub/focoos_hub.py
index 5cbb3717..1d165c91 100644
--- a/focoos/hub/focoos_hub.py
+++ b/focoos/hub/focoos_hub.py
@@ -37,7 +37,12 @@
 logger = get_logger("HUB")
 
 
-SUPPORTED_MODEL_FAMILIES = [ModelFamily.BISENETFORMER, ModelFamily.DETR, ModelFamily.MASKFORMER]
+SUPPORTED_MODEL_FAMILIES = [
+    ModelFamily.BISENETFORMER,
+    ModelFamily.DETR,
+    ModelFamily.MASKFORMER,
+    ModelFamily.IMAGE_CLASSIFIER,
+]
 
 
 class FocoosHUB:

From a4412be7fb4daa0047c412b0f0de65ff7e723f19 Mon Sep 17 00:00:00 2001
From: Ivan Murabito <ivan.murabito@focoos.ai>
Date: Tue, 21 Oct 2025 08:41:07 +0000
Subject: [PATCH 7/7] feat(gpu): add MPS availability to GPUInfo and update
 focoos version

- Introduced a new field `mps_available` in the `GPUInfo` class to indicate MPS support.
- Updated the `get_gpu_info` function to populate the `mps_available` field based on the availability of MPS.
- Bumped focoos version to 0.22.0 in the lock file.
---
 focoos/ports.py        | 2 ++
 focoos/utils/system.py | 2 +-
 uv.lock                | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/focoos/ports.py b/focoos/ports.py
index 73902fc5..c319d9aa 100644
--- a/focoos/ports.py
+++ b/focoos/ports.py
@@ -676,6 +676,7 @@ class GPUInfo(PydanticBase):
     gpu_cuda_version: Optional[str] = None
     total_gpu_memory_gb: Optional[float] = None
     devices: Optional[list[GPUDevice]] = None
+    mps_available: Optional[bool] = None
 
 
 class SystemInfo(PydanticBase):
@@ -743,6 +744,7 @@ def pprint(self, level: Literal["INFO", "DEBUG"] = "DEBUG"):
                 output_lines.append(f"  - total_memory_gb: {value.get('total_gpu_memory_gb')} GB")
                 output_lines.append(f"  - gpu_driver: {value.get('gpu_driver')}")
                 output_lines.append(f"  - gpu_cuda_version: {value.get('gpu_cuda_version')}")
+                output_lines.append(f"  - mps_available: {value.get('mps_available')}")
                 if value.get("devices"):
                     output_lines.append("  - devices:")
                     for device in value.get("devices", []):
diff --git a/focoos/utils/system.py b/focoos/utils/system.py
index 70ab5af8..295046c1 100644
--- a/focoos/utils/system.py
+++ b/focoos/utils/system.py
@@ -66,7 +66,7 @@ def get_gpu_info() -> GPUInfo:
         GPUInfo: An object containing comprehensive GPU information including devices list,
                 driver version, CUDA version and GPU count.
     """
-    gpu_info = GPUInfo()
+    gpu_info = GPUInfo(mps_available=torch.backends.mps.is_available())
     gpus_device = []
     try:
         # Get all GPU information in a single query
diff --git a/uv.lock b/uv.lock
index 852ee1bc..a0581cb9 100644
--- a/uv.lock
+++ b/uv.lock
@@ -677,7 +677,7 @@ wheels = [
 
 [[package]]
 name = "focoos"
-version = "0.20.2"
+version = "0.22.0"
 source = { editable = "." }
 dependencies = [
     { name = "colorama" },