|
 
 from SARIAD.config import DATASETS_PATH
 
-def fetch_blob(path, link="", drive_file_id="", kaggle="", ext="zip"):
+def fetch_blob(path, link="", drive_file_id="", kaggle="", is_archive=True, ext="zip"):
     """
     Fetches the dataset blob from a direct link, Google Drive, or Kaggle,
     and extracts it directly to the specified path.
 
     Parameters:
-    - path: str, The full path to the directory where the extracted blob should reside.
-    - link: str, optional, direct HTTP(s) link to an archive.
-    - drive_file_id: str, optional, ID for Google Drive file (archive).
+    - path: str, The full path to the directory where the extracted blob should reside (if is_archive=True)
+      or the full path to the file itself (if is_archive=False).
+    - link: str, optional, direct HTTP(s) link to an archive or single file.
+    - drive_file_id: str, optional, ID for Google Drive file (archive or single file).
     - kaggle: str, optional, KaggleHub dataset slug.
-    - ext: str, archive type (zip, tar.gz, rar, tar), used for link and drive_file_id.
-      This parameter is ignored if 'kaggle' is provided.
+    - is_archive: bool, set to True if the fetched item is an archive that needs extraction.
+      Set to False for single files like .pth.
+    - ext: str, archive type (zip, tar.gz, rar, tar) or file extension (e.g., "pth"),
+      used for link and drive_file_id. This parameter is ignored if 'kaggle' is provided.
     """
-    if os.path.exists(path) and os.path.isdir(path) and len(os.listdir(path)) > 0:
-        print(f"Dataset found locally at: {path}")
-        return
+    if is_archive:
+        if os.path.exists(path) and os.path.isdir(path) and len(os.listdir(path)) > 0:
+            print(f"Dataset found locally at: {path}")
+            return
+    else:
+        if os.path.exists(path) and os.path.isfile(path):
+            print(f"File found locally at: {path}")
+            return
 
     print(f"Dataset not found locally at {path}. Downloading...")
-    os.makedirs(path, exist_ok=True)
+    if is_archive:
+        os.makedirs(path, exist_ok=True)
+    else:
+        os.makedirs(os.path.dirname(path) or '.', exist_ok=True)
 
     if link:
-        temp_archive_name = f"{os.path.basename(path)}_archive.{ext}"
-        temp_archive_path = os.path.join(os.path.dirname(path) or '.', temp_archive_name)
+        if is_archive:
+            temp_target_path = os.path.join(os.path.dirname(path) or '.', f"{os.path.basename(path)}_archive.{ext}")
+        else:
+            temp_target_path = path
 
         response = requests.get(link, stream=True)
         if response.status_code != 200:
             raise RuntimeError(f"Failed to download file from {link}: HTTP {response.status_code}")
 
         total_size_in_bytes = int(response.headers.get('content-length', 0))
         block_size = 8192
-        progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True, desc=f"Downloading {os.path.basename(temp_archive_path)}")
+        progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True, desc=f"Downloading {os.path.basename(temp_target_path)}")
 
-        with open(temp_archive_path, 'wb') as f:
+        with open(temp_target_path, 'wb') as f:
             for chunk in response.iter_content(chunk_size=block_size):
                 progress_bar.update(len(chunk))
                 f.write(chunk)
         progress_bar.close()
 
-        print(f"Extracting the {ext} archive...")
-        _extract_archive(temp_archive_path, path, ext)
-        os.remove(temp_archive_path)
-        print(f"Downloaded and extracted to {path}.")
+        if is_archive:
+            print(f"Extracting the {ext} archive...")
+            _extract_archive(temp_target_path, path, ext)
+            os.remove(temp_target_path)
+            print(f"Downloaded and extracted to {path}.")
+        else:
+            print(f"Downloaded file to {path}.")
 
     elif drive_file_id:
-        temp_archive_name = f"{os.path.basename(path)}_archive.{ext}"
-        temp_archive_path = os.path.join(os.path.dirname(path) or '.', temp_archive_name)
+        if is_archive:
+            temp_target_path = os.path.join(os.path.dirname(path) or '.', f"{os.path.basename(path)}_archive.{ext}")
+        else:
+            temp_target_path = path  # For single files, download directly to the final path
 
         print(f"Downloading from Google Drive ID: {drive_file_id}")
-        gdown.download(f"https://drive.google.com/uc?id={drive_file_id}", temp_archive_path, quiet=False)
+        gdown.download(f"https://drive.google.com/uc?id={drive_file_id}", temp_target_path, quiet=False)
 
-        print(f"Extracting the {ext} archive...")
-        _extract_archive(temp_archive_path, path, ext)
-        os.remove(temp_archive_path)
-        print(f"Downloaded and extracted to {path}.")
+        if is_archive:
+            print(f"Extracting the {ext} archive...")
+            _extract_archive(temp_target_path, path, ext)
+            os.remove(temp_target_path)
+            print(f"Downloaded and extracted to {path}.")
+        else:
+            print(f"Downloaded file to {path}.")
 
     elif kaggle:
         downloaded_kaggle_path = kagglehub.dataset_download(kaggle)
         print(f"KaggleHub {kaggle} dataset downloaded to: {downloaded_kaggle_path}")
 
-        os.makedirs(path, exist_ok=True)
+        os.makedirs(path, exist_ok=True)  # Always treat kaggle as an archive/dataset for now
 
         for item in os.listdir(downloaded_kaggle_path):
             s = os.path.join(downloaded_kaggle_path, item)
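
Taken together, the fetch_blob changes above amount to two calling modes: the default archive mode and the new single-file mode. A minimal usage sketch follows; the module path, URLs, Drive ID, and local paths are hypothetical illustrations, not taken from this commit:

    from SARIAD.utils.blob import fetch_blob  # assumed module path

    # Archive mode (default): the archive is downloaded next to `path`,
    # extracted into it, and the temporary archive is removed.
    fetch_blob(
        path="/data/datasets/SIRST",           # hypothetical target directory
        link="https://example.com/sirst.zip",  # hypothetical archive URL
        ext="zip",
    )

    # Single-file mode: the file is downloaded straight to `path`, no extraction.
    # Note that `ext` is effectively unused here, since the file is saved
    # directly to its final path.
    fetch_blob(
        path="/data/weights/detector.pth",       # hypothetical checkpoint path
        drive_file_id="0B0hypotheticalDriveId",  # hypothetical Drive file ID
        is_archive=False,
    )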
@@ -132,25 +153,29 @@ def _extract_archive(archive_path, extract_to, ext):
         shutil.move(os.path.join(temp_extract_dir, item), extract_to)
     shutil.rmtree(temp_extract_dir)
 
-def fetch_dataset(dataset_name, datasets_dir=DATASETS_PATH, link="", drive_file_id="", kaggle="", ext="zip"):
+def fetch_dataset(dataset_name, datasets_dir=DATASETS_PATH, link="", drive_file_id="", kaggle="", is_archive=True, ext="zip"):
     """
     Fetches a dataset blob from a direct link, Google Drive, or Kaggle,
     maintaining backward compatibility with the original fetch_blob signature.
 
     Parameters:
-    - dataset_name: str, The name of the dataset. This will be the directory name inside datasets_dir.
+    - dataset_name: str, The name of the dataset. This will be the directory name inside datasets_dir
+      for archives, or the file name if is_archive is False.
     - datasets_dir: str, The root directory where datasets are stored.
-    - link: str, optional, direct HTTP(s) link to an archive.
-    - drive_file_id: str, optional, ID for Google Drive file (archive).
+    - link: str, optional, direct HTTP(s) link to an archive or file.
+    - drive_file_id: str, optional, ID for Google Drive file (archive or file).
     - kaggle: str, optional, KaggleHub dataset slug.
-    - ext: str, archive type (zip, tar.gz, rar, tar), used for link and drive_file_id.
-      This parameter is ignored if 'kaggle' is provided.
+    - ext: str, archive type (zip, tar.gz, rar, tar) or file extension (e.g., "pth"),
+      used for link and drive_file_id. This parameter is ignored if 'kaggle' is provided.
+    - is_archive: bool, set to True if the fetched item is an archive that needs extraction.
+      Set to False for single files like .pth.
     """
     full_dataset_path = os.path.join(datasets_dir, dataset_name)
    fetch_blob(
         path=full_dataset_path,
         link=link,
         drive_file_id=drive_file_id,
         kaggle=kaggle,
-        ext=ext
+        ext=ext,
+        is_archive=is_archive
     )
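
A corresponding sketch for the fetch_dataset wrapper, which resolves dataset_name against datasets_dir before delegating to fetch_blob. The module path, dataset names, Kaggle slug, and Drive ID below are hypothetical:

    from SARIAD.config import DATASETS_PATH
    from SARIAD.utils.blob import fetch_dataset  # assumed module path

    # Archive: downloaded and extracted into {DATASETS_PATH}/NUDT-SIRST/.
    fetch_dataset("NUDT-SIRST", drive_file_id="0B0hypotheticalDriveId", ext="zip")

    # KaggleHub dataset: materialized from the KaggleHub cache into
    # {DATASETS_PATH}/sirst-aug/ (kaggle is always treated as an archive).
    fetch_dataset("sirst-aug", kaggle="someuser/sirst-aug")

    # Single weights file: saved as {DATASETS_PATH}/pretrained.pth, no extraction.
    fetch_dataset("pretrained.pth",
                  link="https://example.com/pretrained.pth",
                  is_archive=False)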