import os
from torch.utils.data import Dataset
import torchvision.transforms as transforms
from PIL import Image
import torch
import numpy as np
import cv2

class ISICDataset(Dataset):
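    """ISIC 2018 skin-lesion dataset that converts binary segmentation masks
    into YOLO-style detection targets of shape
    (num_anchors, grid_size, grid_size, 85)."""
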
    def __init__(self, img_dir, annot_dir, mode='train', transform=None, img_size=640, model_output_grid_size=80):
        """
        Initializes the ISICDataset.

        Parameters:
            img_dir (str): Path to the directory containing images.
            annot_dir (str): Path to the directory containing annotation files (ignored in test mode).
            mode (str): Dataset mode, either 'train' (with annotations) or 'test' (without annotations).
            transform (callable, optional): Optional transformations to apply to the images.
            img_size (int): Side length to which images and masks are resized.
            model_output_grid_size (int): Grid size of the model's output feature map.
        """
        print("Initializing ISICDataset...")

        self.img_dir = img_dir
        self.annot_dir = annot_dir if mode == 'train' else None
        self.mode = mode
        self.img_size = img_size
        self.transform = transform if transform else self.default_transforms()
        self.num_anchors = 3
        self.grid_size = model_output_grid_size  # Must match the model's output grid size (80 here)

        # Get list of image files
        print("Loading image files...")
        self.img_files = sorted([f for f in os.listdir(img_dir) if f.endswith('.jpg')])

        if self.mode == 'train':
            # Filter to only those images that have corresponding annotation files
            print("Filtering images with corresponding annotations...")
            annot_files = set(f.replace('_segmentation.png', '') for f in os.listdir(annot_dir) if f.endswith('.png'))
            self.img_files = [f for f in self.img_files if f.replace('.jpg', '') in annot_files]

        # Safeguard against an empty dataset
        if not self.img_files:
            raise ValueError(f"No valid images found in {img_dir} with corresponding annotations in {annot_dir}")

        print(f"Dataset initialized with {len(self.img_files)} images.")

    def default_transforms(self):
        """Default pipeline: resize, random flips, color jitter, tensor conversion, ImageNet normalization."""
        print("Setting default image transformations...")
        return transforms.Compose([
            transforms.Resize((self.img_size, self.img_size)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            # ColorJitter is applied to the PIL image, before tensor conversion
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    def __len__(self):
        """Returns the number of images in the dataset."""
        return len(self.img_files)

    def __getitem__(self, idx):
        print(f"Getting item {idx}...")
        img_path = os.path.join(self.img_dir, self.img_files[idx])
        print(f"Loading image from: {img_path}")

        try:
            image = Image.open(img_path).convert("RGB")
        except Exception as e:
            print(f"Error loading image {img_path}: {e}")
            # On error, substitute a blank PIL image so the transform pipeline still applies
            image = Image.new("RGB", (self.img_size, self.img_size))

        if self.transform:
            # NOTE: the default transform flips the image randomly, but the mask
            # below is never flipped, so boxes can misalign under flip augmentation
            image = self.transform(image)

        if self.mode == 'train':
            annot_filename = self.img_files[idx].replace('.jpg', '_segmentation.png')
            annot_path = os.path.join(self.annot_dir, annot_filename)
            print(f"Loading annotation from: {annot_path}")

            if not os.path.exists(annot_path):
                print("Annotation file not found, creating dummy target.")
                return image, torch.zeros((self.num_anchors, self.grid_size, self.grid_size, 85))

            try:
                mask = Image.open(annot_path).convert("L")
            except Exception as e:
                print(f"Error loading annotation {annot_path}: {e}")
                return image, torch.zeros((self.num_anchors, self.grid_size, self.grid_size, 85))

            # Nearest-neighbor interpolation keeps the mask binary after resizing
            mask = mask.resize((self.img_size, self.img_size), Image.NEAREST)
            print("Annotation loaded and resized.")

            # Extract bounding boxes from the mask
            boxes = self.mask_to_bounding_boxes(mask)

            # Target layout per cell: [x_center, y_center, width, height, objectness, 80 class scores] = 85 values
            target_tensor = torch.zeros((self.num_anchors, self.grid_size, self.grid_size, 85))

            # Assign each bounding box to the grid cell containing its center
            img_width, img_height = mask.size
            for box in boxes:
                x_min, y_min, x_max, y_max = box
                # Grid cell indices of the box center
                grid_x = int((x_min + x_max) / 2 / img_width * self.grid_size)
                grid_y = int((y_min + y_max) / 2 / img_height * self.grid_size)

                # Clamp the grid coordinates to valid bounds
                grid_x = min(max(grid_x, 0), self.grid_size - 1)
                grid_y = min(max(grid_y, 0), self.grid_size - 1)

                # Convert the box to normalized YOLO format
                x_center, y_center, width, height = self.convert_to_yolo_format(box, img_width, img_height)

                # Write the box into the first anchor (anchor 0) of that cell
                target_tensor[0, grid_y, grid_x, 0:4] = torch.tensor([x_center, y_center, width, height])
                target_tensor[0, grid_y, grid_x, 4] = 1.0  # Objectness score
                # Single-class dataset: mark class index 0 (the lesion class)
                target_tensor[0, grid_y, grid_x, 5] = 1.0

            return image, target_tensor
        else:
            # Return a dummy target for validation/test so the return format stays consistent
            dummy_target = torch.zeros((self.num_anchors, self.grid_size, self.grid_size, 85))
            return image, dummy_target
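
    # Example (hypothetical numbers): with img_size = 640 and grid_size = 80, a
    # box centered at pixel (320, 160) lands in grid cell
    # grid_x = int(320 / 640 * 80) = 40, grid_y = int(160 / 640 * 80) = 20.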

    def convert_to_yolo_format(self, bbox, img_width, img_height):
        """Converts [x_min, y_min, x_max, y_max] to normalized YOLO (x_center, y_center, width, height)."""
        x_min, y_min, x_max, y_max = bbox
        x_center = (x_min + x_max) / 2.0 / img_width
        y_center = (y_min + y_max) / 2.0 / img_height
        width = (x_max - x_min) / img_width
        height = (y_max - y_min) / img_height
        return x_center, y_center, width, height
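    # Worked example (hypothetical numbers): in a 640x640 image, the box
    # [100, 150, 300, 350] maps to x_center = (100 + 300) / 2 / 640 = 0.3125,
    # y_center = (150 + 350) / 2 / 640 = 0.390625, width = 200 / 640 = 0.3125,
    # and height = 200 / 640 = 0.3125.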

    def mask_to_bounding_boxes(self, mask):
        """Extracts bounding boxes in [x_min, y_min, x_max, y_max] format from a binary mask."""
        mask_np = np.array(mask)
        boxes = []

        contours, _ = cv2.findContours(mask_np, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            if w > 0 and h > 0:  # Ensure a valid bounding box
                boxes.append([x, y, x + w, y + h])

        return boxes
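
# Minimal usage sketch (assumptions: the ISIC 2018 directory paths below exist
# locally and follow the challenge naming scheme; batch size and worker count
# are illustrative, not tuned).
if __name__ == "__main__":
    from torch.utils.data import DataLoader

    dataset = ISICDataset(
        img_dir="ISIC2018/ISIC2018_Task1-2_Training_Input_x2",
        annot_dir="ISIC2018/ISIC2018_Task1_Training_GroundTruth_x2",
        mode="train",
    )
    loader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=0)

    images, targets = next(iter(loader))
    print(images.shape)   # torch.Size([4, 3, 640, 640])
    print(targets.shape)  # torch.Size([4, 3, 80, 80, 85])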