Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added 1.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
99 changes: 99 additions & 0 deletions aspp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# camera-ready

import torch
import torch.nn as nn
import torch.nn.functional as F

class ASPP(nn.Module):
    """Atrous Spatial Pyramid Pooling head for a 512-channel feature map
    (ResNet18/34 backbones, output stride 16 or 8).

    Four parallel branches (a 1x1 conv and three 3x3 atrous convs with
    dilation rates 6/12/18) plus a global-average-pooled image-level branch
    are concatenated (5 * 256 = 1280 channels), projected back to 256
    channels, and mapped to per-pixel class logits.

    Args:
        num_classes: number of output channels (segmentation classes).
    """

    def __init__(self, num_classes):
        super(ASPP, self).__init__()

        self.conv_1x1_1 = nn.Conv2d(512, 256, kernel_size=1)
        self.bn_conv_1x1_1 = nn.BatchNorm2d(256)

        self.conv_3x3_1 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=6, dilation=6)
        self.bn_conv_3x3_1 = nn.BatchNorm2d(256)

        self.conv_3x3_2 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=12, dilation=12)
        self.bn_conv_3x3_2 = nn.BatchNorm2d(256)

        self.conv_3x3_3 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=18, dilation=18)
        self.bn_conv_3x3_3 = nn.BatchNorm2d(256)

        self.avg_pool = nn.AdaptiveAvgPool2d(1)

        self.conv_1x1_2 = nn.Conv2d(512, 256, kernel_size=1)
        self.bn_conv_1x1_2 = nn.BatchNorm2d(256)

        self.conv_1x1_3 = nn.Conv2d(1280, 256, kernel_size=1)  # (1280 = 5*256)
        self.bn_conv_1x1_3 = nn.BatchNorm2d(256)

        self.conv_1x1_4 = nn.Conv2d(256, num_classes, kernel_size=1)

    def forward(self, feature_map):
        """Return per-pixel class logits at the feature map's resolution.

        Args:
            feature_map: tensor of shape (batch_size, 512, h/16, w/16)
                (h/8, w/8 when the backbone uses output stride 8).

        Returns:
            Tensor of shape (batch_size, num_classes, h/16, w/16).
        """
        feature_map_h = feature_map.size()[2]  # (== h/16)
        feature_map_w = feature_map.size()[3]  # (== w/16)

        out_1x1 = F.relu(self.bn_conv_1x1_1(self.conv_1x1_1(feature_map)))      # (batch_size, 256, h/16, w/16)
        out_3x3_1 = F.relu(self.bn_conv_3x3_1(self.conv_3x3_1(feature_map)))    # (batch_size, 256, h/16, w/16)
        out_3x3_2 = F.relu(self.bn_conv_3x3_2(self.conv_3x3_2(feature_map)))    # (batch_size, 256, h/16, w/16)
        out_3x3_3 = F.relu(self.bn_conv_3x3_3(self.conv_3x3_3(feature_map)))    # (batch_size, 256, h/16, w/16)

        out_img = self.avg_pool(feature_map)                                    # (batch_size, 512, 1, 1)
        out_img = F.relu(self.bn_conv_1x1_2(self.conv_1x1_2(out_img)))          # (batch_size, 256, 1, 1)
        # F.upsample is deprecated (removed in recent PyTorch); F.interpolate
        # with explicit align_corners is the current API.
        out_img = F.interpolate(out_img, size=(feature_map_h, feature_map_w),
                                mode="bilinear", align_corners=False)           # (batch_size, 256, h/16, w/16)

        out = torch.cat([out_1x1, out_3x3_1, out_3x3_2, out_3x3_3, out_img], 1)  # (batch_size, 1280, h/16, w/16)
        out = F.relu(self.bn_conv_1x1_3(self.conv_1x1_3(out)))                   # (batch_size, 256, h/16, w/16)
        out = self.conv_1x1_4(out)                                               # (batch_size, num_classes, h/16, w/16)

        return out

class ASPP_Bottleneck(nn.Module):
    """Atrous Spatial Pyramid Pooling head for a 2048-channel (4*512)
    feature map, i.e. bottleneck ResNet backbones (ResNet50/101/152).

    Identical structure to ASPP, but every input branch takes 4*512
    channels instead of 512.

    Args:
        num_classes: number of output channels (segmentation classes).
    """

    def __init__(self, num_classes):
        super(ASPP_Bottleneck, self).__init__()

        self.conv_1x1_1 = nn.Conv2d(4*512, 256, kernel_size=1)
        self.bn_conv_1x1_1 = nn.BatchNorm2d(256)

        self.conv_3x3_1 = nn.Conv2d(4*512, 256, kernel_size=3, stride=1, padding=6, dilation=6)
        self.bn_conv_3x3_1 = nn.BatchNorm2d(256)

        self.conv_3x3_2 = nn.Conv2d(4*512, 256, kernel_size=3, stride=1, padding=12, dilation=12)
        self.bn_conv_3x3_2 = nn.BatchNorm2d(256)

        self.conv_3x3_3 = nn.Conv2d(4*512, 256, kernel_size=3, stride=1, padding=18, dilation=18)
        self.bn_conv_3x3_3 = nn.BatchNorm2d(256)

        self.avg_pool = nn.AdaptiveAvgPool2d(1)

        self.conv_1x1_2 = nn.Conv2d(4*512, 256, kernel_size=1)
        self.bn_conv_1x1_2 = nn.BatchNorm2d(256)

        self.conv_1x1_3 = nn.Conv2d(1280, 256, kernel_size=1)  # (1280 = 5*256)
        self.bn_conv_1x1_3 = nn.BatchNorm2d(256)

        self.conv_1x1_4 = nn.Conv2d(256, num_classes, kernel_size=1)

    def forward(self, feature_map):
        """Return per-pixel class logits at the feature map's resolution.

        Args:
            feature_map: tensor of shape (batch_size, 4*512, h/16, w/16).

        Returns:
            Tensor of shape (batch_size, num_classes, h/16, w/16).
        """
        feature_map_h = feature_map.size()[2]  # (== h/16)
        feature_map_w = feature_map.size()[3]  # (== w/16)

        out_1x1 = F.relu(self.bn_conv_1x1_1(self.conv_1x1_1(feature_map)))      # (batch_size, 256, h/16, w/16)
        out_3x3_1 = F.relu(self.bn_conv_3x3_1(self.conv_3x3_1(feature_map)))    # (batch_size, 256, h/16, w/16)
        out_3x3_2 = F.relu(self.bn_conv_3x3_2(self.conv_3x3_2(feature_map)))    # (batch_size, 256, h/16, w/16)
        out_3x3_3 = F.relu(self.bn_conv_3x3_3(self.conv_3x3_3(feature_map)))    # (batch_size, 256, h/16, w/16)

        out_img = self.avg_pool(feature_map)                                    # (batch_size, 4*512, 1, 1)
        out_img = F.relu(self.bn_conv_1x1_2(self.conv_1x1_2(out_img)))          # (batch_size, 256, 1, 1)
        # F.upsample is deprecated (removed in recent PyTorch); F.interpolate
        # with explicit align_corners is the current API.
        out_img = F.interpolate(out_img, size=(feature_map_h, feature_map_w),
                                mode="bilinear", align_corners=False)           # (batch_size, 256, h/16, w/16)

        out = torch.cat([out_1x1, out_3x3_1, out_3x3_2, out_3x3_3, out_img], 1)  # (batch_size, 1280, h/16, w/16)
        out = F.relu(self.bn_conv_1x1_3(self.conv_1x1_3(out)))                   # (batch_size, 256, h/16, w/16)
        out = self.conv_1x1_4(out)                                               # (batch_size, num_classes, h/16, w/16)

        return out
47 changes: 47 additions & 0 deletions deeplabv3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# camera-ready

import torch
import torch.nn as nn
import torch.nn.functional as F

import os

from resnet import ResNet18_OS16, ResNet34_OS16, ResNet50_OS16, ResNet101_OS16, ResNet152_OS16, ResNet18_OS8, ResNet34_OS8
from aspp import ASPP, ASPP_Bottleneck

class DeepLabV3(nn.Module):
    """DeepLabV3 semantic segmentation network: ResNet backbone + ASPP head,
    with the logits bilinearly upsampled back to the input resolution.

    Args:
        model_id: identifier used to name this model's log/checkpoint dirs.
        project_dir: root directory under which training_logs/ is created.
    """

    def __init__(self, model_id, project_dir):
        super(DeepLabV3, self).__init__()

        self.num_classes = 20

        self.model_id = model_id
        self.project_dir = project_dir
        self.create_model_dirs()

        self.resnet = ResNet18_OS8()  # NOTE! specify the type of ResNet here
        self.aspp = ASPP(num_classes=self.num_classes)  # NOTE! if you use ResNet50-152, set self.aspp = ASPP_Bottleneck(num_classes=self.num_classes) instead

    def forward(self, x):
        """Return per-pixel class logits at the input resolution.

        Args:
            x: tensor of shape (batch_size, 3, h, w).

        Returns:
            Tensor of shape (batch_size, num_classes, h, w).
        """
        h = x.size()[2]
        w = x.size()[3]

        # (feature_map shape: (batch_size, 512, h/16, w/16) for ResNet18/34 OS16,
        # (batch_size, 512, h/8, w/8) for OS8, (batch_size, 4*512, h/16, w/16) for ResNet50-152)
        feature_map = self.resnet(x)

        output = self.aspp(feature_map)  # (batch_size, num_classes, h/16, w/16)

        # F.upsample is deprecated (removed in recent PyTorch); F.interpolate
        # with explicit align_corners is the current API.
        output = F.interpolate(output, size=(h, w), mode="bilinear",
                               align_corners=False)  # (batch_size, num_classes, h, w)

        return output

    def create_model_dirs(self):
        """Create training_logs/model_<id>/checkpoints under project_dir.

        Uses makedirs(exist_ok=True) so re-running is safe; this also fixes
        the original bug where an already-existing model_dir caused
        checkpoints_dir to never be created.
        """
        self.logs_dir = self.project_dir + "/training_logs"
        self.model_dir = self.logs_dir + "/model_%s" % self.model_id
        self.checkpoints_dir = self.model_dir + "/checkpoints"
        # makedirs creates intermediate dirs, so creating checkpoints_dir
        # covers logs_dir and model_dir as well.
        os.makedirs(self.checkpoints_dir, exist_ok=True)
133 changes: 133 additions & 0 deletions eval_on_val.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# # camera-ready

# import sys

# sys.path.append("/root/deeplabv3")

# from datasets import DatasetVal # (this needs to be imported before torch, because cv2 needs to be imported before torch for some reason)


# sys.path.append("/root/deeplabv3/model")
# from deeplabv3 import DeepLabV3

# sys.path.append("/root/deeplabv3/utils")
# from utils import label_img_to_color

# import torch
# import torch.utils.data
# import torch.nn as nn
# from torch.autograd import Variable
# import torch.optim as optim
# import torch.nn.functional as F

# import numpy as np
# import pickle
# import matplotlib
# matplotlib.use("Agg")
# import matplotlib.pyplot as plt
# import cv2

# batch_size = 2

# network = DeepLabV3("eval_val", project_dir="/root/deeplabv3").cuda()
# network.load_state_dict(torch.load("/root/deeplabv3/pretrained_models/model_13_2_2_2_epoch_580.pth"))

# val_dataset = DatasetVal(cityscapes_data_path="/root/deeplabv3/data/cityscapes",
# cityscapes_meta_path="/root/deeplabv3/data/cityscapes/meta")

# num_val_batches = int(len(val_dataset)/batch_size)
# print ("num_val_batches:", num_val_batches)

# val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
# batch_size=batch_size, shuffle=False,
# num_workers=1)

# with open("/root/deeplabv3/data/cityscapes/meta/class_weights.pkl", "rb") as file: # (needed for python3)
# class_weights = np.array(pickle.load(file))
# class_weights = torch.from_numpy(class_weights)
# class_weights = Variable(class_weights.type(torch.FloatTensor)).cuda()

# # loss function
# loss_fn = nn.CrossEntropyLoss(weight=class_weights)

# network.eval() # (set in evaluation mode, this affects BatchNorm and dropout)
# batch_losses = []
# for step, (imgs, label_imgs, img_ids) in enumerate(val_loader):
# with torch.no_grad(): # (corresponds to setting volatile=True in all variables, this is done during inference to reduce memory consumption)
# imgs = Variable(imgs).cuda() # (shape: (batch_size, 3, img_h, img_w))
# label_imgs = Variable(label_imgs.type(torch.LongTensor)).cuda() # (shape: (batch_size, img_h, img_w))

# outputs = network(imgs) # (shape: (batch_size, num_classes, img_h, img_w))

# # compute the loss:
# loss = loss_fn(outputs, label_imgs)
# loss_value = loss.data.cpu().numpy()
# batch_losses.append(loss_value)

# ########################################################################
# # save data for visualization:
# ########################################################################
# outputs = outputs.data.cpu().numpy() # (shape: (batch_size, num_classes, img_h, img_w))
# pred_label_imgs = np.argmax(outputs, axis=1) # (shape: (batch_size, img_h, img_w))
# pred_label_imgs = pred_label_imgs.astype(np.uint8)

# for i in range(pred_label_imgs.shape[0]):
# if i == 0:
# pred_label_img = pred_label_imgs[i] # (shape: (img_h, img_w))
# img_id = img_ids[i]
# img = imgs[i] # (shape: (3, img_h, img_w))

# img = img.data.cpu().numpy()
# img = np.transpose(img, (1, 2, 0)) # (shape: (img_h, img_w, 3))
# img = img*np.array([0.229, 0.224, 0.225])
# img = img + np.array([0.485, 0.456, 0.406])
# img = img*255.0
# img = img.astype(np.uint8)

# pred_label_img_color = label_img_to_color(pred_label_img)
# overlayed_img = 0.35*img + 0.65*pred_label_img_color
# overlayed_img = overlayed_img.astype(np.uint8)

# cv2.imwrite(network.model_dir + "/" + img_id + "_overlayed.png", overlayed_img)

# val_loss = np.mean(batch_losses)
# print ("val loss: %g" % val_loss)
import os
import sys
import torch
import numpy as np
import cv2
from deeplabv3 import DeepLabV3
from utils import label_img_to_color

# Run inference on a single image and save a colorized overlay.
if len(sys.argv) < 2:
    sys.exit("usage: python eval_on_val.py <image_path>")
image_path = sys.argv[1]  # Get the image path from the command line argument

network = DeepLabV3("eval_val", project_dir="/root/deeplabv3").cuda()
network.load_state_dict(torch.load("/notebooks/deeplabv3/pretrained_models/model_13_2_2_2_epoch_580.pth"))
network.eval()

img_bgr = cv2.imread(image_path, cv2.IMREAD_COLOR)
if img_bgr is None:  # cv2.imread returns None on failure instead of raising
    sys.exit("could not read image: %s" % image_path)

# Normalize with ImageNet mean/std in RGB order, as the backbone expects.
img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
img = (img - np.array([0.485, 0.456, 0.406])) / np.array([0.229, 0.224, 0.225])
img = np.transpose(img, (2, 0, 1))       # (3, H, W)
img = np.expand_dims(img, axis=0)        # (1, 3, H, W)

with torch.no_grad():
    img_var = torch.from_numpy(img).float().cuda()
    outputs = network(img_var)           # (1, num_classes, H, W)

outputs = outputs.data.cpu().numpy()
pred_label_img = np.argmax(outputs, axis=1)[0].astype(np.uint8)  # (H, W)

pred_label_img_color = label_img_to_color(pred_label_img)
# Resize the color map to the actual image size (cv2 wants (W, H)),
# instead of a hard-coded 416x416; nearest keeps label colors crisp.
# NOTE(review): assumes label_img_to_color returns BGR-ordered colors
# suitable for cv2.imwrite — confirm against utils.
h, w = img_bgr.shape[:2]
pred_label_img_color = cv2.resize(pred_label_img_color, (w, h),
                                  interpolation=cv2.INTER_NEAREST)

# Blend with the ORIGINAL uint8 image, not the normalized float tensor —
# blending normalized values (~[-2.5, 2.5]) produced a corrupt overlay.
overlayed_img = 0.35 * img_bgr.astype(np.float32) + 0.65 * pred_label_img_color.astype(np.float32)
overlayed_img = np.clip(overlayed_img, 0, 255).astype(np.uint8)

output_dir = "/notebooks/deeplabv3/output"
os.makedirs(output_dir, exist_ok=True)
cv2.imwrite(os.path.join(output_dir, "overlayed_image.png"), overlayed_img)

print("Evaluation on the single image is done.")
Binary file added model_13_2_2_2_epoch_580 (1).pth
Binary file not shown.
10 changes: 10 additions & 0 deletions model_13_2_2_2_epoch_580.pth
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<!DOCTYPE html>
<html>
<head>
<link rel="icon" href="//netfree.link/img/block-favicon.png"/>
</head>
<body>
<iframe id="netfree_block_iframe" name="netfree-block-iframe" src="https://netfree.link/block/#%7B%22block%22%3A%22risk-type%22%2C%22sourceStatusCode%22%3A200%2C%22page_info%22%3A%7B%22url%22%3A%22https%3A%2F%2Fraw.githubusercontent.com%2Ffregu856%2Fdeeplabv3%2Fmaster%2Fpretrained_models%2Fmodel_13_2_2_2_epoch_580.pth%22%2C%22referer%22%3A%22https%3A%2F%2Fgithub.com%2Ffregu856%2Fdeeplabv3%2Fblob%2Fmaster%2Fpretrained_models%2Fmodel_13_2_2_2_epoch_580.pth%22%7D%7D" style=" position: fixed; top: 0; left: 0; width: 100%; height: 100%; border: none; "></iframe>
</body>
</html>

Loading