Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added 1.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
99 changes: 99 additions & 0 deletions aspp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# camera-ready

import torch
import torch.nn as nn
import torch.nn.functional as F

class ASPP(nn.Module):
    """Atrous Spatial Pyramid Pooling head for a 512-channel feature map
    (ResNet18/34 backbones, output stride 16 or 8).

    Four parallel branches (a 1x1 conv and three 3x3 atrous convs with
    dilation rates 6/12/18) plus a global-average-pooled image-level branch
    are concatenated (5 * 256 = 1280 channels), projected back to 256
    channels, and mapped to per-pixel class logits.

    Args:
        num_classes: number of output channels (segmentation classes).
    """

    def __init__(self, num_classes):
        super(ASPP, self).__init__()

        self.conv_1x1_1 = nn.Conv2d(512, 256, kernel_size=1)
        self.bn_conv_1x1_1 = nn.BatchNorm2d(256)

        self.conv_3x3_1 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=6, dilation=6)
        self.bn_conv_3x3_1 = nn.BatchNorm2d(256)

        self.conv_3x3_2 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=12, dilation=12)
        self.bn_conv_3x3_2 = nn.BatchNorm2d(256)

        self.conv_3x3_3 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=18, dilation=18)
        self.bn_conv_3x3_3 = nn.BatchNorm2d(256)

        self.avg_pool = nn.AdaptiveAvgPool2d(1)

        self.conv_1x1_2 = nn.Conv2d(512, 256, kernel_size=1)
        self.bn_conv_1x1_2 = nn.BatchNorm2d(256)

        self.conv_1x1_3 = nn.Conv2d(1280, 256, kernel_size=1)  # (1280 = 5*256)
        self.bn_conv_1x1_3 = nn.BatchNorm2d(256)

        self.conv_1x1_4 = nn.Conv2d(256, num_classes, kernel_size=1)

    def forward(self, feature_map):
        """Return per-pixel class logits at the feature map's resolution.

        Args:
            feature_map: tensor of shape (batch_size, 512, h/16, w/16)
                (h/8, w/8 when the backbone uses output stride 8).

        Returns:
            Tensor of shape (batch_size, num_classes, h/16, w/16).
        """
        feature_map_h = feature_map.size()[2]  # (== h/16)
        feature_map_w = feature_map.size()[3]  # (== w/16)

        out_1x1 = F.relu(self.bn_conv_1x1_1(self.conv_1x1_1(feature_map)))      # (batch_size, 256, h/16, w/16)
        out_3x3_1 = F.relu(self.bn_conv_3x3_1(self.conv_3x3_1(feature_map)))    # (batch_size, 256, h/16, w/16)
        out_3x3_2 = F.relu(self.bn_conv_3x3_2(self.conv_3x3_2(feature_map)))    # (batch_size, 256, h/16, w/16)
        out_3x3_3 = F.relu(self.bn_conv_3x3_3(self.conv_3x3_3(feature_map)))    # (batch_size, 256, h/16, w/16)

        out_img = self.avg_pool(feature_map)                                    # (batch_size, 512, 1, 1)
        out_img = F.relu(self.bn_conv_1x1_2(self.conv_1x1_2(out_img)))          # (batch_size, 256, 1, 1)
        # F.upsample is deprecated (removed in recent PyTorch); F.interpolate
        # with explicit align_corners is the current API.
        out_img = F.interpolate(out_img, size=(feature_map_h, feature_map_w),
                                mode="bilinear", align_corners=False)           # (batch_size, 256, h/16, w/16)

        out = torch.cat([out_1x1, out_3x3_1, out_3x3_2, out_3x3_3, out_img], 1)  # (batch_size, 1280, h/16, w/16)
        out = F.relu(self.bn_conv_1x1_3(self.conv_1x1_3(out)))                   # (batch_size, 256, h/16, w/16)
        out = self.conv_1x1_4(out)                                               # (batch_size, num_classes, h/16, w/16)

        return out

class ASPP_Bottleneck(nn.Module):
    """Atrous Spatial Pyramid Pooling head for a 2048-channel (4*512)
    feature map, i.e. bottleneck ResNet backbones (ResNet50/101/152).

    Identical structure to ASPP, but every input branch takes 4*512
    channels instead of 512.

    Args:
        num_classes: number of output channels (segmentation classes).
    """

    def __init__(self, num_classes):
        super(ASPP_Bottleneck, self).__init__()

        self.conv_1x1_1 = nn.Conv2d(4*512, 256, kernel_size=1)
        self.bn_conv_1x1_1 = nn.BatchNorm2d(256)

        self.conv_3x3_1 = nn.Conv2d(4*512, 256, kernel_size=3, stride=1, padding=6, dilation=6)
        self.bn_conv_3x3_1 = nn.BatchNorm2d(256)

        self.conv_3x3_2 = nn.Conv2d(4*512, 256, kernel_size=3, stride=1, padding=12, dilation=12)
        self.bn_conv_3x3_2 = nn.BatchNorm2d(256)

        self.conv_3x3_3 = nn.Conv2d(4*512, 256, kernel_size=3, stride=1, padding=18, dilation=18)
        self.bn_conv_3x3_3 = nn.BatchNorm2d(256)

        self.avg_pool = nn.AdaptiveAvgPool2d(1)

        self.conv_1x1_2 = nn.Conv2d(4*512, 256, kernel_size=1)
        self.bn_conv_1x1_2 = nn.BatchNorm2d(256)

        self.conv_1x1_3 = nn.Conv2d(1280, 256, kernel_size=1)  # (1280 = 5*256)
        self.bn_conv_1x1_3 = nn.BatchNorm2d(256)

        self.conv_1x1_4 = nn.Conv2d(256, num_classes, kernel_size=1)

    def forward(self, feature_map):
        """Return per-pixel class logits at the feature map's resolution.

        Args:
            feature_map: tensor of shape (batch_size, 4*512, h/16, w/16).

        Returns:
            Tensor of shape (batch_size, num_classes, h/16, w/16).
        """
        feature_map_h = feature_map.size()[2]  # (== h/16)
        feature_map_w = feature_map.size()[3]  # (== w/16)

        out_1x1 = F.relu(self.bn_conv_1x1_1(self.conv_1x1_1(feature_map)))      # (batch_size, 256, h/16, w/16)
        out_3x3_1 = F.relu(self.bn_conv_3x3_1(self.conv_3x3_1(feature_map)))    # (batch_size, 256, h/16, w/16)
        out_3x3_2 = F.relu(self.bn_conv_3x3_2(self.conv_3x3_2(feature_map)))    # (batch_size, 256, h/16, w/16)
        out_3x3_3 = F.relu(self.bn_conv_3x3_3(self.conv_3x3_3(feature_map)))    # (batch_size, 256, h/16, w/16)

        out_img = self.avg_pool(feature_map)                                    # (batch_size, 4*512, 1, 1)
        out_img = F.relu(self.bn_conv_1x1_2(self.conv_1x1_2(out_img)))          # (batch_size, 256, 1, 1)
        # F.upsample is deprecated (removed in recent PyTorch); F.interpolate
        # with explicit align_corners is the current API.
        out_img = F.interpolate(out_img, size=(feature_map_h, feature_map_w),
                                mode="bilinear", align_corners=False)           # (batch_size, 256, h/16, w/16)

        out = torch.cat([out_1x1, out_3x3_1, out_3x3_2, out_3x3_3, out_img], 1)  # (batch_size, 1280, h/16, w/16)
        out = F.relu(self.bn_conv_1x1_3(self.conv_1x1_3(out)))                   # (batch_size, 256, h/16, w/16)
        out = self.conv_1x1_4(out)                                               # (batch_size, num_classes, h/16, w/16)

        return out
47 changes: 47 additions & 0 deletions deeplabv3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# camera-ready

import torch
import torch.nn as nn
import torch.nn.functional as F

import os

from resnet import ResNet18_OS16, ResNet34_OS16, ResNet50_OS16, ResNet101_OS16, ResNet152_OS16, ResNet18_OS8, ResNet34_OS8
from aspp import ASPP, ASPP_Bottleneck

class DeepLabV3(nn.Module):
    """DeepLabV3 semantic segmentation network: ResNet backbone + ASPP head,
    with the logits bilinearly upsampled back to the input resolution.

    Args:
        model_id: identifier used to name this model's log/checkpoint dirs.
        project_dir: root directory under which training_logs/ is created.
    """

    def __init__(self, model_id, project_dir):
        super(DeepLabV3, self).__init__()

        self.num_classes = 20

        self.model_id = model_id
        self.project_dir = project_dir
        self.create_model_dirs()

        self.resnet = ResNet18_OS8()  # NOTE! specify the type of ResNet here
        self.aspp = ASPP(num_classes=self.num_classes)  # NOTE! if you use ResNet50-152, set self.aspp = ASPP_Bottleneck(num_classes=self.num_classes) instead

    def forward(self, x):
        """Return per-pixel class logits at the input resolution.

        Args:
            x: tensor of shape (batch_size, 3, h, w).

        Returns:
            Tensor of shape (batch_size, num_classes, h, w).
        """
        h = x.size()[2]
        w = x.size()[3]

        # (feature_map shape: (batch_size, 512, h/16, w/16) for ResNet18/34 OS16,
        # (batch_size, 512, h/8, w/8) for OS8, (batch_size, 4*512, h/16, w/16) for ResNet50-152)
        feature_map = self.resnet(x)

        output = self.aspp(feature_map)  # (batch_size, num_classes, h/16, w/16)

        # F.upsample is deprecated (removed in recent PyTorch); F.interpolate
        # with explicit align_corners is the current API.
        output = F.interpolate(output, size=(h, w), mode="bilinear",
                               align_corners=False)  # (batch_size, num_classes, h, w)

        return output

    def create_model_dirs(self):
        """Create training_logs/model_<id>/checkpoints under project_dir.

        Uses makedirs(exist_ok=True) so re-running is safe; this also fixes
        the original bug where an already-existing model_dir caused
        checkpoints_dir to never be created.
        """
        self.logs_dir = self.project_dir + "/training_logs"
        self.model_dir = self.logs_dir + "/model_%s" % self.model_id
        self.checkpoints_dir = self.model_dir + "/checkpoints"
        # makedirs creates intermediate dirs, so creating checkpoints_dir
        # covers logs_dir and model_dir as well.
        os.makedirs(self.checkpoints_dir, exist_ok=True)
133 changes: 133 additions & 0 deletions eval_on_val.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# # camera-ready

# import sys

# sys.path.append("/root/deeplabv3")

# from datasets import DatasetVal # (this needs to be imported before torch, because cv2 needs to be imported before torch for some reason)


# sys.path.append("/root/deeplabv3/model")
# from deeplabv3 import DeepLabV3

# sys.path.append("/root/deeplabv3/utils")
# from utils import label_img_to_color

# import torch
# import torch.utils.data
# import torch.nn as nn
# from torch.autograd import Variable
# import torch.optim as optim
# import torch.nn.functional as F

# import numpy as np
# import pickle
# import matplotlib
# matplotlib.use("Agg")
# import matplotlib.pyplot as plt
# import cv2

# batch_size = 2

# network = DeepLabV3("eval_val", project_dir="/root/deeplabv3").cuda()
# network.load_state_dict(torch.load("/root/deeplabv3/pretrained_models/model_13_2_2_2_epoch_580.pth"))

# val_dataset = DatasetVal(cityscapes_data_path="/root/deeplabv3/data/cityscapes",
# cityscapes_meta_path="/root/deeplabv3/data/cityscapes/meta")

# num_val_batches = int(len(val_dataset)/batch_size)
# print ("num_val_batches:", num_val_batches)

# val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
# batch_size=batch_size, shuffle=False,
# num_workers=1)

# with open("/root/deeplabv3/data/cityscapes/meta/class_weights.pkl", "rb") as file: # (needed for python3)
# class_weights = np.array(pickle.load(file))
# class_weights = torch.from_numpy(class_weights)
# class_weights = Variable(class_weights.type(torch.FloatTensor)).cuda()

# # loss function
# loss_fn = nn.CrossEntropyLoss(weight=class_weights)

# network.eval() # (set in evaluation mode, this affects BatchNorm and dropout)
# batch_losses = []
# for step, (imgs, label_imgs, img_ids) in enumerate(val_loader):
# with torch.no_grad(): # (corresponds to setting volatile=True in all variables, this is done during inference to reduce memory consumption)
# imgs = Variable(imgs).cuda() # (shape: (batch_size, 3, img_h, img_w))
# label_imgs = Variable(label_imgs.type(torch.LongTensor)).cuda() # (shape: (batch_size, img_h, img_w))

# outputs = network(imgs) # (shape: (batch_size, num_classes, img_h, img_w))

# # compute the loss:
# loss = loss_fn(outputs, label_imgs)
# loss_value = loss.data.cpu().numpy()
# batch_losses.append(loss_value)

# ########################################################################
# # save data for visualization:
# ########################################################################
# outputs = outputs.data.cpu().numpy() # (shape: (batch_size, num_classes, img_h, img_w))
# pred_label_imgs = np.argmax(outputs, axis=1) # (shape: (batch_size, img_h, img_w))
# pred_label_imgs = pred_label_imgs.astype(np.uint8)

# for i in range(pred_label_imgs.shape[0]):
# if i == 0:
# pred_label_img = pred_label_imgs[i] # (shape: (img_h, img_w))
# img_id = img_ids[i]
# img = imgs[i] # (shape: (3, img_h, img_w))

# img = img.data.cpu().numpy()
# img = np.transpose(img, (1, 2, 0)) # (shape: (img_h, img_w, 3))
# img = img*np.array([0.229, 0.224, 0.225])
# img = img + np.array([0.485, 0.456, 0.406])
# img = img*255.0
# img = img.astype(np.uint8)

# pred_label_img_color = label_img_to_color(pred_label_img)
# overlayed_img = 0.35*img + 0.65*pred_label_img_color
# overlayed_img = overlayed_img.astype(np.uint8)

# cv2.imwrite(network.model_dir + "/" + img_id + "_overlayed.png", overlayed_img)

# val_loss = np.mean(batch_losses)
# print ("val loss: %g" % val_loss)
import os
import sys
import torch
import numpy as np
import cv2
from deeplabv3 import DeepLabV3
from utils import label_img_to_color

# Run inference on a single image and save a colorized overlay.
if len(sys.argv) < 2:
    sys.exit("usage: python eval_on_val.py <image_path>")
image_path = sys.argv[1]  # Get the image path from the command line argument

network = DeepLabV3("eval_val", project_dir="/root/deeplabv3").cuda()
network.load_state_dict(torch.load("/notebooks/deeplabv3/pretrained_models/model_13_2_2_2_epoch_580.pth"))
network.eval()

img_bgr = cv2.imread(image_path, cv2.IMREAD_COLOR)
if img_bgr is None:  # cv2.imread returns None on failure instead of raising
    sys.exit("could not read image: %s" % image_path)

# Normalize with ImageNet mean/std in RGB order, as the backbone expects.
img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
img = (img - np.array([0.485, 0.456, 0.406])) / np.array([0.229, 0.224, 0.225])
img = np.transpose(img, (2, 0, 1))       # (3, H, W)
img = np.expand_dims(img, axis=0)        # (1, 3, H, W)

with torch.no_grad():
    img_var = torch.from_numpy(img).float().cuda()
    outputs = network(img_var)           # (1, num_classes, H, W)

outputs = outputs.data.cpu().numpy()
pred_label_img = np.argmax(outputs, axis=1)[0].astype(np.uint8)  # (H, W)

pred_label_img_color = label_img_to_color(pred_label_img)
# Resize the color map to the actual image size (cv2 wants (W, H)),
# instead of a hard-coded 416x416; nearest keeps label colors crisp.
# NOTE(review): assumes label_img_to_color returns BGR-ordered colors
# suitable for cv2.imwrite — confirm against utils.
h, w = img_bgr.shape[:2]
pred_label_img_color = cv2.resize(pred_label_img_color, (w, h),
                                  interpolation=cv2.INTER_NEAREST)

# Blend with the ORIGINAL uint8 image, not the normalized float tensor —
# blending normalized values (~[-2.5, 2.5]) produced a corrupt overlay.
overlayed_img = 0.35 * img_bgr.astype(np.float32) + 0.65 * pred_label_img_color.astype(np.float32)
overlayed_img = np.clip(overlayed_img, 0, 255).astype(np.uint8)

output_dir = "/notebooks/deeplabv3/output"
os.makedirs(output_dir, exist_ok=True)
cv2.imwrite(os.path.join(output_dir, "overlayed_image.png"), overlayed_img)

print("Evaluation on the single image is done.")
Binary file added model_13_2_2_2_epoch_580 (1).pth
Binary file not shown.
10 changes: 10 additions & 0 deletions model_13_2_2_2_epoch_580.pth
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<!DOCTYPE html>
<html>
<head>
<link rel="icon" href="//netfree.link/img/block-favicon.png"/>
</head>
<body>
<iframe id="netfree_block_iframe" name="netfree-block-iframe" src="https://netfree.link/block/#%7B%22block%22%3A%22risk-type%22%2C%22sourceStatusCode%22%3A200%2C%22page_info%22%3A%7B%22url%22%3A%22https%3A%2F%2Fraw.githubusercontent.com%2Ffregu856%2Fdeeplabv3%2Fmaster%2Fpretrained_models%2Fmodel_13_2_2_2_epoch_580.pth%22%2C%22referer%22%3A%22https%3A%2F%2Fgithub.com%2Ffregu856%2Fdeeplabv3%2Fblob%2Fmaster%2Fpretrained_models%2Fmodel_13_2_2_2_epoch_580.pth%22%7D%7D" style=" position: fixed; top: 0; left: 0; width: 100%; height: 100%; border: none; "></iframe>
</body>
</html>

Loading