DEEPScreen2/models.py at main · HUBioDataLab/DEEPScreen2 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import torch
import torch.nn as nn
import torch.nn.functional as F
from operator import itemgetter
from transformers import Swinv2Config, Swinv2ForImageClassification
from ultralytics import YOLO

class CNNModel1(nn.Module):
    """Five-stage CNN with 2x2 kernels and a three-layer classifier head.

    Every stage is ``Conv2d(kernel_size=2) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2, 2)`` and the channel widths run 3 -> 32 -> 64 -> 128 -> 64
    -> 32. The flattened feature map is passed through ``fc1 -> fc2 -> fc3``;
    the two values per sample returned by ``fc3`` are raw scores (no softmax
    is applied in this module).

    Args:
        fully_layer_1: Output width of the first fully connected layer.
        fully_layer_2: Output width of the second fully connected layer.
        drop_rate: Dropout probability applied after ``fc1`` and ``fc2``
            while the module is in training mode.
        image_size: Side length of the square input images. It determines
            the input width of ``fc1``. The default of 300 gives an 8x8
            final feature map (32*8*8 features); 200 gives 5x5.

    Raises:
        ValueError: If ``image_size`` is too small to survive the five
            conv/pool stages.
    """

    def __init__(self, fully_layer_1, fully_layer_2, drop_rate, image_size=300):
        super().__init__()

        self.conv1 = nn.Conv2d(3, 32, 2)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 2)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, 2)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv4 = nn.Conv2d(128, 64, 2)
        self.bn4 = nn.BatchNorm2d(64)
        self.conv5 = nn.Conv2d(64, 32, 2)
        self.bn5 = nn.BatchNorm2d(32)

        self.pool = nn.MaxPool2d(2, 2)
        self.drop_rate = drop_rate

        side = self._feature_map_side(image_size)
        if side < 1:
            raise ValueError(
                f"image_size={image_size} is too small for five conv/pool stages"
            )
        # 32 is the channel count produced by conv5.
        self.fc1 = nn.Linear(32 * side * side, fully_layer_1)
        self.fc2 = nn.Linear(fully_layer_1, fully_layer_2)
        self.fc3 = nn.Linear(fully_layer_2, 2)

    @staticmethod
    def _feature_map_side(image_size):
        """Return the spatial side length left after the five conv/pool stages."""
        side = int(image_size)
        for _ in range(5):
            # An unpadded kernel-2 convolution removes one pixel per axis and
            # the 2x2/stride-2 max-pool then halves the result, rounding down.
            side = (side - 1) // 2
        return side

    def forward(self, x):
        """Return the two-class scores for a batch of images.

        Args:
            x: Image batch with 3 channels (required by ``conv1``) whose
                spatial size matches the ``image_size`` given at construction.

        Returns:
            Tensor with one row per input sample and two columns.

        Raises:
            RuntimeError: Raised by ``fc1`` when the flattened feature width
                does not match, i.e. the input resolution is not the one the
                model was built for.
        """
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = self.pool(F.relu(self.bn4(self.conv4(x))))
        x = self.pool(F.relu(self.bn5(self.conv5(x))))

        # Flatten per sample. Keeping the batch dimension fixed (rather than
        # ``view(-1, N)``) makes an unexpected input resolution fail loudly in
        # fc1 instead of silently regrouping features across samples.
        x = torch.flatten(x, 1)

        x = F.dropout(F.relu(self.fc1(x)), self.drop_rate, self.training)
        x = F.dropout(F.relu(self.fc2(x)), self.drop_rate, self.training)
        x = self.fc3(x)

        return x


class CNNModel2(nn.Module):
    """CNN with padded 3x3 convolutions and a global-average-pooled head.

    How it differs from CNNModel1:
    - 3x3 kernels with ``padding=1`` instead of unpadded 2x2 kernels.
    - Channel widths never shrink: 3 -> 32 -> 64 -> 128 -> 256 -> 256.
    - ``AdaptiveAvgPool2d(1)`` reduces every feature map to a single value,
      so ``fc1`` always receives 256 features instead of a flatten whose
      width depends on the input resolution.

    Args:
        fully_layer_1: Output width of the first fully connected layer.
        fully_layer_2: Output width of the second fully connected layer.
        drop_rate: Dropout probability used after ``fc1`` and ``fc2`` while
            the module is in training mode.
    """

    def __init__(self, fully_layer_1, fully_layer_2, drop_rate):
        super().__init__()

        # Feature extractor: five conv + batch-norm pairs. Attribute names and
        # creation order are kept stable so checkpoints stay loadable.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(num_features=32)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(num_features=64)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(num_features=128)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(num_features=256)
        self.conv5 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)
        self.bn5 = nn.BatchNorm2d(num_features=256)

        # Shared down-sampling layer and the final global average pool.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.gap = nn.AdaptiveAvgPool2d(output_size=1)

        # Classifier head; 256 is the channel count produced by conv5.
        self.drop_rate = drop_rate
        self.fc1 = nn.Linear(in_features=256, out_features=fully_layer_1)
        self.fc2 = nn.Linear(in_features=fully_layer_1, out_features=fully_layer_2)
        self.fc3 = nn.Linear(in_features=fully_layer_2, out_features=2)

    def forward(self, x):
        """Return the two-class scores (one row per sample) for an image batch."""
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
            (self.conv4, self.bn4),
            (self.conv5, self.bn5),
        )
        for conv, norm in stages:
            # conv -> batch norm -> ReLU -> 2x2 max-pool
            x = self.pool(F.relu(norm(conv(x))))

        # Collapse each channel to one value, then drop the 1x1 spatial dims.
        batch_size = x.size(0)
        features = self.gap(x).view(batch_size, -1)

        hidden = F.relu(self.fc1(features))
        hidden = F.dropout(hidden, p=self.drop_rate, training=self.training)
        hidden = F.relu(self.fc2(hidden))
        hidden = F.dropout(hidden, p=self.drop_rate, training=self.training)

        return self.fc3(hidden)

# TODO: Create other models

class ViT(nn.Module):
    """Image classifier wrapping Hugging Face ``Swinv2ForImageClassification``.

    A default ``Swinv2Config`` is created, the constructor arguments are
    written onto it as attribute overrides, and the classification model is
    then built from that configuration (no ``from_pretrained`` call is made
    here). The image size is fixed at 300.

    Args:
        window_size: Stored as ``config.window_size`` (cast to ``int``).
        hidden_size: Stored as ``config.hidden_size`` without conversion.
            NOTE(review): Swin V2 appears to derive its feature width from
            ``embed_dim`` and ``depths`` — confirm this value is actually used.
        att_drop: Stored as ``config.attention_probs_dropout_prob`` (``float``).
        drop_path_rate: Stored as ``config.drop_path_rate`` (``float``).
        drop_rate: Stored as ``config.hidden_dropout_prob`` (``float``).
        layer_norm_eps: Stored as ``config.layer_norm_eps`` (``float``).
        encoder_stride: Stored as ``config.encoder_stride`` (``int``).
        embed_dim: Stored as ``config.embed_dim`` (``int``).
        depths: Stored as ``config.depths`` without conversion.
        mlp_ratio: Stored as ``config.mlp_ratio`` without conversion.
        num_classes: Stored as ``config.num_labels``.
    """

    def __init__(self, window_size,hidden_size,att_drop,drop_path_rate,drop_rate,layer_norm_eps,encoder_stride,embed_dim,depths,mlp_ratio,num_classes = 2):
        super().__init__()

        # Overrides are applied in this order on top of the library defaults.
        overrides = {
            "hidden_size": hidden_size,
            "image_size": 300,
            "hidden_dropout_prob": float(drop_rate),
            "window_size": int(window_size),
            "attention_probs_dropout_prob": float(att_drop),
            "drop_path_rate": float(drop_path_rate),
            "layer_norm_eps": float(layer_norm_eps),
            "encoder_stride": int(encoder_stride),
            "embed_dim": int(embed_dim),
            "depths": depths,
            "mlp_ratio": mlp_ratio,
            "num_labels": num_classes,
        }

        swin_config = Swinv2Config()
        for field_name, field_value in overrides.items():
            setattr(swin_config, field_name, field_value)

        self.vit = Swinv2ForImageClassification(swin_config)

    def forward(self, x, return_attention=False):
        """Run the wrapped model and return its logits.

        Args:
            x: Input passed positionally to the wrapped model.
            return_attention: When true, attention outputs are requested from
                the wrapped model and returned alongside the logits.

        Returns:
            ``logits`` alone, or the tuple ``(logits, attentions)`` when
            ``return_attention`` is true.
        """
        result = self.vit(x, output_attentions=return_attention, return_dict=True)
        logits = result.logits
        return (logits, result.attentions) if return_attention else logits


class YOLOv11Classifier(nn.Module):
    """Classifier built from an Ultralytics ``*-cls.pt`` checkpoint.

    The checkpoint ``f"{model_size}-cls.pt"`` is loaded through ``YOLO`` and
    the final linear layer of its last module is replaced by a fresh
    ``nn.Linear`` with ``num_classes`` outputs.

    Args:
        num_classes: Number of outputs of the replacement linear layer.
        model_size: Checkpoint name prefix; ``"-cls.pt"`` is appended to it.

    Raises:
        AttributeError: If the last module of the loaded network exposes
            neither a ``linear`` nor an ``fc`` attribute, so the head cannot
            be resized to ``num_classes``.
    """

    def __init__(self, num_classes, model_size):
        super().__init__()
        yolo = YOLO(f"{model_size}-cls.pt")
        head = yolo.model.model[-1]

        # The final projection may be stored under either attribute name;
        # replace the first one found, keeping its input width.
        for attr_name in ("linear", "fc"):
            if hasattr(head, attr_name):
                in_features = getattr(head, attr_name).in_features
                setattr(head, attr_name, nn.Linear(in_features, num_classes))
                break
        else:
            # Previously this case fell through silently, leaving the
            # pretrained head (and its original class count) in place.
            raise AttributeError(
                f"Cannot resize classifier head of type {type(head).__name__}: "
                "expected a 'linear' or 'fc' attribute"
            )

        self.model = yolo.model

    def forward(self, x):
        """Return the wrapped network's output for ``x``.

        When the network returns a tuple or list, only its first element is
        returned.

        NOTE(review): some Ultralytics versions appear to return post-softmax
        probabilities (or a ``(probs, logits)`` pair) in eval mode but raw
        logits in training mode — confirm which one callers expect.
        """
        out = self.model(x)
        if isinstance(out, (tuple, list)):
            out = out[0]
        return out