-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathmodels.py
More file actions
155 lines (129 loc) · 5.63 KB
/
Copy pathmodels.py
File metadata and controls
155 lines (129 loc) · 5.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import torch
import torch.nn as nn
import torch.nn.functional as F
from operator import itemgetter
from transformers import Swinv2Config, Swinv2ForImageClassification
from ultralytics import YOLO
class CNNModel1(nn.Module):
    """Five-stage convolutional classifier with a three-layer dense head.

    Every stage applies a 2x2 convolution (no padding), batch normalisation,
    ReLU and 2x2 max pooling.  The resulting feature map is flattened per
    sample and passed through ``fc1 -> fc2 -> fc3``; ``fc3`` produces two
    output values per sample.

    Args:
        fully_layer_1: Number of units in the first fully connected layer.
        fully_layer_2: Number of units in the second fully connected layer.
        drop_rate: Dropout probability applied after ``fc1`` and ``fc2``.
            Dropout is only active while the module is in training mode.
        image_size: Side length of the (square) input images, or a
            ``(height, width)`` pair.  Defaults to 300, which yields the
            ``32 * 8 * 8`` flattened features the model was originally
            hard-coded for (200 yields ``32 * 5 * 5``).

    Raises:
        ValueError: If ``image_size`` is too small to survive the five
            conv/pool stages.
    """

    # Number of conv -> bn -> relu -> pool stages in the feature extractor.
    _NUM_STAGES = 5

    def __init__(self, fully_layer_1, fully_layer_2, drop_rate, image_size=300):
        super().__init__()

        try:
            height, width = image_size
        except TypeError:
            # A single number describes a square image.
            height = width = image_size
        out_h = self._stage_output_side(int(height))
        out_w = self._stage_output_side(int(width))
        if out_h < 1 or out_w < 1:
            raise ValueError(
                f"image_size={image_size!r} is too small for "
                f"{self._NUM_STAGES} conv/pool stages"
            )
        # Number of values per sample entering fc1 (32 channels after conv5).
        self.flat_features = 32 * out_h * out_w

        # Layers are created in the same order as before so that seeded
        # initialisation and state_dict keys stay unchanged.
        self.conv1 = nn.Conv2d(3, 32, 2)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 2)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, 2)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv4 = nn.Conv2d(128, 64, 2)
        self.bn4 = nn.BatchNorm2d(64)
        self.conv5 = nn.Conv2d(64, 32, 2)
        self.bn5 = nn.BatchNorm2d(32)
        self.pool = nn.MaxPool2d(2, 2)
        self.drop_rate = drop_rate
        self.fc1 = nn.Linear(self.flat_features, fully_layer_1)
        self.fc2 = nn.Linear(fully_layer_1, fully_layer_2)
        self.fc3 = nn.Linear(fully_layer_2, 2)

    @staticmethod
    def _stage_output_side(side):
        """Return the spatial extent that remains after all conv/pool stages."""
        for _ in range(CNNModel1._NUM_STAGES):
            # An unpadded 2x2 convolution removes one pixel; the 2x2 pooling
            # with stride 2 then halves the extent, rounding down.
            side = (side - 1) // 2
        return side

    def forward(self, x):
        """Run the network on a batch of 3-channel images.

        Args:
            x: Input batch whose spatial size matches ``image_size``.

        Returns:
            Tensor with one row per input sample and two columns.
        """
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = self.pool(F.relu(self.bn4(self.conv4(x))))
        x = self.pool(F.relu(self.bn5(self.conv5(x))))
        # Flatten per sample while keeping the batch dimension fixed.  A
        # wrongly sized input then fails loudly in fc1 instead of being
        # silently regrouped into a different number of rows.
        x = torch.flatten(x, 1)
        x = F.dropout(F.relu(self.fc1(x)), p=self.drop_rate, training=self.training)
        x = F.dropout(F.relu(self.fc2(x)), p=self.drop_rate, training=self.training)
        return self.fc3(x)
class CNNModel2(nn.Module):
    """Convolutional classifier intended as a refinement of CNNModel1.

    Design differences relative to CNNModel1:
      * 3x3 convolutions with padding 1 (instead of 2x2) for a larger
        receptive field.
      * Channel widths never shrink across stages: 32, 64, 128, 256, 256.
      * Adaptive average pooling down to 1x1 replaces the fixed-size
        flatten, so ``fc1`` always receives 256 features per sample.

    Args:
        fully_layer_1: Number of units in the first fully connected layer.
        fully_layer_2: Number of units in the second fully connected layer.
        drop_rate: Dropout probability applied after ``fc1`` and ``fc2``
            while the module is in training mode.
    """

    def __init__(self, fully_layer_1, fully_layer_2, drop_rate):
        super().__init__()

        # Feature extractor: five conv + batch-norm pairs.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.conv5 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.bn5 = nn.BatchNorm2d(256)

        # Shared down-sampling and the final global pooling.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.gap = nn.AdaptiveAvgPool2d(output_size=1)

        self.drop_rate = drop_rate

        # Classification head ending in two output values.
        self.fc1 = nn.Linear(256, fully_layer_1)
        self.fc2 = nn.Linear(fully_layer_1, fully_layer_2)
        self.fc3 = nn.Linear(fully_layer_2, 2)

    def forward(self, x):
        """Return a tensor with one row per sample and two columns."""
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
            (self.conv4, self.bn4),
            (self.conv5, self.bn5),
        )
        for conv, norm in stages:
            x = self.pool(F.relu(norm(conv(x))))

        # Collapse each 256-channel map to a single value, then drop the
        # trailing 1x1 spatial dimensions.
        batch_size = x.size(0)
        features = self.gap(x).view(batch_size, -1)

        for dense in (self.fc1, self.fc2):
            features = F.dropout(
                F.relu(dense(features)),
                p=self.drop_rate,
                training=self.training,
            )
        return self.fc3(features)
# TODO: Create other models
class ViT(nn.Module):
    """Thin wrapper around ``Swinv2ForImageClassification``.

    The wrapped model is built from a default ``Swinv2Config`` whose fields
    are overridden with the constructor arguments; no pretrained weights
    are loaded in this class.  The configured image size is fixed at 300.

    Args:
        window_size: Stored on the config as ``window_size`` (cast to int).
        hidden_size: Stored on the config as ``hidden_size`` (as given).
        att_drop: Stored as ``attention_probs_dropout_prob`` (cast to float).
        drop_path_rate: Stored as ``drop_path_rate`` (cast to float).
        drop_rate: Stored as ``hidden_dropout_prob`` (cast to float).
        layer_norm_eps: Stored as ``layer_norm_eps`` (cast to float).
        encoder_stride: Stored as ``encoder_stride`` (cast to int).
        embed_dim: Stored as ``embed_dim`` (cast to int).
        depths: Stored as ``depths`` (as given).
        mlp_ratio: Stored as ``mlp_ratio`` (as given).
        num_classes: Stored as ``num_labels``; defaults to 2.
    """

    def __init__(self, window_size,hidden_size,att_drop,drop_path_rate,drop_rate,layer_norm_eps,encoder_stride,embed_dim,depths,mlp_ratio,num_classes = 2):
        super().__init__()

        # Config field -> value, applied in this order on a default config.
        overrides = {
            "hidden_size": hidden_size,
            "image_size": 300,
            "hidden_dropout_prob": float(drop_rate),
            "window_size": int(window_size),
            "attention_probs_dropout_prob": float(att_drop),
            "drop_path_rate": float(drop_path_rate),
            "layer_norm_eps": float(layer_norm_eps),
            "encoder_stride": int(encoder_stride),
            "embed_dim": int(embed_dim),
            "depths": depths,
            "mlp_ratio": mlp_ratio,
            "num_labels": num_classes,
        }
        config = Swinv2Config()
        for field, value in overrides.items():
            setattr(config, field, value)

        self.vit = Swinv2ForImageClassification(config)

    def forward(self, x, return_attention=False):
        """Classify a batch of images.

        Args:
            x: Input passed as the first positional argument to the
                wrapped model.
            return_attention: When true, also request and return the
                attention outputs of the wrapped model.

        Returns:
            The logits, or a ``(logits, attentions)`` pair when
            ``return_attention`` is true.
        """
        result = self.vit(
            x,
            output_attentions=return_attention,
            return_dict=True,
        )
        if not return_attention:
            return result.logits
        return result.logits, result.attentions
class YOLOv11Classifier(nn.Module):
    """Classifier built on an ultralytics YOLO ``-cls`` checkpoint.

    The checkpoint ``"<model_size>-cls.pt"`` is loaded through
    ``ultralytics.YOLO`` and the final linear layer of its last module is
    replaced by a fresh ``nn.Linear`` with ``num_classes`` outputs.

    Args:
        num_classes: Number of outputs of the replacement linear layer.
        model_size: Checkpoint name prefix; ``"-cls.pt"`` is appended.

    Raises:
        AttributeError: If the last module of the loaded model exposes
            neither a ``linear`` nor an ``fc`` attribute, i.e. the output
            layer could not be resized to ``num_classes``.
    """

    # Attribute names under which the head's output layer is looked up,
    # in priority order.
    _HEAD_LAYER_NAMES = ("linear", "fc")

    def __init__(self, num_classes, model_size):
        super().__init__()
        yolo = YOLO(f"{model_size}-cls.pt")
        head = yolo.model.model[-1]

        for layer_name in self._HEAD_LAYER_NAMES:
            if hasattr(head, layer_name):
                old_layer = getattr(head, layer_name)
                setattr(
                    head,
                    layer_name,
                    nn.Linear(old_layer.in_features, num_classes),
                )
                break
        else:
            # Previously this case fell through silently and left the
            # checkpoint's original output layer in place, so the model
            # ignored ``num_classes`` without any warning.
            raise AttributeError(
                f"Cannot resize classification head of type "
                f"{type(head).__name__}: expected one of the attributes "
                f"{self._HEAD_LAYER_NAMES}"
            )

        self.model = yolo.model

    def forward(self, x):
        """Run the wrapped model and return a single output.

        When the wrapped model returns a tuple or list, only its first
        element is returned.
        """
        out = self.model(x)
        # NOTE(review): which element of a tuple/list result holds raw
        # logits versus normalised scores depends on the installed
        # ultralytics version and on train/eval mode -- confirm that the
        # first element is what callers expect.
        if isinstance(out, (tuple, list)):
            out = out[0]
        return out