-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain-XSS(linear2).py
More file actions
100 lines (81 loc) · 3.35 KB
/
main-XSS(linear2).py
File metadata and controls
100 lines (81 loc) · 3.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
from sklearn.feature_extraction.text import CountVectorizer
# ---------------------------------------------------------------------------
# Data loading & preprocessing
# ---------------------------------------------------------------------------
# Load the XSS/SQL-injection sentence dataset, dropping rows with a NaN
# 'Sentence' so the vectorizer never sees missing text.
ds = pd.read_csv("/home/g_sml/Challenge/datasets-cyber/XSS_SQL.csv")
ds = ds.dropna(subset=['Sentence'])

# Bag-of-words vectorization of the sentences; labels come straight from
# the 'Label' column.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(ds['Sentence'])
y = ds['Label'].values

# 80/20 train/test split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build tensor datasets and loaders. The sparse matrices are densified
# into float32 feature tensors with int64 labels.
# NOTE(review): .toarray() materializes the full dense matrix — may be
# memory-hungry for large vocabularies.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_dataset = TensorDataset(
    torch.tensor(X_train.toarray()).float(),
    torch.tensor(y_train).long(),
)
test_dataset = TensorDataset(
    torch.tensor(X_test.toarray()).float(),
    torch.tensor(y_test).long(),
)

train_loader = DataLoader(
    train_dataset, batch_size=64, shuffle=True, pin_memory=True
)
test_loader = DataLoader(
    test_dataset, batch_size=64, shuffle=False, pin_memory=True
)
# Model definition
class TinyModel(nn.Module):
    """Feed-forward classifier over bag-of-words sentence vectors.

    Maps an ``input_dim``-sized feature vector through a stack of linear
    layers with ReLU activations and two dropout layers to 2 output
    logits (binary classification). No softmax is applied here: the
    raw logits feed ``nn.CrossEntropyLoss`` during training.

    Fix vs. original: ``self.relu`` was assigned ``nn.ReLU()`` twice
    (a redundant duplicate attribute assignment); the duplicate is
    removed. ReLU is stateless, so sharing one instance is safe and
    the forward trace is unchanged.
    """

    def __init__(self, input_dim):
        """``input_dim``: number of features from the vectorizer."""
        super(TinyModel, self).__init__()
        self.linear1 = nn.Linear(input_dim, 128)
        self.relu = nn.ReLU()  # single shared activation (stateless)
        self.linear2 = nn.Linear(128, 128)
        self.dropout = nn.Dropout(0.22)
        self.linear3 = nn.Linear(128, 128)
        self.linear4 = nn.Linear(128, 64)
        self.linear6 = nn.Linear(64, 64)
        self.dropout1 = nn.Dropout(0.15)
        # 2 output classes for the binary classification problem.
        self.linear5 = nn.Linear(64, 2)

    def forward(self, x):
        """Return the (batch, 2) logits for input ``x`` of shape (batch, input_dim)."""
        x = self.linear1(x)
        x = self.relu(x)
        x = self.linear2(x)
        x = self.dropout(x)   # dropout before activation, as in original
        x = self.relu(x)
        x = self.linear3(x)
        x = self.relu(x)
        x = self.linear4(x)
        x = self.linear6(x)   # no activation between linear4 and linear6
        x = self.relu(x)
        x = self.dropout1(x)
        x = self.linear5(x)
        return x
# Number of features produced by the vectorizer.
input_dim = X_train.shape[1]

# Model initialization
model = TinyModel(input_dim).to(device)

# Loss and optimizer definition
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0000002)

# Train for a fixed number of epochs; after each epoch, measure
# accuracy on the held-out test set.
epochs = 250
for epoch in range(epochs):
    # --- training pass ---
    model.train()
    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        optimizer.zero_grad()
        loss = criterion(model(batch_X), batch_y)
        loss.backward()
        optimizer.step()

    # --- evaluation pass ---
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_X, batch_y in test_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            output = model(batch_X)
            predicted = output.data.max(1)[1]
            total += batch_y.size(0)
            correct += (predicted == batch_y).sum().item()
    accuracy = correct / total
    # NOTE: the printed loss is that of the LAST training batch of this epoch.
    print(f"Epoch [{epoch+1}/{epochs}], Test Accuracy: {accuracy:.4f}, Loss {loss:.3}")

# Export the trained model to TorchScript and save it to disk.
model_scripted = torch.jit.script(model)  # Export to TorchScript
model_scripted.save('/home/g_sml/Challenge/Modelli/model4-ultimo.pt')  # Save