forked from Stalin-143/Ai-Basic-programes
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmalware-analysis.py
More file actions
68 lines (58 loc) · 2.27 KB
/
malware-analysis.py
File metadata and controls
68 lines (58 loc) · 2.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
# Load Dataset (Example dataset: CSV with features and a 'label' column)
def load_data(file_path):
    """Read a CSV dataset into a pandas DataFrame.

    Args:
        file_path: Location of the CSV file, handed straight to
            ``pd.read_csv``.

    Returns:
        The loaded DataFrame, or ``None`` when the file cannot be read or
        parsed. Failures are reported on stdout instead of being raised.
    """
    try:
        data = pd.read_csv(file_path)
    except (OSError, ValueError) as e:
        # OSError covers missing/unreadable files; pandas' ParserError and
        # EmptyDataError (and UnicodeDecodeError) all derive from ValueError.
        # Anything else is a programming error and is allowed to propagate.
        print(f"Error loading data: {e}")
        return None
    print("Data loaded successfully.")
    return data
# Preprocess the data
def preprocess_data(data, label_column='label', test_size=0.3, random_state=42):
    """Separate features from labels and split them into train/test sets.

    Args:
        data: DataFrame holding the feature columns plus the label column.
        label_column: Name of the target column (0: Benign, 1: Malware).
        test_size: Forwarded to ``train_test_split``.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)``.

    Raises:
        KeyError: If ``label_column`` is not a column of ``data``.
    """
    # Fail early with a readable message instead of the generic error that
    # DataFrame.drop would raise for a missing column.
    if label_column not in data.columns:
        raise KeyError(f"Label column {label_column!r} not found in dataset")

    X = data.drop(columns=[label_column])  # Features
    y = data[label_column]  # Labels
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    return X_train, X_test, y_train, y_test
# Train the model
def train_model(X_train, y_train, n_estimators=100, random_state=42):
    """Fit a random-forest classifier on the training split.

    Args:
        X_train: Training features.
        y_train: Training labels.
        n_estimators: Forwarded to ``RandomForestClassifier``.
        random_state: Seed forwarded to ``RandomForestClassifier``.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    model = RandomForestClassifier(
        n_estimators=n_estimators, random_state=random_state
    )
    model.fit(X_train, y_train)
    print("Model trained successfully.")
    return model
# Evaluate the model
def evaluate_model(model, X_test, y_test):
    """Print a classification report and the accuracy on the test split.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Held-out features.
        y_test: True labels for ``X_test``.

    Returns:
        The accuracy score, so callers can use it without parsing stdout.
    """
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    print("Model Evaluation:")
    print(classification_report(y_test, predictions))
    print(f"Accuracy: {accuracy}")
    return accuracy
# Save the trained model
def save_model(model, file_name):
    """Serialize the trained model to disk with joblib.

    Args:
        model: Object to persist.
        file_name: Destination path passed to ``joblib.dump``.
    """
    joblib.dump(model, file_name)
    print(f"Model saved as {file_name}")
# Load and use the model for predictions
def load_model(file_name):
    """Load a model previously written with ``joblib.dump``.

    Args:
        file_name: Path of the serialized model.

    Returns:
        The deserialized model object.
    """
    # SECURITY: joblib.load is pickle-based and can execute arbitrary code
    # while deserializing. Only load model files from a trusted source.
    model = joblib.load(file_name)
    print(f"Model loaded from {file_name}")
    return model
def predict_sample(model, sample):
    """Classify a single feature vector and print the verdict.

    Args:
        model: Fitted estimator exposing ``predict``.
        sample: Sequence of feature values for one observation, in the same
            order as the columns the model was trained on.

    Returns:
        ``"Malware"`` when the predicted label equals 1, otherwise
        ``"Benign"``.
    """
    feature_names = getattr(model, "feature_names_in_", None)
    if feature_names is not None:
        # A model fitted on a DataFrame records its column names; passing a
        # bare list then makes scikit-learn warn that X has no valid feature
        # names. Re-attach the names so the input matches the training data.
        batch = pd.DataFrame([sample], columns=list(feature_names))
    else:
        batch = [sample]

    prediction = model.predict(batch)
    label = 'Malware' if prediction[0] == 1 else 'Benign'
    print(f"Prediction: {label}")
    return label
# Main execution
def main():
    """Train, evaluate, save, reload, and demo the malware classifier."""
    dataset_path = "malware_dataset.csv"  # Replace with your dataset path
    model_path = "malware_model.joblib"

    data = load_data(dataset_path)
    if data is None:
        # load_data has already printed why the dataset could not be read.
        return

    X_train, X_test, y_train, y_test = preprocess_data(data)
    model = train_model(X_train, y_train)
    evaluate_model(model, X_test, y_test)
    save_model(model, model_path)

    # Example prediction: reload the saved model to exercise the round trip.
    sample = X_test.iloc[0]  # Replace with your sample data
    loaded_model = load_model(model_path)
    predict_sample(loaded_model, sample.tolist())


# Main execution
if __name__ == "__main__":
    main()