-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsorting_alg.py
More file actions
108 lines (99 loc) · 4.73 KB
/
sorting_alg.py
File metadata and controls
108 lines (99 loc) · 4.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import csv # Import the CSV module for working with CSV files
import sys # Import the sys module to work with system arguments and functions
# The script takes exactly one positional argument: the path of a text file
# listing one CVE ID per line.
if len(sys.argv) != 2:
    print("Usage: python sorting_alg.py <input_file>")
    # A non-zero exit status signals incorrect invocation.
    sys.exit(1)

input_path = sys.argv[1]
# The result is written to a file named after the input, with "_sorted" appended.
output_file = input_path + "_sorted"

# Gather every line of the input file, stripped of surrounding whitespace.
existing_cvs = []
with open(input_path, mode="r", newline="", encoding="utf-8") as imported:
    for raw_line in imported:
        existing_cvs.append(raw_line.strip())

# Load the reference vulnerability data; each CSV row becomes a dict keyed by
# the column names of the header row.
csv_file = "res/vuln_downloaded_data/CVE.csv"
with open(csv_file, mode="r", newline="", encoding="utf-8") as csvfile:
    vulnerabilities = list(csv.DictReader(csvfile))

# Weights of the composite score components, in the order CVSS, EPSS, CISA.
WEIGHTS = [
    0.3,  # CVSS
    0.3,  # EPSS
    0.4,  # CISA
]

# CSV columns that take part in scoring. The single quotes are part of the
# column names as they are compared against the CSV header.
CVSS_keywords = [
    "'CVE'",
    "'CVSS2_baseScore'",
    "'CVSS3_baseScore'",
    "'CVSS2_exploitabilityScore'",
    "'CVSS3_exploitabilityScore'",
    "'CVSS2_impactScore'",
    "'CVSS3_impactScore'",
    "'EPSS'",
    "'CISA_dateAdded'",
]
# Function to calculate a composite score for each vulnerability based on predefined weights
def calculate_composite_score(vulnerability):
    """Return the weighted composite score of one cleaned vulnerability row.

    The score combines three components, weighted by the module-level
    ``WEIGHTS`` list (CVSS, EPSS, CISA, in that order):

    * CVSS: the mean of every numeric value in the row other than EPSS.
    * EPSS: the EPSS value multiplied by 10.
    * CISA: 1 when the row carries a ``'CISA_dateAdded'`` entry, else 0.

    Args:
        vulnerability: Mapping of column name to string value, restricted to
            the columns listed in ``CVSS_keywords``.

    Returns:
        The composite score as a float.
    """
    # Look the special columns up by name so that reordering CVSS_keywords
    # cannot silently point at the wrong column.
    epss_key = "'EPSS'"
    cisa_key = "'CISA_dateAdded'"

    score = [0, 0, 0]  # [CVSS, EPSS, CISA]
    cvss_counter = 0  # Number of values accumulated into the CVSS component

    for key, value in vulnerability.items():
        if key == cisa_key:
            # Presence of the column is what counts; its value is a date and
            # must never be folded into the CVSS average.
            score[2] = 1
            continue

        value = value.replace("'", "")  # Values may be wrapped in single quotes
        if not isANumber(value):
            continue

        if key == epss_key:
            score[1] = float(value) * 10
        else:
            score[0] += float(value)
            cvss_counter += 1

    if cvss_counter > 0:
        score[0] /= cvss_counter  # Average the CVSS scores

    # Every component takes part in the result, including the CISA one.
    return sum(
        (float(weight) * float(part) for weight, part in zip(WEIGHTS, score)),
        0.0,
    )
# Helper function to check if a string can be converted to a number
def isANumber(s):
    """Return True when ``s`` can be converted to a float, False otherwise.

    Anything ``float()`` accepts counts as a number, which includes integer
    strings. Values of an unsupported type, such as ``None``, are reported as
    not numeric instead of raising.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        return False
    return True
# Keep only the scoring-relevant columns of each row and drop the placeholder
# values that stand for "no data".
null_markers = [None, "'null'", "NULL", "null", "None", "'NONE'"]
cleaned_vulnerabilities = []
for v in vulnerabilities:
    cleaned_dict = {
        k: val
        for k, val in v.items()
        if val not in null_markers and k in CVSS_keywords
    }
    cleaned_vulnerabilities.append(cleaned_dict)

# Rank the rows by composite score, highest first.
sorted_vulnerabilities = sorted(
    cleaned_vulnerabilities,
    key=calculate_composite_score,
    reverse=True,
)

# Map each requested CVE ID to its position in the ranking. Because the rows
# are visited in ranked order, the dict's insertion order is the ranking.
requested_ids = set(existing_cvs)  # Built once for constant-time membership tests
index = {}
for i, d in enumerate(sorted_vulnerabilities):
    raw_cve = d.get(CVSS_keywords[0])
    if raw_cve is None:
        # The CVE cell was a null placeholder and was removed during cleaning;
        # such a row cannot match any requested ID.
        continue
    cve = str(raw_cve).replace("'", "")
    if cve in requested_ids:
        index[cve] = i

# Requested CVE IDs found in the reference data, in ranked order.
keys_list = list(index)

# Write the ranked CVE IDs to the output file, one per line.
with open(output_file, mode="w", encoding="utf-8") as file:
    file.writelines(item + '\n' for item in keys_list)