Vulnerability-Management/sorting_alg.py at master · GeorgievDimcho/Vulnerability-Management · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import csv  # Import the CSV module for working with CSV files
import sys  # Import the sys module to work with system arguments and functions

# The script expects exactly one command-line argument: the input file path
if len(sys.argv) != 2:
    print("Usage: python sorting_alg.py <input_file>")  # Show correct usage
    sys.exit(1)  # Wrong number of arguments: stop with an error code

# The output file is the input path with "_sorted" appended
output_file = sys.argv[1]+"_sorted"

# Collect every line of the input file, stripped of surrounding whitespace
# (blank lines are kept as empty strings)
existing_cvs = []
with open(sys.argv[1], mode="r", newline="", encoding="utf-8") as imported:
    existing_cvs.extend(entry.strip() for entry in imported)

# Load the reference CSV with the vulnerability data; each row becomes a
# dictionary keyed by the names found in the CSV header line
csv_file = "res/vuln_downloaded_data/CVE.csv"
with open(csv_file, mode="r", newline="", encoding="utf-8") as csvfile:
    reader = csv.DictReader(csvfile)
    vulnerabilities = list(reader)
# Weights of the three components of the composite score, in the order
# [CVSS, EPSS, CISA]
WEIGHTS = [
    0.3,  # CVSS
    0.3,  # EPSS
    0.4,  # CISA
]
# Column names looked up in the CSV data. The single quotes are part of each
# string, so a name only matches a CSV header that carries the quotes too.
CVSS_keywords = [
    "'CVE'",
    "'CVSS2_baseScore'",
    "'CVSS3_baseScore'",
    "'CVSS2_exploitabilityScore'",
    "'CVSS3_exploitabilityScore'",
    "'CVSS2_impactScore'",
    "'CVSS3_impactScore'",
    "'EPSS'",
    "'CISA_dateAdded'",
]

# Function to calculate a composite score for each vulnerability based on predefined weights


def calculate_composite_score(vulnerability):
    """Return the weighted composite score of one cleaned vulnerability row.

    Three component scores are built and combined with ``WEIGHTS``:

    * CVSS: the average of every numeric value in the row other than the
      EPSS and CISA columns.
    * EPSS: the numeric value of the ``'EPSS'`` column multiplied by 10.
    * CISA: 1 when the ``'CISA_dateAdded'`` column is present in the row,
      otherwise 0.

    Args:
        vulnerability: Mapping of column name to string value. Columns that
            are absent simply contribute nothing to the score.

    Returns:
        The sum of ``WEIGHTS[i] * score[i]`` over all three components, as a
        float.
    """
    # Look the columns up by name rather than by position in CVSS_keywords,
    # so reordering that list cannot silently select the wrong column.
    epss_key = "'EPSS'"
    cisa_key = "'CISA_dateAdded'"

    cvss_total = 0.0
    cvss_count = 0
    epss_score = 0.0
    cisa_score = 0

    for key, value in vulnerability.items():
        if key == cisa_key:
            # Presence of the column is what counts, not its content.
            # NOTE(review): this term is 0/1 while the other two are scaled
            # to roughly 0-10 -- confirm the intended scale.
            cisa_score = 1
            continue
        value = value.replace("'", "")  # Drop the quotes wrapping the value
        if not isANumber(value):
            continue  # Non-numeric fields (e.g. the CVE ID) carry no score
        if key == epss_key:
            # Presumably rescales EPSS (a 0-1 probability) to match CVSS
            epss_score = float(value) * 10
        else:
            cvss_total += float(value)
            cvss_count += 1

    # Average the CVSS values; stays 0 when the row has none
    cvss_score = cvss_total / cvss_count if cvss_count > 0 else 0.0

    scores = (cvss_score, epss_score, cisa_score)
    # Combine all three components; zip pairs each weight with its score
    return sum(float(weight) * float(score)
               for weight, score in zip(WEIGHTS, scores))

# Helper function to check if a string can be converted to a number


def isANumber(s):
    """Return True when *s* can be converted with ``float()``, else False.

    Anything ``float()`` rejects -- malformed strings, the empty string, or
    unsupported types such as ``None`` -- yields False instead of raising.
    Note that ``float()`` also accepts strings such as ``"nan"`` and
    ``"inf"``, so those count as numbers here.

    Args:
        s: The value to test, normally a string read from the CSV data.

    Returns:
        bool: Whether ``float(s)`` succeeds.
    """
    try:
        # Every string int() accepts in base 10 is also accepted by float(),
        # so a single conversion attempt is sufficient.
        float(s)
    except (ValueError, TypeError):
        return False
    return True


# Values treated as "no data" when cleaning the CSV rows
_REJECTED_VALUES = [None, "'null'", "NULL", "null", "None", "'NONE'"]
# For every CSV row keep only the columns listed in CVSS_keywords whose value
# is not one of the rejected placeholders
cleaned_vulnerabilities = [
    {
        column: cell
        for column, cell in row.items()
        if cell not in _REJECTED_VALUES and column in CVSS_keywords
    }
    for row in vulnerabilities
]
# Order a copy of the cleaned rows by composite score, highest score first
sorted_vulnerabilities = list(cleaned_vulnerabilities)
sorted_vulnerabilities.sort(key=calculate_composite_score, reverse=True)
# Map each CVE ID from the input file to its position in the sorted list.
# Keys are inserted while walking the sorted list, so the dictionary's
# insertion order (highest score first) is the order written to the output.
requested_cves = set(existing_cvs)  # Set lookup instead of rescanning the list per row
index = {}
for i, d in enumerate(sorted_vulnerabilities):
    raw_cve = d.get(CVSS_keywords[0])
    if raw_cve is None:
        # The cleaning step drops null-like fields, so a row can lack a CVE ID
        continue
    cve = str(raw_cve).replace("'", "")  # Drop the quotes wrapping the ID
    if cve in requested_cves:
        # Store the index of the corresponding CVE ID
        index[cve] = i
# List of CVE IDs from the index dictionary, in sorted-score order.
# Input IDs that do not occur in the CSV data are not part of the output.
keys_list = list(index.keys())
# Write the sorted CVE IDs to the output file, one per line
with open(output_file, mode="w", encoding="utf-8") as file:
    file.writelines(item + '\n' for item in keys_list)