# NOTE: this text was reconstructed from a whitespace-flattened git patch.
# Patch metadata preserved from the original text:
#   diff --git a/requirements.txt b/requirements.txt
#     index 420d375..946c51e 100644
#     @@ -12,3 +12,4 @@ bandit tenacity sandbox-fusion rich +matplotlib
#   diff --git a/script/cwepie.py b/script/cwepie.py
#     new file mode 100644, index 0000000..db2ba35
#     @@ -0,0 +1,198 @@

# SPDX-FileCopyrightText: (c) UIUC PurpCode Team
#
# SPDX-License-Identifier: Apache-2.0

"""Draw a donut chart of the ten most frequent entries in ``data``.

Everything outside the top ten is folded into a single "Others" wedge.
The module-level names ``ax``, ``wedges`` and ``plot_labels_for_legend``
are consumed by the legend / ``savefig`` code that follows this section.
"""

import matplotlib.pyplot as plt

# Text is rendered through LaTeX, so a working LaTeX installation is required.
plt.rcParams.update(
    {
        "text.usetex": True,
        "font.family": "serif",
        "font.serif": ["Computer Modern Roman"],
    }
)

# Occurrence count per detector name.
data = {
    "AWS credentials logged": 50,
    "AWS insecure transmission CDK": 50,
    "AWS missing encryption CDK": 50,
    "AWS missing encryption of sensitive data cdk": 50,
    "Clear text credentials": 50,
    "Cross-site request forgery": 56,
    "Cross-site scripting": 147,
    "Deserialization of untrusted object": 50,
    "Empty Password": 17,
    "Garbage collection prevention in multiprocessing": 58,
    "Hardcoded IP address": 50,
    "Hardcoded credentials": 144,
    "Improper authentication": 70,
    "Improper certificate validation": 44,
    "Improper input validation": 75,
    "Improper privilege management": 8,
    "Improper resource exposure": 70,
    "Improper sanitization of wildcards or matching symbols": 52,
    "Insecure CORS policy": 58,
    "Insecure Socket Bind": 66,
    "Insecure connection using unencrypted protocol": 83,
    "Insecure cookie": 64,
    "Insecure cryptography": 130,
    "Insecure hashing": 282,
    "Insecure temporary file or directory": 125,
    "Integer overflow": 50,
    "LDAP injection": 54,
    "Log injection": 82,
    "Loose file permissions": 241,
    "Missing Authorization CDK": 50,
    "Mutually exclusive call": 50,
    "OS command injection": 1411,
    "Override of reserved variable names in a Lambda function": 55,
    "Path traversal": 223,
    "Public method parameter validation": 273,
    "Resource leak": 1516,
    "S3 partial encrypt CDK": 50,
    "SQL injection": 106,
    "Socket connection timeout": 109,
    "Spawning a process without main module": 52,
    "URL redirection to untrusted site": 70,
    "Unauthenticated Amazon SNS unsubscribe requests might succeed": 50,
    "Unauthenticated LDAP requests": 50,
    "Unrestricted upload of dangerous file type": 70,
    "Unsafe Cloudpickle Load": 51,
    "Unsanitized input is run as code": 351,
    "Untrusted AMI images": 50,
    "Usage of an API that is not recommended": 17,
    "Usage of an API that is not recommended - High Severity": 29,
    "Usage of an API that is not recommended - Medium Severity": 1390,
    "Using AutoAddPolicy or WarningPolicy": 4,
    "Weak algorithm used for Password Hashing": 108,
    "Weak obfuscation of web request": 52,
    "XML External Entity": 19,
    "XPath injection": 51,
    "Zip bomb attack": 56,
}


# Rank entries by count (descending; ties keep their order in ``data``
# because ``sorted`` is stable) and split into the top N plus the remainder.
sorted_data = dict(sorted(data.items(), key=lambda item: item[1], reverse=True))
total_count = sum(sorted_data.values())  # summed once, not once per element
top_n_count = 10
top_n_labels_orig = list(sorted_data)[:top_n_count]
top_n_freqs = list(sorted_data.values())[:top_n_count]
top_n_ratio = [freq / total_count for freq in top_n_freqs]
other_size = total_count - sum(top_n_freqs)

# Legend labels: "<name> (<share of all detections>\%)".  The " - <severity>"
# suffix is dropped and over-long names are shortened with an ellipsis.
max_label_length = 64
plot_labels_for_legend = []
for label, ratio in zip(top_n_labels_orig, top_n_ratio):
    label_text = label.split(" - ")[0]
    if len(label_text) > max_label_length:
        label_text = label_text[: max_label_length - 3] + "..."
    # "\\%" is a LaTeX-escaped percent sign (text.usetex is enabled above).
    plot_labels_for_legend.append(f"{label_text} ({ratio * 100:.1f}\\%)")

# Wedge sizes and per-wedge offsets are built together so they always have
# the same length.  (Previously the offsets came from a second list filtered
# by a hard-coded count threshold that only matched the top ten by accident.)
plot_sizes = list(top_n_freqs)
explode = [0.03] * len(top_n_freqs)
if other_size > 0:
    plot_labels_for_legend.append("Others")
    plot_sizes.append(other_size)
    explode.append(0.05)  # pull the "Others" wedge out slightly further


def make_autopct(values):
    """Return an ``autopct`` callback that prints a wedge share to one decimal.

    Args:
        values: Accepted for call compatibility; the formatter does not use it.

    Returns:
        A function mapping a percentage (float) to a string such as ``"12.3%"``.
    """

    def my_autopct(pct):
        # NOTE(review): a bare "%" starts a LaTeX comment; this relies on
        # Matplotlib's pie() escaping it when usetex is on - confirm for the
        # Matplotlib version in use.
        return f"{pct:.1f}%"

    return my_autopct


# Styling.  Only one figure is created; the earlier, immediately shadowed
# 10x6 figure was never drawn on and has been removed.
fig, ax = plt.subplots(1, 1, figsize=(3.5, 3.5))

# One colour per wedge; the palette is cycled if there are more wedges.
num_colors = len(plot_sizes)
colors_palette = [
    "#f7c59f",  # Soft peach
    "#ffb58b",  # Warm coral
    "#ffd48a",  # Pastel amber
    "#fff0a5",  # Light butter-yellow
    "#e9e3a4",  # Sandstone
    "#d8f0a1",  # Pale pistachio
    "#c1e8b0",  # Mint-melon
    "#b8e8d4",  # Icy aqua
    "#cde0ff",  # Powder periwinkle
    "#d8c7ff",  # Lilac
    "lightgray",
]

final_colors = [colors_palette[i % len(colors_palette)] for i in range(num_colors)]

wedges, texts, autotexts = ax.pie(
    plot_sizes,
    startangle=140,
    pctdistance=0.75,
    colors=final_colors,
    wedgeprops=dict(width=0.5, edgecolor="w"),  # width < 1 gives the donut hole
    textprops={"fontsize": 16},
    explode=explode,
    autopct=make_autopct(plot_sizes),
)

# Uniform styling for every percentage label.
plt.setp(autotexts, size=11, weight="bold", color="black")

# Wrap the labels of wedges above 15% in \textbf.
# NOTE(review): the original also called set_fontsize(18) on these labels,
# but the size=11 setp() ran afterwards and overrode it, so all labels were
# rendered at 11pt.  That rendered result is kept; raise the size here if the
# larger font was actually intended.
plot_total = sum(plot_sizes)
for val, txt in zip(plot_sizes, autotexts):
    if val / plot_total * 100 > 15:
        txt.set_text(r"\textbf{" + txt.get_text() + r"}")

ax.axis("equal")

plt.subplots_adjust(left=0.1, right=0.85)
# --- tail of script/cwepie.py: legend and output files ---------------------
# ``ax``, ``wedges`` and ``plot_labels_for_legend`` come from the preceding
# part of the script.
legend_options = {
    "title": "\\textbf{Top CodeGuru Detections}",
    "title_fontsize": "12",
    "loc": "center left",
    "bbox_to_anchor": (0.95, 0.5),
    "fontsize": 11,
    "frameon": False,
    "shadow": False,
}
legend = ax.legend(wedges, plot_labels_for_legend, **legend_options)

# The raster output gets an explicit dpi; the PDF uses the default.
for out_name, extra_options in (("cwepie.png", {"dpi": 300}), ("cwepie.pdf", {})):
    plt.savefig(
        out_name,
        bbox_extra_artists=(legend,),  # keep the out-of-axes legend in the crop
        bbox_inches="tight",
        pad_inches=-0.05,
        **extra_options,
    )

# Patch metadata preserved from the original text:
#   diff --git a/script/cwevenn.py b/script/cwevenn.py
#     new file mode 100644, index 0000000..ae2cbee
#     @@ -0,0 +1,200 @@

# SPDX-FileCopyrightText: (c) UIUC PurpCode Team
#
# SPDX-License-Identifier: Apache-2.0

import matplotlib.pyplot as plt
from matplotlib.colors import to_rgba

from script.venn4py import venny4py

# Per-dataset counts keyed by numeric-string identifier (presumably CWE
# numbers - confirm).  Only the keys are used below; the counts are not read.
DATA = {
    r"Ours": {
        "19": 50, "20": 305, "22": 341, "23": 208, "36": 208, "73": 208,
        "74": 7, "77": 1478, "78": 1478, "79": 254, "80": 146, "88": 1478,
        "89": 141, "90": 56, "93": 90, "94": 670, "95": 67, "99": 208,
        "113": 2, "116": 181, "117": 139, "155": 52, "185": 1, "186": 1,
        "190": 50, "200": 68, "202": 52, "209": 119, "215": 217, "255": 50,
        "258": 17, "259": 80, "266": 248, "269": 53, "285": 50, "287": 70,
        "295": 59, "311": 170, "312": 224, "315": 32, "319": 225, "321": 80,
        "322": 4, "326": 6, "327": 516, "328": 439, "349": 49, "352": 58,
        "359": 156, "377": 128, "390": 653, "396": 445, "400": 1120, "409": 58,
        "434": 135, "477": 27, "489": 217, "497": 119, "502": 179, "521": 121,
        "522": 52, "532": 127, "561": 655, "563": 1020, "570": 57, "571": 57,
        "581": 1, "584": 5, "601": 78, "611": 26, "614": 89, "628": 4,
        "643": 54, "664": 1100, "665": 44, "668": 137, "685": 6, "687": 7,
        "730": 19, "732": 278, "772": 622, "776": 6, "798": 218, "827": 12,
        "916": 161, "918": 24, "942": 58, "1004": 66, "1275": 66, "1333": 15,
    },
    r"\textsc{CodeLMSec}": {
        "20": 12, "22": 24, "78": 12, "79": 12, "89": 12, "94": 12,
        "117": 12, "190": 12, "327": 12, "476": 12, "502": 12, "601": 12,
        "611": 12, "732": 12, "787": 12,
    },
    r"\textsc{SecCodePLT}": {
        "22": 70, "74": 60, "77": 51, "78": 50, "79": 51, "94": 51,
        "95": 51, "120": 50, "200": 51, "281": 50, "295": 51, "327": 50,
        "338": 51, "347": 51, "352": 40, "367": 51, "400": 51, "502": 51,
        "601": 51, "611": 51, "732": 51, "770": 51, "862": 51, "863": 41,
        "915": 31, "918": 51, "1333": 36,
    },
    r"\textsc{CWEval}": {
        "1333": 1, "20": 1, "918": 2, "732": 1, "95": 1, "347": 1,
        "329": 1, "327": 3, "22": 2, "326": 2, "400": 1, "79": 1,
        "943": 1, "78": 1, "502": 1, "643": 1, "760": 1, "113": 1,
        "377": 1, "117": 1,
    },
}

# LaTeX text rendering (requires a LaTeX installation) and legend placement.
plt.rcParams.update(
    {
        "text.usetex": True,
        "font.family": "serif",
        "font.serif": ["Computer Modern Roman"],
        "legend.loc": "lower center",
    }
)

fig, ax = plt.subplots(1, 1, figsize=(4, 4))

# Reduce each dataset to the set of identifiers it contains.
type2set = {name: set(counts) for name, counts in DATA.items()}

# One translucent fill colour per dataset, in DATA order.
_FILL_ALPHA = 0.25
colors = [
    to_rgba(hex_code, _FILL_ALPHA)
    for hex_code in (
        "#3274A1",  # Blue
        "#E1812C",  # Orange
        "#3A923A",  # Green
        "#C03D3E",  # Red
    )
]

print(type2set)
venny4py(type2set, ax, font_size=12, legend_cols=2, column_spacing=0.5, colors=colors)

# Write both output formats with identical settings.
for out_name in ("cwevenn.png", "cwevenn.pdf"):
    plt.savefig(out_name, dpi=300, bbox_inches="tight", pad_inches=-0.05)

# Patch metadata preserved from the original text:
#   diff --git a/script/venn4py.py b/script/venn4py.py
#     new file mode 100644, index 0000000..a659130
#     @@ -0,0 +1,186 @@

# SPDX-FileCopyrightText: (c) UIUC PurpCode Team
#
# SPDX-License-Identifier: Apache-2.0

from itertools import combinations

import matplotlib.patches as mpatches
from matplotlib.patches import Ellipse

# Separator used to build combination keys such as "A and B".  get_unique()
# counts occurrences of it to recover how many sets a key combines, so set
# names must not contain this substring themselves.
_SEP = " and "


def get_shared(sets):
    """Return the intersection for every non-empty combination of the sets.

    Args:
        sets: Mapping of set name (str) to a ``set`` of elements.

    Returns:
        Dict keyed by the combined names joined with " and ", ordered by
        combination size (singles first, then pairs, ...).  A single-name key
        maps to the input set object itself; larger combinations map to a new
        set holding the common elements.
    """
    names = list(sets)
    shared = {}
    # Nested loops instead of sum(list_of_lists, []), which re-copies the
    # accumulated list on every addition.
    for size in range(1, len(names) + 1):
        for comb in combinations(names, size):
            key = _SEP.join(comb)
            if size == 1:
                shared[key] = sets[comb[0]]
            else:
                shared[key] = set.intersection(*(sets[name] for name in comb))
    return shared


def get_unique(shared):
    """Return, for each combination, the elements exclusive to that region.

    Args:
        shared: Output of ``get_shared`` (combination key -> intersection).

    Returns:
        Dict with the same keys and order as ``shared``.  A single set keeps
        only elements found in no other single set; a larger combination drops
        elements that also appear in any other combination of the same or
        larger size; the final key (the intersection of all sets) is returned
        unchanged.
    """
    keys = list(shared)
    last_key = keys[-1] if keys else None  # computed once, not per iteration
    unique = {}
    for key in keys:
        if key == last_key:
            unique[key] = shared[key]
            continue
        order = key.count(_SEP)  # 0 for a single set, 1 for a pair, ...
        if order == 0:
            others = [shared[k] for k in keys if k != key and _SEP not in k]
        else:
            others = [
                shared[k] for k in keys if k != key and k.count(_SEP) >= order
            ]
        unique[key] = shared[key].difference(*others)
    return unique


def venny4py(
    sets,
    ax,
    size=3.5,
    colors="bgrc",
    line_width=None,
    font_size=None,
    legend_cols=2,
    column_spacing=4,
):
    """Draw a four-set Venn diagram on ``ax``.

    Args:
        sets: Mapping of exactly four set names to ``set`` objects; the names
            become the legend labels.
        ax: Matplotlib axes to draw on.  Its limits are reset to 0..100 and
            its frame is hidden.
        size: Base size; default line width is ``size * 0.5`` and default font
            size is ``size * 2``.
        colors: Indexable of four fill colours, one per set.
        line_width: Outline/legend-handle line width; ``None`` uses the
            default derived from ``size``.
        font_size: Font size for counts and legend; ``None`` uses the default
            derived from ``size``.
        legend_cols: Number of legend columns.
        column_spacing: Spacing between legend columns.

    Raises:
        AssertionError: If ``sets`` does not contain exactly four entries.
    """
    if len(sets) != 4:
        # Raised explicitly (same type and message as the former ``assert``)
        # so the check is not stripped when Python runs with -O.
        raise AssertionError("Number of sets must be 4")

    shared = get_shared(sets)
    unique = get_unique(shared)
    lw = size * 0.5 if line_width is None else line_width
    fs = size * 2 if font_size is None else font_size

    ax.set_xlim(0, 100)
    ax.set_ylim(0, 100)
    ax.axis("off")

    # Ellipse geometry in data coordinates (axes span 0..100).
    ellipse_width = 45
    ellipse_height = 75
    centers_x = [35, 48, 52, 65]
    centers_y = [35, 45, 45, 35]
    angles = [225, 225, 315, 315]

    for i, (cx, cy, angle) in enumerate(zip(centers_x, centers_y, angles)):
        # Filled ellipse for the set ...
        ax.add_artist(
            Ellipse(
                xy=(cx, cy),
                width=ellipse_width,
                height=ellipse_height,
                fc=colors[i],
                angle=angle,
            )
        )
        # ... plus an unfilled overlay; only the first set is given an
        # explicit (royalblue) edge colour.
        ax.add_artist(
            Ellipse(
                xy=(cx, cy),
                width=ellipse_width,
                height=ellipse_height,
                fc="None",
                angle=angle,
                ec="royalblue" if i == 0 else None,
                lw=lw,
            )
        )

    # Label positions: the first four are for the total size of each set, the
    # remaining fifteen for the exclusive regions in get_unique() key order.
    label_x = [10, 32, 68, 91, 14, 34, 66, 86, 26, 28, 50, 50, 72, 74, 37, 60, 40, 63, 50]
    label_y = [67, 79, 79, 67, 41, 70, 70, 41, 59, 26, 11, 60, 26, 59, 51, 17, 17, 51, 35]

    # One pass over positions and counts; no index carried between loops.
    counts = [len(members) for members in sets.values()]
    counts.extend(len(members) for members in unique.values())
    for x, y, count in zip(label_x, label_y, counts):
        ax.text(
            x,
            y,
            count,
            ha="center",
            va="center",
            fontsize=fs,
            transform=ax.transData,
        )

    # Legend: one colour patch per set, centred just above the axes.
    names = list(sets)
    handles = [
        mpatches.Patch(
            color=colors[i],
            label=name,
            lw=lw,
            ec="royalblue" if i == 0 else None,
        )
        for i, name in enumerate(names)
    ]
    ax.legend(
        labels=names,
        handles=handles,
        fontsize=fs,
        frameon=False,
        bbox_to_anchor=(0.5, 1.01),
        bbox_transform=ax.transAxes,
        loc="upper center",  # same placement as the numeric location code 9
        handlelength=1.5,
        ncol=legend_cols,
        columnspacing=column_spacing,
        handletextpad=0.5,
    )