Skip to content

Commit 19ebadd

Browse files
ganler and Copilot authored
feat: cwe visualization (#3)
* feat: cwe visualization
* Update script/venn4py.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 5ea30b7 commit 19ebadd

4 files changed

Lines changed: 585 additions & 0 deletions

File tree

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,4 @@ bandit
1212
tenacity
1313
sandbox-fusion
1414
rich
15+
matplotlib

script/cwepie.py

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
# SPDX-FileCopyrightText: (c) UIUC PurpCode Team
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
import matplotlib.pyplot as plt
6+
7+
# Text is rendered through LaTeX (text.usetex), so a working LaTeX
# installation is required to run this script.
plt.rcParams["text.usetex"] = True
plt.rcParams["font.family"] = "serif"
plt.rcParams["font.serif"] = ["Computer Modern Roman"]
15+
16+
# Data provided: detection count per detector name.
data = {
    "AWS credentials logged": 50,
    "AWS insecure transmission CDK": 50,
    "AWS missing encryption CDK": 50,
    "AWS missing encryption of sensitive data cdk": 50,
    "Clear text credentials": 50,
    "Cross-site request forgery": 56,
    "Cross-site scripting": 147,
    "Deserialization of untrusted object": 50,
    "Empty Password": 17,
    "Garbage collection prevention in multiprocessing": 58,
    "Hardcoded IP address": 50,
    "Hardcoded credentials": 144,
    "Improper authentication": 70,
    "Improper certificate validation": 44,
    "Improper input validation": 75,
    "Improper privilege management": 8,
    "Improper resource exposure": 70,
    "Improper sanitization of wildcards or matching symbols": 52,
    "Insecure CORS policy": 58,
    "Insecure Socket Bind": 66,
    "Insecure connection using unencrypted protocol": 83,
    "Insecure cookie": 64,
    "Insecure cryptography": 130,
    "Insecure hashing": 282,
    "Insecure temporary file or directory": 125,
    "Integer overflow": 50,
    "LDAP injection": 54,
    "Log injection": 82,
    "Loose file permissions": 241,
    "Missing Authorization CDK": 50,
    "Mutually exclusive call": 50,
    "OS command injection": 1411,
    "Override of reserved variable names in a Lambda function": 55,
    "Path traversal": 223,
    "Public method parameter validation": 273,
    "Resource leak": 1516,
    "S3 partial encrypt CDK": 50,
    "SQL injection": 106,
    "Socket connection timeout": 109,
    "Spawning a process without main module": 52,
    "URL redirection to untrusted site": 70,
    "Unauthenticated Amazon SNS unsubscribe requests might succeed": 50,
    "Unauthenticated LDAP requests": 50,
    "Unrestricted upload of dangerous file type": 70,
    "Unsafe Cloudpickle Load": 51,
    "Unsanitized input is run as code": 351,
    "Untrusted AMI images": 50,
    "Usage of an API that is not recommended": 17,
    "Usage of an API that is not recommended - High Severity": 29,
    "Usage of an API that is not recommended - Medium Severity": 1390,
    "Using AutoAddPolicy or WarningPolicy": 4,
    "Weak algorithm used for Password Hashing": 108,
    "Weak obfuscation of web request": 52,
    "XML External Entity": 19,
    "XPath injection": 51,
    "Zip bomb attack": 56,
}


# Prepare data: Top N and 'Others'
# Rank detectors from most to least frequent; sorted() is stable, so entries
# with equal counts keep their order from `data`.
sorted_data = dict(sorted(data.items(), key=lambda item: item[1], reverse=True))
top_n_count = 10
top_n_labels_orig = list(sorted_data)[:top_n_count]
top_n_freqs = list(sorted_data.values())[:top_n_count]
# Computed once instead of once per element of the ratio list.
total_count = sum(sorted_data.values())
top_n_ratio = [freq / total_count for freq in top_n_freqs]
other_size = sum(list(sorted_data.values())[top_n_count:])

# Create legend labels carrying each entry's share of all detections
max_label_length = 64
plot_labels_for_legend = []
for label, ratio in zip(top_n_labels_orig, top_n_ratio):
    # Keep only the text before the first " - " (drops suffixes such as
    # " - Medium Severity").
    label_text = label.split(" - ")[0]
    if len(label_text) > max_label_length:
        label_text = label_text[: max_label_length - 3] + "..."
    # "\\%" puts a backslash-escaped percent sign into the label text.
    plot_labels_for_legend.append(f"{label_text} ({ratio * 100:.1f}\\%)")

# Determine plot_sizes for the pie chart; the 'Others' wedge and its legend
# entry are added together, and only when something falls outside the top N.
plot_sizes = list(top_n_freqs)
if other_size > 0:
    plot_labels_for_legend.append("Others")
    plot_sizes.append(other_size)

# Per-wedge raw labels and counts. They are derived from the same top-N split
# as plot_sizes (rather than from a separate count threshold) so the three
# sequences always have one entry per wedge.
grouped_labels = list(top_n_labels_orig)
grouped_counts = list(top_n_freqs)
others_count = other_size
if other_size > 0:
    grouped_labels.append("Others")
    grouped_counts.append(others_count)
119+
120+
# No figure is created at this point: the figure the chart is drawn on is
# created further down, immediately before plotting. Creating another one here
# would only leave an unused, never-closed figure open.
121+
122+
123+
def make_autopct(values):
    r"""Return a callback that formats a wedge's percentage for display.

    The percent sign is backslash-escaped because this script enables
    ``text.usetex``, and an unescaped ``%`` starts a comment in LaTeX. The
    legend labels in this script escape it the same way.

    Args:
        values: Not used by the formatter; accepted so existing call sites
            keep working.

    Returns:
        A function taking the percentage as a number and returning it with
        one decimal place followed by ``\%``, e.g. ``12.345`` -> ``12.3\%``.
    """

    def my_autopct(pct):
        return f"{pct:.1f}\\%"

    return my_autopct
128+
129+
130+
# Styling
# Square 3.5in x 3.5in canvas holding a single axes.
fig, ax = plt.subplots(figsize=(3.5, 3.5))

# Wedge colors, in plotting order.
colors_palette = [
    "#f7c59f",  # soft peach
    "#ffb58b",  # warm coral
    "#ffd48a",  # pastel amber
    "#fff0a5",  # light butter-yellow
    "#e9e3a4",  # sandstone
    "#d8f0a1",  # pale pistachio
    "#c1e8b0",  # mint-melon
    "#b8e8d4",  # icy aqua
    "#cde0ff",  # powder periwinkle
    "#d8c7ff",  # lilac
    "lightgray",
]

# One color per wedge: repeat the palette often enough to cover every wedge,
# then cut it to length, so colors wrap around if wedges outnumber colors.
num_colors = len(plot_sizes)
palette_repeats = num_colors // len(colors_palette) + 1
final_colors = (colors_palette * palette_repeats)[:num_colors]
150+
151+
# Wedge offsets, one per plotted wedge. They are derived from the legend
# labels because that list is built entry-for-entry alongside plot_sizes, so
# its length always matches what pie() requires for `explode`. The "Others"
# wedge is pushed out slightly further than the rest.
explode = [
    0.05 if label == "Others" else 0.03 for label in plot_labels_for_legend
]

# Draw the chart as a ring (wedge width 0.5) with white wedge borders.
wedges, texts, autotexts = ax.pie(
    plot_sizes,  # top N counts, plus the 'Others' total when present
    startangle=140,
    pctdistance=0.75,
    colors=final_colors,
    wedgeprops=dict(width=0.5, edgecolor="w"),
    textprops={"fontsize": 16},
    explode=explode,
    autopct=make_autopct(plot_sizes),
)
162+
163+
# Base style for every percentage label. This has to run BEFORE the
# per-wedge emphasis below; applied afterwards it would reset the larger
# font size on the emphasized labels back to 11.
plt.setp(autotexts, size=11, weight="bold", color="black")

# Emphasize the labels of wedges covering more than 15% of the total:
# larger font and LaTeX bold.
total_size = sum(plot_sizes)
for val, txt in zip(plot_sizes, autotexts):
    pct = val / total_size * 100
    if pct > 15:
        txt.set_fontsize(18)
        txt.set_text(r"\textbf{" + txt.get_text() + r"}")

ax.axis("equal")
171+
172+
# Set the left and right edges of the subplot area to 0.1 and 0.85.
plt.subplots_adjust(left=0.1, right=0.85)

# Legend: one entry per wedge, anchored by its center-left point at
# (0.95, 0.5), drawn without frame or shadow.
legend_options = {
    "title": "\\textbf{Top CodeGuru Detections}",
    "title_fontsize": "12",
    "loc": "center left",
    "bbox_to_anchor": (0.95, 0.5),
    "fontsize": 11,
    "frameon": False,
    "shadow": False,
}
legend = ax.legend(wedges, plot_labels_for_legend, **legend_options)
184+
185+
186+
# Options shared by both output files: the legend is passed as an extra
# artist for the tight bounding box, and the padding is slightly negative.
save_options = {
    "bbox_extra_artists": (legend,),
    "bbox_inches": "tight",
    "pad_inches": -0.05,
}

# PNG at 300 dpi, then PDF with the same bounding-box options.
plt.savefig("cwepie.png", dpi=300, **save_options)
plt.savefig("cwepie.pdf", **save_options)

0 commit comments

Comments
 (0)