# combine.py
# Combine the per-group summary CSVs found under a parent directory into one
# DataFrame, then draw a per-group bar plot with the individual points overlaid.
import pandas as pd
from pathlib import Path
# 1. Parent directory whose immediate sub-folders are the groups to combine
parent_dir = Path(r"C:\Users\Richard\_Vork\MVT\DONE")
# Name of the summary CSV searched for (recursively) inside every group folder
SUMMARY_FILENAME = "all_runs_summary_combined_averaged.csv"


def load_group_summaries(root, filename=SUMMARY_FILENAME):
    """Read one summary CSV per group folder located directly under *root*.

    Each immediate sub-directory of *root* is searched recursively for a file
    called *filename*. The first match (in sorted path order) is read with
    ``pandas.read_csv`` and gets an extra ``group_source`` column holding the
    group folder's name. Progress is reported with ``print``.

    Args:
        root: Directory containing the group folders (str or Path).
        filename: Exact file name to look for inside each group folder.

    Returns:
        A list of DataFrames, one per group folder that contained a match,
        ordered by group folder path. Empty if nothing was found.

    Raises:
        OSError: Propagated from ``Path.iterdir`` when *root* does not exist
            or is not a directory.
    """
    frames = []
    # iterdir()/rglob() yield entries in arbitrary order; sorting makes both the
    # chosen file and the row order of the combined frame reproducible.
    for group_folder in sorted(Path(root).iterdir()):
        if not group_folder.is_dir():
            continue
        matches = sorted(group_folder.rglob(filename))
        if not matches:
            print(f"No file found in {group_folder.name}")
            continue
        if len(matches) > 1:
            # Only one file per group is used; make the choice visible.
            print(
                f"Warning: {len(matches)} matches in {group_folder.name}; "
                "using the first one"
            )
        file_path = matches[0]
        print(f"Found file in {group_folder.name}: {file_path}")
        # Read the CSV and tag every row with the folder it came from
        temp_df = pd.read_csv(file_path)
        temp_df['group_source'] = group_folder.name
        frames.append(temp_df)
    return frames


# Guarded so that importing this module (e.g. to reuse load_group_summaries)
# does not touch the hard-coded directory above.
if __name__ == "__main__":
    if not parent_dir.is_dir():
        raise SystemExit(f"Parent directory not found: {parent_dir}")

    # 2. Individual dataframes, one per group folder
    df_list = load_group_summaries(parent_dir)

    # 3. Stop here when there is nothing to combine: everything below needs
    #    combined_df, which previously stayed undefined in this case.
    if not df_list:
        raise SystemExit("No files were found to combine.")

    # 4. Concatenate all found dataframes into one
    combined_df = pd.concat(df_list, ignore_index=True)
    print("\nSuccessfully combined all files!")
    print(f"Total rows: {len(combined_df)}")
    # Show the first few rows
    print(combined_df.head())
##############################################
import matplotlib.pyplot as plt
import seaborn as sns
# Known metrics: column name -> (plot title, y-axis label)
METRIC_LABELS = {
    'void': ('Void Count per Group', 'Void Count'),
    'leak': ('Leak Count per Group', 'Leak Count'),
    'weighted_avg_void_vol': ('AVV per Group', 'AVV (ul)'),
}
# Column to plot; change this single value instead of editing several lines
METRIC = 'weighted_avg_void_vol'
# Set to a file name (e.g. 'void_count_plot.png') to also write the figure to disk
SAVE_PATH = None


def labels_for_metric(metric):
    """Return ``(title, y_label)`` for *metric*.

    Metrics listed in ``METRIC_LABELS`` use their preset texts; any other
    column name falls back to labels derived from the name itself.
    """
    return METRIC_LABELS.get(metric, (f'{metric} per Group', metric))


def plot_metric_by_group(df, metric=METRIC, save_path=SAVE_PATH):
    """Draw a bar per group for *metric* with the individual points on top.

    Args:
        df: DataFrame to plot. It must contain a 'group' column and a column
            named *metric*. NOTE(review): 'group' is expected to come from the
            CSV files themselves (the loader only adds 'group_source') -
            confirm against the data.
        metric: Name of the column plotted on the y-axis.
        save_path: When not None, the figure is written to this path before it
            is displayed.

    Returns:
        The matplotlib Axes the plot was drawn on.
    """
    title, y_label = labels_for_metric(metric)

    # Set the visual style
    sns.set_theme(style="whitegrid")

    # Bar plot; 'errorbar=None' shows only the bar, without error bars
    ax = sns.barplot(
        data=df,
        x='group',
        y=metric,
        color='skyblue',
        alpha=0.5,
        errorbar=None,
    )

    # Overlay the individual points; jitter spreads them horizontally so
    # overlapping values stay visible
    sns.stripplot(
        data=df,
        x='group',
        y=metric,
        color='black',
        jitter=0.2,
        size=5,
        alpha=0.7,
        ax=ax,
    )

    # Labeling and formatting
    ax.set_title(title)
    ax.set_xlabel('Group Name')
    ax.set_ylabel(y_label)
    # Tight layout ensures labels aren't cut off
    plt.tight_layout()

    if save_path is not None:
        # Save BEFORE show(): once the window shown by show() is closed the
        # figure is gone and savefig would write an empty image.
        plt.savefig(save_path, bbox_inches='tight')
        print(f"Plot saved as {save_path}")

    plt.show()
    return ax


if __name__ == "__main__":
    # Sort by group to ensure consistent ordering on the X-axis
    combined_df = combined_df.sort_values('group')
    ax = plot_metric_by_group(combined_df)