-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
119 lines (88 loc) · 3.05 KB
/
utils.py
File metadata and controls
119 lines (88 loc) · 3.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import os
import pandas as pd
import time
from scipy.io import arff
def load_datasets(directory, dataset_name):
    """
    Loads an ARFF dataset from disk.

    Args:
        directory (str): Folder that contains the dataset file.
        dataset_name (str): File name of the dataset without the ".arff" suffix.

    Returns:
        The data part of the result of ``scipy.io.arff.loadarff``; the
        metadata part of that result is discarded.
    """
    arff_path = os.path.join(directory, f"{dataset_name}.arff")
    # loadarff returns a (data, meta) pair; only the data is needed here.
    loaded = arff.loadarff(arff_path)
    return loaded[0]
def get_process_time(process, start_time):
    """
    Prints how long a process took and returns the current timestamp.

    The returned value can be passed as ``start_time`` of the next call to
    time consecutive steps.

    Args:
        process (str): Description of the process.
        start_time (float): ``time.time()`` timestamp taken when the process started.

    Returns:
        float: ``time.time()`` timestamp taken when this function was called.
    """
    now = time.time()
    elapsed = now - start_time
    print(f"Finished {process} in {elapsed:.3f} seconds.")
    return now
def create_directory(directory):
    """
    Creates a directory (including missing parents) if it does not exist.

    Args:
        directory (str): Directory path.

    Returns:
        str: The same path that was passed in.

    Raises:
        FileExistsError: If the path exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists() first: another process creating the directory in
    # between would otherwise make makedirs() fail.
    os.makedirs(directory, exist_ok=True)
    return directory
def check_missing_values(df):
    """
    Reports which columns of a DataFrame contain missing values.

    A column is reported when at least one of its cells is null (as detected
    by ``isnull``) or is an empty string. One message per affected column is
    printed; if no column is affected a single confirmation is printed.

    Args:
        df (pandas.DataFrame): Data to inspect.

    Returns:
        bool: True if at least one column has missing values, False otherwise.
    """
    missing_values = []
    for column in df.columns:
        values = df[column]
        has_null = bool(values.isnull().any())
        # Compare cell by cell: `values.any() == ""` would only compare the
        # reduced result of any() with "" and never inspect individual cells.
        has_empty_string = bool((values == "").any())
        if has_null or has_empty_string:
            missing_values.append(f"Missing values for column {column}.")

    if missing_values:
        for message in missing_values:
            print(message)
        return True

    print("We are not missing any values!")
    return False
def save_metrics_to_csv(results, dataset_name, output_dir, k_alorithm_flag):
    """
    Saves the clustering metrics of one dataset to a single CSV file.

    The nested ``results`` mapping is flattened into one row per
    (distance metric, parameter value) pair and written to
    ``<output_dir>/<dataset_name>_metrics.csv``.

    Args:
        results (dict): Mapping of the form
            ``{distance_metric: {parameter_value: {metric_name: value}}}``.
        dataset_name (str): Name used as prefix of the CSV file.
        output_dir (str): Directory for the CSV file; created if missing.
        k_alorithm_flag (bool): If truthy the parameter column is named "K"
            (K-Means), otherwise "Alpha" (G-Means).
    """
    os.makedirs(output_dir, exist_ok=True)  # Create the directory if it doesn't exist

    # Both algorithms share the same layout; only the name of the
    # parameter column differs.
    parameter_column = "K" if k_alorithm_flag else "Alpha"

    rows = []
    for distance_metric, parameter_results in results.items():
        for parameter_value, metrics in parameter_results.items():
            row = {parameter_column: parameter_value, "distance_metric": distance_metric}
            row.update(metrics)  # Add all metrics for this parameter value
            rows.append(row)

    csv_file = os.path.join(output_dir, f"{dataset_name}_metrics.csv")
    pd.DataFrame(rows).to_csv(csv_file, index=False)

    # Report by dataset name: the file holds every distance metric, and using
    # the loop variable here would fail when `results` is empty.
    print(f"Metrics for {dataset_name} saved to {csv_file}")
def print_divider(char="=", length=60, before="", after=""):
    """
    Prints a horizontal divider, optionally framed by extra lines.

    Args:
        char (str): String that is repeated to build the divider.
        length (int): Number of repetitions of ``char``.
        before: Printed on its own line above the divider when truthy.
        after: Printed on its own line below the divider when truthy.
    """

    def _lines():
        # Lazily yield the lines so each one is printed before the next
        # one is computed.
        if before:
            yield before
        yield char * length
        if after:
            yield after

    for line in _lines():
        print(line)
def print_heading1(text):
    """
    Prints a top-level heading: the upper-cased text between two "=" rules.

    A blank line is emitted before the first rule and after the last one.

    Args:
        text (str): Heading text; the rules are as long as the text.
    """
    rule = "=" * len(text)
    print(f"\n{rule}")
    print(text.upper())
    print(f"{rule}\n")
def print_heading2(text):
    """
    Prints a second-level heading: the text underlined with "-".

    A blank line is emitted before the text and after the underline.

    Args:
        text (str): Heading text; the underline is as long as the text.
    """
    title_line = "\n".join(("", text))
    underline = "-" * len(text)
    print(title_line)
    print(f"{underline}\n")
def print_heading3(text):
    """
    Prints a third-level heading: the text underlined with a dashed rule.

    The rule alternates "-" and " " (starting with "-") and is as long as the
    text. A blank line is emitted before the text and after the rule.

    Args:
        text (str): Heading text.
    """
    width = len(text)
    # "- " repeated for every full pair, plus a final "-" for odd widths.
    dashed_rule = "- " * (width // 2) + "-" * (width % 2)
    print("\n".join(("", text)))
    print(f"{dashed_rule}\n")