-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathde_identified_csv_evaluator.py
More file actions
137 lines (110 loc) · 5.11 KB
/
de_identified_csv_evaluator.py
File metadata and controls
137 lines (110 loc) · 5.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import pandas as pd
import os
import string
# Relative folder paths: model output vs. human-redacted ground truth.
output_folder = 'results/OpenAI_redacted_files/'  # OpenAI-redacted CSVs; the driver below reads AND rewrites files here in place
human_redacted_folder = 'human_redacted_files/'   # human-redacted reference CSVs used as ground truth
def count_redacted(text):
    """Return the number of '[REDACTED]' markers in *text*.

    Non-string input (e.g. float NaN coming out of a pandas column) is
    coerced to str first, mirroring the coercion done in clean_text; the
    original raised AttributeError on non-strings.
    """
    if not isinstance(text, str):
        text = str(text)
    return text.count("[REDACTED]")
def clean_text(text):
    """Tokenize *text* for word-by-word redaction comparison.

    Coerces non-string input to str, replaces every punctuation character
    with a space in a single translate pass, splits on whitespace, and
    keeps only alphanumeric tokens that are not the articles
    'a' / 'an' / 'the' (case-insensitive). Note '[REDACTED]' becomes the
    bare token 'REDACTED' after punctuation stripping.
    """
    raw = text if isinstance(text, str) else str(text)
    # One C-level pass: every punctuation char -> single space.
    depunct = raw.translate(str.maketrans(string.punctuation, ' ' * len(string.punctuation)))
    articles = {'a', 'an', 'the'}
    return [tok for tok in depunct.split() if tok.isalnum() and tok.lower() not in articles]
def add_word_lists_to_df(df):
    """Attach tokenized word lists for both redaction sources to *df*.

    Adds 'word_list_redacted' (tokens of the human-redacted text) and
    'word_list_openai_redacted' (tokens of the OpenAI-redacted text),
    mutating *df* in place and returning it.
    """
    column_pairs = (
        ('post_text_human_redacted', 'word_list_redacted'),
        ('post_text_OpenAI_redacted', 'word_list_openai_redacted'),
    )
    for source_col, target_col in column_pairs:
        df[target_col] = df[source_col].map(clean_text)
    return df
def calculate_metrics(df):
    """Score the OpenAI redactions against the human redactions word-by-word.

    For each row, the token lists in 'word_list_redacted' (human) and
    'word_list_openai_redacted' (OpenAI) are compared position by position
    (zip truncates to the shorter list, so trailing extra tokens on either
    side are ignored). A token counts as redacted when it equals 'REDACTED'.
    Per-row confusion counts are also written back into *df* as
    'true_positives' / 'true_negatives' / 'false_positives' /
    'false_negatives' columns, preserving the original side effect.

    Returns:
        (accuracy, precision, recall, kappa,
         tp_counts, tn_counts, fp_counts, fn_counts)
        where the four lists hold per-row counts in df's row order.

    Robustness fix vs. the original: any ratio whose denominator is zero
    (no positives, no rows, or expected agreement of 1 for kappa) is
    reported as 0.0 instead of raising ZeroDivisionError.
    """
    total_tp = total_tn = total_fp = total_fn = 0
    total_words = 0
    tp_counts, tn_counts, fp_counts, fn_counts = [], [], [], []

    for idx, row in df.iterrows():
        row_tp = row_fp = row_fn = row_tn = 0
        for hw, ow in zip(row['word_list_redacted'], row['word_list_openai_redacted']):
            total_words += 1
            human_hit = hw == 'REDACTED'
            openai_hit = ow == 'REDACTED'
            if human_hit and openai_hit:
                row_tp += 1        # both redacted this position
            elif openai_hit:
                row_fp += 1        # OpenAI redacted a word the human kept
            elif human_hit:
                row_fn += 1        # human redacted a word OpenAI kept
            else:
                row_tn += 1        # neither redacted (word mismatches land here too)
        total_tp += row_tp
        total_fp += row_fp
        total_fn += row_fn
        total_tn += row_tn
        tp_counts.append(row_tp)
        fp_counts.append(row_fp)
        fn_counts.append(row_fn)
        tn_counts.append(row_tn)
        # Preserve the original per-row write-back into the DataFrame.
        df.at[idx, 'true_positives'] = row_tp
        df.at[idx, 'true_negatives'] = row_tn
        df.at[idx, 'false_positives'] = row_fp
        df.at[idx, 'false_negatives'] = row_fn

    predicted_pos = total_tp + total_fp
    actual_pos = total_tp + total_fn
    precision = total_tp / predicted_pos if predicted_pos else 0.0
    recall = total_tp / actual_pos if actual_pos else 0.0
    accuracy = (total_tp + total_tn) / total_words if total_words else 0.0

    # Cohen's kappa: agreement corrected for chance agreement.
    if total_words:
        observed_agreement = (total_tp + total_tn) / total_words
        expected_agreement = (predicted_pos * actual_pos
                              + (total_fp + total_tn) * (total_fn + total_tn)) / (total_words ** 2)
        # expected_agreement == 1 only when observed is also 1; report 0.0
        # rather than dividing by zero.
        kappa = ((observed_agreement - expected_agreement) / (1 - expected_agreement)
                 if expected_agreement != 1 else 0.0)
    else:
        kappa = 0.0

    return accuracy, precision, recall, kappa, tp_counts, tn_counts, fp_counts, fn_counts
# ---- Driver: score every OpenAI-redacted CSV against its human-redacted twin ----
prompt_df = pd.read_csv('prompts.csv', encoding_errors='ignore')

# NOTE(review): these four frames are created but never populated or saved in
# this script; kept only so any external code referencing them still works.
precision_df = pd.DataFrame(columns=prompt_df.columns)
recall_df = pd.DataFrame(columns=prompt_df.columns)
accuracy_df = pd.DataFrame(columns=prompt_df.columns)
kappa_df = pd.DataFrame(columns=prompt_df.columns)

all_csv_files = [f for f in os.listdir(output_folder) if f.endswith('.csv')]
# NOTE(review): listed but never used below; kept for compatibility.
human_redacted_files = [f for f in os.listdir(human_redacted_folder) if f.endswith('.csv')]

all_metrics = []
for file in all_csv_files:
    file_path = os.path.join(output_folder, file)
    df = pd.read_csv(file_path, encoding='utf-8', encoding_errors='ignore')

    # OpenAI output files are named '<original>_prompt<N>...csv'; recover the
    # matching human-redacted ground-truth file name.
    original_file_name = file.split("_prompt")[0] + ".csv"
    df_human = pd.read_csv(os.path.join(human_redacted_folder, original_file_name),
                           encoding='utf-8', encoding_errors='ignore')

    # Merge in the human-redacted text if this file has not been scored before.
    if 'post_text_human_redacted' not in df.columns:
        df = pd.merge(df, df_human, on='id')

    # Tokenize both redacted texts, then score them word-by-word.
    df = add_word_lists_to_df(df)
    accuracy, precision, recall, kappa, tp_counts, tn_counts, fp_counts, fn_counts = calculate_metrics(df)

    # Persist per-row confusion counts alongside the data.
    df['true_positives'] = tp_counts
    df['true_negatives'] = tn_counts
    df['false_positives'] = fp_counts
    df['false_negatives'] = fn_counts

    # Prompt number is the third-from-last underscore-delimited chunk,
    # e.g. '..._prompt3_x_y.csv' -> 'prompt3' -> 3.
    # NOTE(review): fragile — assumes a fixed number of trailing chunks.
    prompt_number_part = file.split('_')[-3]
    prompt_number = int(prompt_number_part.replace('prompt', ''))

    print(f"Updated Metrics for {original_file_name}, Prompt {prompt_number}: "
          f"Accuracy:{accuracy:.2f}, Precision: {precision:.2f}, "
          f"Recall: {recall:.2f}, Kappa: {kappa:.2f}")

    # Rewrite the scored file in place.
    df.to_csv(file_path, index=False)

    all_metrics.append({
        'Prompt': prompt_number,
        'File': original_file_name,
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'Kappa': kappa,
    })

# Collect every file's metrics into one summary CSV.
all_metrics_df = pd.DataFrame(all_metrics)
# Bug fix: the original path was 'results\metrics.csv' — the backslash is a
# literal character there (\m is not an escape), so on POSIX it creates a file
# literally named 'results\metrics.csv' instead of results/metrics.csv.
all_metrics_df.to_csv(os.path.join('results', 'metrics.csv'), index=False)