-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathValueExtractor.py
More file actions
193 lines (142 loc) · 8.24 KB
/
ValueExtractor.py
File metadata and controls
193 lines (142 loc) · 8.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
from ollama import chat
import glob
import pandas as pd
import matplotlib.pyplot as plt
import os
import time
# Column names used in the LLM-generated CSV files.
lidar_data_column = 'lidar_data'
steer_suggestion_column = 'steer_suggestion'
actual_steer_column = 'actual_steer'
time_column = 'time_taken'

# Directory (relative to the working directory) that holds the CSV files.
directory_for_data = 'LLM_generated_csv/'

# Model identifiers; also used as bar labels in the comparison charts.
gemma_7b = 'gemma:7b'
mistral = 'mistral'
llava_7b = 'llava:7b'

# Instruction text for number extraction. 99999 is the "no value present"
# sentinel, the same value extract_steer() returns when it finds no number.
prompt = (
    "Extract the number value from the following prompt, if the value is not present, please output 99999. "
    "Make sure to only output a number and nothing else: "
)
def load_dataframe(file_path):
    """Load a CSV file into a pandas DataFrame.

    Args:
        file_path: Location of the CSV file, as a string or any path-like
            object accepted by ``pd.read_csv``.

    Returns:
        The DataFrame parsed from the file.

    Raises:
        FileNotFoundError: If the file does not exist. The message names the
            path that was requested.
    """
    try:
        return pd.read_csv(file_path)
    except FileNotFoundError as err:
        # str() keeps the message construction from failing with TypeError
        # when file_path is a pathlib.Path rather than a plain string.
        raise FileNotFoundError(
            str(file_path) + " file not found. Please make sure the file exists.") from err
def extract_steer(steer_suggestion):
    """Extract the first integer that appears in a steer suggestion.

    The value is converted with ``str()`` first, so non-string inputs
    (numbers, ``None``, NaN) are accepted.

    Args:
        steer_suggestion: Text (or any object) that may contain a number.

    Returns:
        The first integer found, including an optional leading ``+`` or ``-``
        sign, or the sentinel 99999 when the text contains no digits.
    """
    import re

    # The sign is part of the match: without it a suggestion such as "-5"
    # would be read as 5 and the steer error would have the wrong magnitude.
    match = re.search(r'[-+]?\d+', str(steer_suggestion))
    if match is None:
        return 99999  # Sentinel: no number present in the text
    return int(match.group())
def find_difference(row):
    """Return the suggested steer minus the actual steer for one row.

    Args:
        row: A mapping/Series indexable by ``steer_suggestion_column`` and
            ``actual_steer_column``.

    Returns:
        ``extract_steer(suggestion) - actual``. When the suggestion holds no
        number, ``extract_steer`` yields 99999, so the result is offset by
        that sentinel.
    """
    suggested_steer = extract_steer(row[steer_suggestion_column])
    actual_steer = row[actual_steer_column]
    return suggested_steer - actual_steer
def average_time_taken(dataframe):
    """Return the mean of the ``time_column`` values in *dataframe*."""
    elapsed_times = dataframe[time_column]
    return elapsed_times.mean()
def standard_deviation_time_taken(dataframe):
    """Return the standard deviation of the ``time_column`` values.

    Uses pandas' ``Series.std`` with its default arguments.
    """
    elapsed_times = dataframe[time_column]
    return elapsed_times.std()
def variance_time_taken(dataframe):
    """Return the variance of the ``time_column`` values.

    Uses pandas' ``Series.var`` with its default arguments.
    """
    elapsed_times = dataframe[time_column]
    return elapsed_times.var()
def mean_difference(dataframe):
    """Return the mean of the per-row steer differences.

    ``find_difference`` is applied to every row and the results are averaged.
    Only the mean is returned; no outlier filtering is performed here.
    """
    row_differences = dataframe.apply(find_difference, axis=1)
    return row_differences.mean()
def standard_deviation_difference(dataframe):
    """Return the standard deviation of the per-row steer differences.

    ``find_difference`` is applied to every row and pandas' ``std`` (default
    arguments) is taken over the results. Only that value is returned; no
    outlier filtering is performed here.
    """
    return dataframe.apply(find_difference, axis=1).std()
def variance_difference(dataframe):
    """Return the variance of the per-row steer differences.

    ``find_difference`` is applied to every row and pandas' ``var`` (default
    arguments) is taken over the results. Only that value is returned; no
    outlier filtering is performed here.
    """
    row_differences = dataframe.apply(find_difference, axis=1)
    return row_differences.var()
def output_information(dataframe, name):
    """Print the steer-difference and timing statistics for one dataframe.

    Previously the six statistics were computed and then discarded, so the
    function produced no output and ignored ``name``.

    Args:
        dataframe: DataFrame containing the steer suggestion, actual steer
            and time-taken columns used by the statistic helpers.
        name: Label printed in the summary heading.

    Returns:
        None. The summary is written to standard output.
    """
    # Statistics are computed in the same order as the original assignments.
    statistics = [
        ('Mean difference', mean_difference(dataframe)),
        ('Standard deviation of difference', standard_deviation_difference(dataframe)),
        ('Variance of difference', variance_difference(dataframe)),
        ('Average time taken', average_time_taken(dataframe)),
        ('Standard deviation of time taken', standard_deviation_time_taken(dataframe)),
        ('Variance of time taken', variance_time_taken(dataframe)),
    ]
    print('Statistics for {}:'.format(name))
    for label, value in statistics:
        print('  {}: {}'.format(label, value))
def mean_difference_graphs(dataframe1, name1, dataframe2, name2, dataframe3, name3):
    """Show a bar chart comparing the mean steer difference of three dataframes.

    Each ``nameN`` is the bar label for ``dataframeN``. The chart is displayed
    with ``plt.show()``; nothing is returned.
    """
    bar_labels = [name1, name2, name3]
    bar_heights = [mean_difference(frame) for frame in (dataframe1, dataframe2, dataframe3)]

    plt.bar(bar_labels, bar_heights)
    plt.xlabel('Dataframe')
    plt.ylabel('Mean Difference')
    plt.title('Mean Difference for each Dataframe')
    plt.show()
def standard_deviation_difference_graphs(dataframe1, name1, dataframe2, name2, dataframe3, name3):
    """Show a bar chart comparing the steer-difference standard deviation of three dataframes.

    Each ``nameN`` is the bar label for ``dataframeN``. The chart is displayed
    with ``plt.show()``; nothing is returned.
    """
    bar_labels = [name1, name2, name3]
    bar_heights = [
        standard_deviation_difference(frame)
        for frame in (dataframe1, dataframe2, dataframe3)
    ]

    plt.bar(bar_labels, bar_heights)
    plt.xlabel('Dataframe')
    plt.ylabel('Standard Deviation of Difference')
    plt.title('Standard Deviation of Difference for each Dataframe')
    plt.show()
def variance_difference_graphs(dataframe1, name1, dataframe2, name2, dataframe3, name3):
    """Show a bar chart comparing the steer-difference variance of three dataframes.

    Each ``nameN`` is the bar label for ``dataframeN``. The chart is displayed
    with ``plt.show()``; nothing is returned.
    """
    bar_labels = [name1, name2, name3]
    bar_heights = [variance_difference(frame) for frame in (dataframe1, dataframe2, dataframe3)]

    plt.bar(bar_labels, bar_heights)
    plt.xlabel('Dataframe')
    plt.ylabel('Variance of Difference')
    plt.title('Variance of Difference for each Dataframe')
    plt.show()
def average_time_taken_graphs(dataframe1, name1, dataframe2, name2, dataframe3, name3):
    """Show a bar chart comparing the average time taken across three dataframes.

    Each ``nameN`` is the bar label for ``dataframeN``. The chart is displayed
    with ``plt.show()``; nothing is returned.
    """
    bar_labels = [name1, name2, name3]
    bar_heights = [average_time_taken(frame) for frame in (dataframe1, dataframe2, dataframe3)]

    plt.bar(bar_labels, bar_heights)
    plt.xlabel('Dataframe')
    plt.ylabel('Average Time Taken')
    plt.title('Average Time Taken for each Dataframe')
    plt.show()
def standard_deviation_time_taken_graphs(dataframe1, name1, dataframe2, name2, dataframe3, name3):
    """Show a bar chart comparing the time-taken standard deviation of three dataframes.

    Each ``nameN`` is the bar label for ``dataframeN``. The chart is displayed
    with ``plt.show()``; nothing is returned.
    """
    bar_labels = [name1, name2, name3]
    bar_heights = [
        standard_deviation_time_taken(frame)
        for frame in (dataframe1, dataframe2, dataframe3)
    ]

    plt.bar(bar_labels, bar_heights)
    plt.xlabel('Dataframe')
    plt.ylabel('Standard Deviation of Time Taken')
    plt.title('Standard Deviation of Time Taken for each Dataframe')
    plt.show()
def variance_time_taken_graphs(dataframe1, name1, dataframe2, name2, dataframe3, name3):
    """Show a bar chart comparing the time-taken variance of three dataframes.

    Each ``nameN`` is the bar label for ``dataframeN``. The chart is displayed
    with ``plt.show()``; nothing is returned.
    """
    bar_labels = [name1, name2, name3]
    bar_heights = [variance_time_taken(frame) for frame in (dataframe1, dataframe2, dataframe3)]

    plt.bar(bar_labels, bar_heights)
    plt.xlabel('Dataframe')
    plt.ylabel('Variance of Time Taken')
    plt.title('Variance of Time Taken for each Dataframe')
    plt.show()
if __name__ == '__main__':
    # Read the per-model result CSVs from the shared data directory.
    gemma_results = load_dataframe(directory_for_data + 'gemma:7bsteer_suggestion2.1.csv')
    mistral_results = load_dataframe(directory_for_data + 'mistralsteer_suggestion2.1.csv')
    llava_results = load_dataframe(directory_for_data + 'llava:7bsteer_suggestion2.1.csv')

    # Every chart takes the same (dataframe, label) arguments, so build them once.
    comparison_args = (
        gemma_results, gemma_7b,
        mistral_results, mistral,
        llava_results, llava_7b,
    )

    # Display the six comparison charts one after another, in this order.
    chart_functions = (
        mean_difference_graphs,
        standard_deviation_difference_graphs,
        variance_difference_graphs,
        average_time_taken_graphs,
        standard_deviation_time_taken_graphs,
        variance_time_taken_graphs,
    )
    for draw_chart in chart_functions:
        draw_chart(*comparison_args)