-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgeneration_result_add_intentions.py
More file actions
105 lines (79 loc) · 3.32 KB
/
generation_result_add_intentions.py
File metadata and controls
105 lines (79 loc) · 3.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import pandas as pd
import os
import random
import time
import json
import pickle
import numpy as np
# from sentence_transformers import SentenceTransformer, util
from tqdm import tqdm
def get_cost(generation_dict, engine_name):
    """Return the cost of one generation record for the given engine.

    Args:
        generation_dict: Generation record forwarded unchanged to the
            engine-specific cost function.
        engine_name: Name of the engine that produced the record; either
            "gpt-35-turbo" or "gpt-4".

    Returns:
        The value computed by get_cost_gpt3_5 or get_cost_gpt4.

    Raises:
        ValueError: If engine_name is not one of the supported engines.
    """
    if engine_name == "gpt-35-turbo":
        return get_cost_gpt3_5(generation_dict)
    if engine_name == "gpt-4":
        return get_cost_gpt4(generation_dict)
    # Fail with an explicit message instead of returning an unbound local
    # when the engine has no pricing function.
    raise ValueError(f"Unsupported engine name: {engine_name!r}")
def get_cost_gpt3_5(generation_dict):
    """Return the cost of one generation record at the GPT-3.5 rates.

    Args:
        generation_dict: Mapping with numeric 'Prompt_tokens' and
            'Completion_tokens' entries.

    Returns:
        0.0015 per 1,000 prompt tokens plus 0.002 per 1,000 completion
        tokens. The currency is not stated in the code (presumably USD --
        confirm).
    """
    # Rates are expressed per 1,000 tokens, hence the final division.
    prompt_rate, completion_rate = 0.0015, 0.002
    weighted_tokens = (
        prompt_rate * generation_dict['Prompt_tokens']
        + completion_rate * generation_dict['Completion_tokens']
    )
    return weighted_tokens / 1000
def get_cost_gpt4(generation_dict):
    """Return the cost of one generation record at the GPT-4 rates.

    Args:
        generation_dict: Mapping with numeric 'Prompt_tokens' and
            'Completion_tokens' entries.

    Returns:
        0.03 per 1,000 prompt tokens plus 0.06 per 1,000 completion
        tokens. The currency is not stated in the code (presumably USD --
        confirm).
    """
    # Rates are expressed per 1,000 tokens, hence the final division.
    prompt_rate, completion_rate = 0.03, 0.06
    weighted_tokens = (
        prompt_rate * generation_dict['Prompt_tokens']
        + completion_rate * generation_dict['Completion_tokens']
    )
    return weighted_tokens / 1000
def get_intentions(generation_dict):
    """Extract the intention strings from a generation record's answer.

    The 'Answer' text is split into lines. Every line whose first nine
    characters spell "intention" (case-insensitive) contributes the text
    that follows its first colon, with surrounding whitespace removed.

    Args:
        generation_dict: Mapping with an 'Answer' string.

    Returns:
        List of intention strings in order of appearance. Matching lines
        without a colon are printed and skipped.
    """
    intentions = []
    for line in generation_dict['Answer'].strip().split('\n'):
        if line[:9].lower() != 'intention':
            continue
        # partition keeps everything after the FIRST colon, so an intention
        # that itself contains ':' is returned whole rather than truncated.
        _, separator, text = line.strip().partition(':')
        if not separator:
            # Malformed line (no colon): report it and move on.
            print(line)
            continue
        intentions.append(text.strip())
    return intentions
def analyze_cost(file_name, engine_name):
    """Compute cost statistics over a JSON-lines file of generation records.

    Args:
        file_name: Path to a text file holding one JSON object per line.
            Blank lines are ignored.
        engine_name: Engine name forwarded to get_cost for every record.

    Returns:
        Tuple (average_cost, total_cost, num_sessions). When the file holds
        no records the average is NaN, the total is 0 and the count is 0.
    """
    cost_list = []
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default locale encoding.
    with open(file_name, 'r', encoding='utf-8') as f:
        for generation in f:
            if not generation.strip():
                # A stray blank line would make json.loads raise.
                continue
            cost_list.append(get_cost(json.loads(generation), engine_name))
    num_sessions = len(cost_list)
    # np.mean([]) emits a RuntimeWarning; return NaN directly instead.
    average_cost = np.mean(cost_list) if cost_list else np.nan
    total_cost = np.sum(cost_list)
    return average_cost, total_cost, num_sessions
def add_intention_to_dict(generation_dict):
    """Store the parsed intentions on the record under 'Intentions'.

    The dict is modified in place and the same object is returned.

    Args:
        generation_dict: Generation record passed to get_intentions.

    Returns:
        generation_dict, now carrying an 'Intentions' entry.
    """
    generation_dict['Intentions'] = get_intentions(generation_dict)
    return generation_dict
def add_intention_to_file(file_name):
    """Write a copy of a JSON-lines file with 'Intentions' added per record.

    Every non-blank line of file_name is parsed as JSON and passed through
    add_intention_to_dict. The results are written, one JSON object per
    line, to a file named after the input with its extension replaced by
    '_intentions.json'. All records are parsed before the output file is
    opened.

    Args:
        file_name: Path to the input file, one JSON object per line.
    """
    # Explicit UTF-8 on both ends: the output is dumped with
    # ensure_ascii=False, so it must not depend on the locale encoding.
    with open(file_name, 'r', encoding='utf-8') as f:
        new_generation_list = [
            add_intention_to_dict(json.loads(generation))
            for generation in f
            if generation.strip()  # a blank line would make json.loads raise
        ]
    # splitext removes only the final extension, so dots elsewhere in the
    # path (e.g. './x.json' or a dotted directory name) are left intact.
    new_file_name = os.path.splitext(file_name)[0] + '_intentions.json'
    with open(new_file_name, 'w', encoding='utf-8') as f:
        for generation_dict in new_generation_list:
            f.write(json.dumps(generation_dict, ensure_ascii=False) + '\n')
if __name__ == '__main__':
    # Only the GPT-3.5 results are processed; add "gpt-4" to this list to
    # include that model as well.
    model_names = ["gpt-35-turbo"]
    # Splits 10 and 11 (the upper bound of range is exclusive).
    split_labels = [str(split) for split in range(10, 12)]

    for model_name in model_names:
        for str_split in split_labels:
            generation_result_path = f'data_preprocess/generation_results/{model_name}_answer_{str_split}.json'
            # analyze_cost(generation_result_path, model_name) can be called
            # here to report average/total cost and the number of sessions.
            add_intention_to_file(generation_result_path)
            print(f"Added intentions to {model_name} split {str_split}.")