# generation_result_add_intentions.py
# Source: HKUST-KnowComp/RelationalIntentionGraph (main branch)
105
import pandas as pd
import os
import random
import time
import json
import pickle
import numpy as np
# from sentence_transformers import SentenceTransformer, util

from tqdm import tqdm

def get_cost(generation_dict, engine_name):
    """Return the estimated cost of one generation record for an engine.

    Dispatches on ``engine_name`` to the matching pricing helper.

    Args:
        generation_dict: Generation record; it is passed unchanged to the
            pricing helper, which reads its ``'Prompt_tokens'`` and
            ``'Completion_tokens'`` entries.
        engine_name: Either ``"gpt-35-turbo"`` or ``"gpt-4"``.

    Returns:
        The cost computed by the selected pricing helper.

    Raises:
        ValueError: If ``engine_name`` is not one of the supported engines.
    """
    if engine_name == "gpt-35-turbo":
        return get_cost_gpt3_5(generation_dict)
    if engine_name == "gpt-4":
        return get_cost_gpt4(generation_dict)

    # An unknown engine used to fall through to ``return cost`` with ``cost``
    # never assigned (UnboundLocalError); fail with a clear message instead.
    raise ValueError(f"Unsupported engine name: {engine_name!r}")

def get_cost_gpt3_5(generation_dict):
    """Return the gpt-3.5 cost of a single generation record.

    The record's ``'Prompt_tokens'`` and ``'Completion_tokens'`` counts are
    weighted by per-1000-token rates (0.0015 for prompt, 0.002 for
    completion) and the sum is divided by 1000.
    """
    prompt_rate, completion_rate = 0.0015, 0.002

    prompt_charge = prompt_rate * generation_dict['Prompt_tokens']
    completion_charge = completion_rate * generation_dict['Completion_tokens']

    return (prompt_charge + completion_charge) / 1000

def get_cost_gpt4(generation_dict):
    """Return the gpt-4 cost of a single generation record.

    The record's ``'Prompt_tokens'`` and ``'Completion_tokens'`` counts are
    weighted by per-1000-token rates (0.03 for prompt, 0.06 for completion)
    and the sum is divided by 1000.
    """
    prompt_rate, completion_rate = 0.03, 0.06

    prompt_charge = prompt_rate * generation_dict['Prompt_tokens']
    completion_charge = completion_rate * generation_dict['Completion_tokens']

    return (prompt_charge + completion_charge) / 1000

def get_intentions(generation_dict):
    """Extract the intention strings from a generation record's answer.

    The ``'Answer'`` text is split into lines. Every line that starts
    (case-insensitively, ignoring surrounding whitespace) with
    ``intention`` contributes the text that follows its first colon.

    Args:
        generation_dict: Record whose ``'Answer'`` entry is the answer text.

    Returns:
        A list of intention strings in the order they appear. A matching
        line with nothing after the colon contributes an empty string.
    """
    intentions = []
    for raw_line in generation_dict['Answer'].strip().split('\n'):
        # Strip first so indented lines and stray '\r' characters still match.
        line = raw_line.strip()
        if line[:9].lower() != 'intention':
            continue

        # Split on the FIRST colon only: the intention text itself may contain
        # colons (e.g. "ratio 3:1"), which ``split(':')[1]` used to cut off.
        _label, separator, text = line.partition(':')
        if not separator:
            # Malformed line (label without a colon): report it and skip it.
            print(line)
            continue
        intentions.append(text.strip())

    return intentions

def analyze_cost(file_name, engine_name):
    """Summarize the cost of every generation record in a JSON-lines file.

    Args:
        file_name: Path to a file holding one JSON object per line.
        engine_name: Engine name forwarded to ``get_cost`` for each record.

    Returns:
        A tuple ``(average_cost, total_cost, num_sessions)``. When the file
        holds no records the average is NaN, the total is 0.0 and the count
        is 0.
    """
    cost_list = []
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(file_name, 'r', encoding='utf-8') as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file),
            # which would otherwise make json.loads raise.
            if not line.strip():
                continue
            cost_list.append(get_cost(json.loads(line), engine_name))

    num_sessions = len(cost_list)
    total_cost = np.sum(cost_list)
    # np.mean([]) also yields NaN but emits a RuntimeWarning; keep the value
    # and avoid the warning.
    average_cost = np.mean(cost_list) if cost_list else float('nan')

    return average_cost, total_cost, num_sessions

def add_intention_to_dict(generation_dict):
    """Attach the parsed intentions to a generation record.

    The list returned by ``get_intentions`` is stored under the
    ``'Intentions'`` key of ``generation_dict``; the same (mutated) dict is
    returned.
    """
    generation_dict['Intentions'] = get_intentions(generation_dict)
    return generation_dict

def add_intention_to_file(file_name):
    """Write a copy of a JSON-lines file with intentions added to each record.

    Every non-blank line of ``file_name`` is parsed as JSON and passed
    through ``add_intention_to_dict``. The results are written, one JSON
    object per line, to a sibling file whose name is ``file_name`` with its
    extension replaced by ``_intentions.json``.

    Args:
        file_name: Path to the input JSON-lines file.
    """
    records = []
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(file_name, 'r', encoding='utf-8') as f:
        for line in f:
            # Tolerate blank lines, which would make json.loads raise.
            if not line.strip():
                continue
            records.append(add_intention_to_dict(json.loads(line)))

    # Strip only the final extension. ``file_name.split('.')[0]`` cut the path
    # at the first dot anywhere, so "./x.json" or "run.v1.json" produced the
    # wrong output path.
    base_name, _extension = os.path.splitext(file_name)
    new_file_name = base_name + '_intentions.json'

    # ensure_ascii=False emits raw non-ASCII characters, so the output must be
    # written as UTF-8 explicitly to avoid locale-dependent encode errors.
    with open(new_file_name, 'w', encoding='utf-8') as f:
        f.writelines(
            json.dumps(record, ensure_ascii=False) + '\n' for record in records
        )


if __name__ == '__main__':
    # Only the gpt-35-turbo results are processed for now; append "gpt-4" to
    # this list to process those result files as well.
    engines = ["gpt-35-turbo"]
    # Split indices 10 and 11, as strings for use in the file names.
    split_ids = [str(index) for index in range(10, 12)]

    for engine in engines:
        for split_id in split_ids:
            result_path = f'data_preprocess/generation_results/{engine}_answer_{split_id}.json'

            # Optional cost report, currently disabled:
            # average_cost, total_cost, num_sessions = analyze_cost(result_path, engine)
            # print(f"Average cost for {engine}: {average_cost}")
            # print(f"Total cost for {engine}: {total_cost}")
            # print(f"Number of sessions for {engine}: {num_sessions}")

            add_intention_to_file(result_path)
            print(f"Added intentions to {engine} split {split_id}.")