-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate.py
More file actions
142 lines (122 loc) · 6.55 KB
/
generate.py
File metadata and controls
142 lines (122 loc) · 6.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import os
import json
import yaml
import random
from openai import OpenAI
import argparse
# Prefer the OPENAI_API_KEY environment variable; fall back to the original
# placeholder so existing behavior is unchanged when the variable is unset.
# NOTE(review): never commit a real credential — set OPENAI_API_KEY instead.
api_key = os.environ.get("OPENAI_API_KEY", "API_KEY")  # replace with your API key
client = OpenAI(api_key=api_key)
def llm(messages, **kwargs) -> str:
    """Send a single-turn prompt to the chat model and return the reply text.

    The argument is stringified into one user message that follows a fixed
    system instruction.  Extra keyword arguments are accepted but ignored.
    """
    completion = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[
            {"role": "system", "content": "You are an agent and you will complete a task."},
            {"role": "user", "content": f"{messages}"},
        ],
    )
    return completion.choices[0].message.content
def double_llm(message1, message2, message3, **kwargs) -> str:
    """Send a two-turn conversation to the chat model and return the reply.

    The history is: system instruction, user *message1*, assistant
    *message2* (a previous model answer), then user *message3*.  Extra
    keyword arguments are accepted but ignored.
    """
    history = [
        {"role": "system", "content": "You are an agent and you will complete a task."},
        {"role": "user", "content": f"{message1}"},
        {"role": "assistant", "content": f"{message2}"},
        {"role": "user", "content": f"{message3}"},
    ]
    completion = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=history,
    )
    return completion.choices[0].message.content
def load_denoised_data(filename: str) -> str:
    """Return the GPT-denoised material stored for *filename*.

    Args:
        filename: Base name (without the ".txt" extension) of a file under
            ./dataset/GPT_denoised/ — the caller strips ".txt" before calling.

    Returns:
        The full text content of the file.
    """
    # Bug fix: the original ignored the `filename` parameter and opened a
    # hard-coded path, so every call returned the same file's contents.
    with open(f"./dataset/GPT_denoised/{filename}.txt", "r") as f:
        return f.read()
def vanilla_generation_prompt(keyword: str):
    """Build the baseline (method 1) prompt pair for one keyword.

    Returns a ``(pre_prompt, post_prompt)`` tuple; the caller inserts the
    reference materials between the two.  NOTE: the main script relies on
    slicing ``pre_prompt[:-56]`` to drop the trailing "Here is the original
    materials..." sentence, so that sentence's exact bytes must not change.
    """
    pre_parts = [
        "You are an expert in writing Encyclopedia documents. ",
        "I will give you some materials for your reference, based on which you should generate an Encyclopedia document. ",
        "Your generated Encyclopedia document should be structured as a real Encyclopedia document. ",
        "You should only output your generated Encyclopedia document, without other sentences. ",
        f"\n \n The key word is: {keyword}. ",
        "Here is the original materials, for your reference: \n \n",
    ]
    post_prompt = "\n \n Now it's your turn. You should only output your generated Encyclopedia document, without other sentences."
    return "".join(pre_parts), post_prompt
def CoT(keyword):
    """Build the chain-of-thought warm-up questions (method 2) for *keyword*."""
    questions = (
        "Let's think step by step. First, please answer these questions: \n \n ",
        "1. What is a common structure of an Encyclopedia document? \n",
        f"2. What do you know about the key word, {keyword}? \n",
        "3. Please give a sentence-level outline of your Encyclopedia document, describing what you would convey in each part. \n",
    )
    return "".join(questions)
def ICL(example_keyword, example_ground_truth):
    """Build the in-context-learning prefix (method 3) from one worked example."""
    segments = (
        f"Here is an example Encyclopedia document, whose key word is {example_keyword}: \n",
        f"\n {example_ground_truth} \n \n",
        "Now it's your turn. Here is the original materials, for your reference: \n \n",
    )
    return "".join(segments)
def pre_define():
    """Build the fixed template-outline prefix (method 4)."""
    sections = (
        "\n A common Encyclopedia document contains the following parts: \n \n",
        "1. Introduction \n",
        "2. History or Biography \n",
        "3. Detailed Explanations \n",
        "4. Debates or Supporting Evidences over this topic \n",
        "5. Broad Impact or Applications \n",
        "6. References \n \n ",
        "You can generate your Encyclopedia document following this structure. \n \n",
        "Here is the original materials, for your reference: \n \n",
    )
    return "".join(sections)
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--file", default="orange", type=str)
    # 1: vanilla baseline; 2: CoT; 3: ICL; 4: pre-defined template outline
    parser.add_argument("--method", default=1, type=int)
    args = parser.parse_args()
    method = args.method  # overridden below; kept for CLI compatibility

    # One worked (keyword, ground-truth) pair used as the ICL demonstration.
    with open(f"./dataset/ground_truth/bottle_gt.txt", "r") as f:
        example_ground_truth = f.read()
    example_keyword = "bottle"

    files = os.listdir("./dataset/rank_top_3/")
    ICL_prompt = ICL(example_keyword, example_ground_truth)
    pre_defined_prompt = pre_define()

    for keyword in ["GreatWall", "luxun", "MoM", "montezuma", "RLHF", "whale"]:
        print(f"[Debug Info] keyword == {keyword}")
        # Bug fix: reset the accumulated materials for every keyword.  The
        # original initialized this once before the loop, so each later
        # keyword's prompt silently included all earlier keywords' materials.
        original_content = ""
        for original_file in files:
            if keyword in original_file:
                # [:-4] strips ".txt"; each source truncated to 2000 chars.
                original_content += load_denoised_data(original_file[:-4])[:2000]
                original_content += "\n \n"
        pre_prompt, post_prompt = vanilla_generation_prompt(keyword)
        CoT_prompt = CoT(keyword)
        for method in [1, 2, 3, 4]:
            print(f"[Debug Info] method == {method}")
            # pre_prompt[:-56] drops the trailing "Here is the original
            # materials..." sentence so each method supplies its own lead-in.
            if method == 1:
                response = llm(pre_prompt + original_content + post_prompt)
            elif method == 2:
                intermediate = llm(pre_prompt[:-56] + CoT_prompt)
                response = double_llm(pre_prompt[:-56] + CoT_prompt, intermediate,
                    "Based on your answers above, please generate. Here is the original materials, for your reference: \n \n" + original_content + post_prompt)
            elif method == 3:
                response = llm(pre_prompt[:-56] + ICL_prompt + original_content + post_prompt)
            elif method == 4:
                response = llm(pre_prompt[:-56] + pre_defined_prompt + original_content + post_prompt)
            with open(f"./generated_truncated/{method}/{keyword}.txt", "w") as f:
                f.write(response)
    # (Removed a commented-out legacy copy of this single-file flow that
    # duplicated the method dispatch above and wrote to ./generated/.)