forked from MarkScrivo/MultiDocumentAnswering
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbuild_index.py
More file actions
30 lines (22 loc) · 874 Bytes
/
build_index.py
File metadata and controls
30 lines (22 loc) · 874 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import openai
import json
import textwrap
def open_file(filepath):
    """Read the file at ``filepath`` as UTF-8 text and return its full contents."""
    with open(filepath, mode='r', encoding='utf-8') as handle:
        contents = handle.read()
    return contents
# Load the OpenAI API key from 'openaiapikey.txt' when the module is loaded.
# Surrounding whitespace is stripped: a trailing newline (which many editors
# append on save) would otherwise become part of the key and be sent with
# every request.
openai.api_key = open_file('openaiapikey.txt').strip()
def gpt3_embedding(content, engine='text-similarity-ada-001'):
    """Request an embedding for ``content`` and return it.

    Calls ``openai.Embedding.create`` with the given ``engine`` and returns
    the ``'embedding'`` field of the first entry in the response's ``'data'``
    list (noted by the original author as being a normal list).
    """
    reply = openai.Embedding.create(input=content, engine=engine)
    first_item = reply['data'][0]
    return first_item['embedding']
if __name__ == '__main__':
    # Build the index: read 'input.txt', cut it into pieces of at most 4000
    # characters, embed each piece, and save everything to 'index.json'.
    source_text = open_file('input.txt')
    index = []
    for piece in textwrap.wrap(source_text, 4000):
        # Only the text sent for embedding has non-ASCII characters dropped;
        # the stored 'content' keeps the piece exactly as wrapped.
        ascii_only = piece.encode(encoding='ASCII', errors='ignore').decode()
        entry = {'content': piece, 'vector': gpt3_embedding(ascii_only)}
        print(entry, '\n\n\n')
        index.append(entry)
    with open('index.json', 'w') as index_file:
        json.dump(index, index_file, indent=2)