-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path2.responses_to_table.py
More file actions
110 lines (101 loc) · 4.23 KB
/
2.responses_to_table.py
File metadata and controls
110 lines (101 loc) · 4.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env python3
"""
Iterative text-processing script for the ChatGPT API.

Extracts the brand information contained in the LLM responses, enriches it
with further research, and writes the result out in table format.
"""
import json
import os
import re
from datetime import datetime

import pandas as pd
import requests

# NOTE(review): star import supplies ChatGPT_key, OpenAI, gpt_chat, ... —
# confirm the exact names exported by ki_functions.
from ki_functions import *

########################################################################################################################
# Dependencies:
#   pip install openai openpyxl tabulate
########################################################################################################################

# The API key must be in the environment before the client is created.
os.environ["OPENAI_API_KEY"] = ChatGPT_key
# Initialise the OpenAI client (no arguments — the key is read from the env).
client = OpenAI()

llm_model = "gpt-5.4"
# llm_model = "sonar"

# Text file holding the synthesis prompt that is prepended to every response.
responses_synthesis_filename = "prompt_responses_synthesis" + ".txt"
# Working directory containing the prompt file and the ./responses folder.
file_path = r"C:\Users\andre\OneDrive\Desktop\KI-Performance Versicherungen 2026"
if __name__ == '__main__':
    os.chdir(file_path)

    # Synthesis prompt that is prepended to every single response before the
    # LLM call; collapsed to one line.
    with open(responses_synthesis_filename, "r", encoding="utf-8") as f:
        response_synthesis = f.read()
    if '\n' in response_synthesis:
        response_synthesis = response_synthesis.replace('\n', ' ')

    os.chdir('./responses')
    # BUGFIX: use endswith() so e.g. 'foo.txt.bak' is no longer picked up
    # (the original tested ".txt" as a substring anywhere in the name).
    file_list = sorted([f for f in os.listdir()
                        if f.endswith('.txt') and 'full_responses' in f])

    start_at = 0  # set > 0 to resume a previously interrupted run
    for n, source_file_filename in enumerate(file_list):
        print(source_file_filename)
        if n < start_at:
            continue

        # BUGFIX: reset per source file. Previously final_table was created
        # once before this loop, so every later model's Excel file also
        # contained all earlier models' rows.
        final_table = []

        model_name = source_file_filename.replace('full_responses', '').replace('.txt', '').replace('_', '')

        # Source file with the raw responses in text format.
        with open(source_file_filename, "r", encoding="utf-8") as f:
            source_file = f.read()

        # Responses are delimited by numbered headers "1:\n" ... "50:\n";
        # re.split yields a preamble plus the 50 response bodies (51 parts).
        patterns = re.findall(r'(?:[1-9]|[1-4]\d|50):\n', source_file)
        responses_list = re.split(r'(?:[1-9]|[1-4]\d|50):\n', source_file)
        if len(responses_list) != 51:
            print(f'Abweichende Anzahl: {len(responses_list)}')
            print(patterns)
            continue

        for ID, response in enumerate(responses_list):
            # Index 0 is the preamble before the first header; index 1 is also
            # skipped as in the original code.
            # NOTE(review): confirm the first response is intentionally excluded.
            if ID <= 1:
                continue
            if len(response) <= 3:
                continue
            # Sanity check: header at patterns[ID-1] must carry this ID.
            if str(ID) not in patterns[ID - 1]:
                break
            # Drop a very short trailing line (e.g. a stray numbering fragment).
            if response.count('\n') >= 1:
                response_s = response.rsplit('\n', 1)[0]
                if len(response) - len(response_s) < 4:
                    response = response_s

            full_prompt = response_synthesis + "\n" + response
            print(f"{ID}: {response}")
            table_format = gpt_chat(client, llm_model, full_prompt)
            # table_format = perplexity_chat(llm_model, full_prompt)

            # The model is expected to answer with semicolon-separated rows
            # that start with a rank digit; keep only those lines.
            for line in table_format.split('\n'):
                # BUGFIX: guard empty lines — line[0] raised IndexError.
                if not line or not line[0].isdigit():
                    continue
                row = line.split(';')
                if len(row) != 7:
                    print(f'Abweichende Spalten: {len(row)}')
                    print(len(row), row)
                if len(row) < 7:
                    # A URL at index 3 means the 'Produkt' column is missing.
                    if 'http' in row[3]:
                        row.insert(4, '')
                final_table.append([ID] + row)

        # Normalize every row to exactly 8 columns (ID + 7 table columns).
        new_table = []
        for row in final_table:
            if len(row) >= 8:
                # Fold any overhang columns into the last (description) field.
                overhang = [str(e).strip() for e in row[7:] if len(str(e).strip()) > 4]
                row = row[:7] + ['; '.join(overhang)]
            if len(row) < 8:
                if 'http' in row[3]:
                    row.insert(4, '')
                else:
                    row.insert(-1, '')
            new_table.append(row)

        header = ['Anfrage', 'Rang', 'Firma', 'Marke', 'Website', 'Produkt', 'Quellen',
                  'Wörtliche Beschreibung der Marke im Chat']
        df_responses = pd.DataFrame(new_table, columns=header)
        dt_str_now = datetime.now().strftime("%Y-%m-%d_%H_%M_%S")
        filename = model_name + '_responses_table_' + dt_str_now + '.xlsx'
        df_responses.to_excel(filename)