-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathki_functions.py
More file actions
91 lines (81 loc) · 3.58 KB
/
ki_functions.py
File metadata and controls
91 lines (81 loc) · 3.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import json
import re

import requests
from openai import OpenAI, RateLimitError

from api_keys import ChatGPT_key, Perplexity_key
def gpt_chat(client, llm_model, prompt):
    """Send a single user prompt to a chat-completions client and return the reply.

    Args:
        client: Object exposing ``chat.completions.create(model=..., messages=...)``.
        llm_model: Model identifier forwarded as ``model``.
        prompt: Text sent as the content of one ``user`` message.

    Returns:
        The stripped content of the first choice. An empty string if the
        first choice carries no content. If the request raises
        ``RateLimitError``, a message describing the error is returned
        instead of raising.
    """
    # Keep the try body limited to the call that can hit the rate limit.
    try:
        response = client.chat.completions.create(
            model=llm_model,
            messages=[{"role": "user", "content": prompt}],
        )
    except RateLimitError as e:
        return f"Rate Limit überschritten: {e}"

    content = response.choices[0].message.content
    # ``content`` may be None; calling .strip() on it would raise AttributeError.
    return content.strip() if content else ""
def perplexity_chat(llm_model, prompt, timeout=None):
    """Send a single user prompt to the Perplexity chat-completions endpoint.

    Args:
        llm_model: Model identifier sent as ``model`` in the request body.
        prompt: Text sent as the content of one ``user`` message.
        timeout: Optional timeout forwarded to ``requests.post``. The default
            ``None`` applies no timeout, which matches the previous behaviour.

    Returns:
        The ``content`` of the first choice's message in the JSON response.

    Raises:
        requests.HTTPError: If the response has an HTTP error status
            (via ``raise_for_status``).
    """
    perplexity_api_url = "https://api.perplexity.ai/chat/completions"
    headers = {
        "Authorization": f"Bearer {Perplexity_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": llm_model,
        "messages": [
            {"role": "user", "content": prompt},
        ],
    }
    resp = requests.post(
        perplexity_api_url,
        headers=headers,
        data=json.dumps(payload),
        timeout=timeout,
    )
    resp.raise_for_status()  # raises on an HTTP error status
    body = resp.json()
    # Plain text output taken from the first choice.
    return body["choices"][0]["message"]["content"]
def create_table(response):
    """Print the comma-separated data rows found in ``response``.

    Each line has its double quotes removed. Lines that do not start with a
    digit (including blank lines) are skipped. For every remaining line the
    first six comma-separated fields are kept as-is and everything after
    them is re-joined with commas into a single final field, so commas
    inside that last field survive.

    Args:
        response: Multi-line text, lines separated by ``'\\n'``.

    Returns:
        None. Each row is printed as a list.
    """
    for line in response.split('\n'):
        line = line.replace('"', '')
        # Slicing instead of indexing: an empty line yields '' rather than
        # raising IndexError, and ''.isdigit() is False.
        if not line[:1].isdigit():
            continue
        fields = line.split(',')
        full_row = fields[:6] + [','.join(fields[6:])]
        print(full_row)
# Extract text from elements
def extract_text(element):
    """Return the whitespace-normalised text of ``element``.

    Args:
        element: A ``str``, ``int`` or ``float``, or any other object that
            exposes its text through a ``.text`` string attribute.

    Returns:
        The text with zero-width spaces (``\\u200b``) removed, every run of
        whitespace (including non-breaking spaces and newlines) collapsed to
        one space, and leading/trailing whitespace stripped. ``None`` for
        ``None``, an empty string or any other falsy non-numeric input.
    """
    # Numeric zero is falsy but is still a real value; bool is excluded so
    # that False keeps being treated as "no value".
    is_number = isinstance(element, (int, float)) and not isinstance(element, bool)
    if not element and not is_number:
        return None

    if not isinstance(element, (str, int, float)):
        element = element.text.strip()

    # \u200b is not matched by \s, so it is removed explicitly; \xa0 and \n
    # are whitespace and are handled by the regex.
    text = str(element).replace('\u200b', '')
    return re.sub(r'\s+', ' ', text).strip()
def brand_variations(brand):
    """Return spelling variations of ``brand``.

    The variations are the brand itself, copies with one kind of character
    removed (hyphen, typographic apostrophe, backtick, straight apostrophe,
    ``' /'``, ``®``), a copy with ``è``/``é`` -> ``e`` and ``ö`` -> ``oe``,
    and lower-, upper- and title-cased copies.

    Args:
        brand: Brand name as a string.

    Returns:
        A list of unique variations in a stable order (the order listed
        above). Variations of three characters or fewer, and variations that
        are just a generic coffee-trade word, are left out.
    """
    candidates = [
        brand,
        brand.replace('-', ''),
        brand.replace('’', ''),
        brand.replace('`', ''),
        brand.replace("'", ""),
        brand.replace(' /', ''),
        brand.replace('®', ''),
        brand.replace('è', 'e').replace('é', 'e').replace('ö', 'oe'),
        brand.lower(),
        brand.upper(),
        brand.title(),
    ]
    branch_exclude = {
        'kaffee', 'coffee', 'cafe', 'caffè', 'caffé', 'caffe',
        'kaffeerösterei', 'roast', 'roasters', 'roaster',
    }
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result is reproducible between runs (a set would not be).
    return [
        variant
        for variant in dict.fromkeys(candidates)
        if variant.lower() not in branch_exclude and len(variant) > 3
    ]
def get_company_keywords(company):
    """Derive lowercase search keywords from a company name.

    The name is split on whitespace in four variants (punctuation and
    apostrophes removed; punctuation removed; lowercased with ``ä``/``ö``/
    ``ü`` transliterated; unchanged). All tokens are lowercased, and tokens
    of three characters or fewer, or containing a legal-form / domain /
    generic marker, are dropped.

    Args:
        company: Company name as a string.

    Returns:
        A list of unique keywords in first-seen order, followed by the
        unmodified ``company`` string (unless it already equals a keyword).
    """
    stripped = company.replace('-', '').replace('_', ' ').replace('.', '')
    tokens = (
        stripped.replace('’', '').replace("'", "").split()
        + stripped.split()
        + company.lower().replace('ä', 'ae').replace('ö', 'oe').replace('ü', 'ue').split()
        + company.split()
    )

    # Markers are compared against lowercased, whitespace-free tokens, so
    # they must be lowercase themselves. Short purely alphabetic legal forms
    # (ag, se, kg, nv, og, ohg) are deliberately not listed: such tokens are
    # already removed by the length filter, and as substrings they would
    # wrongly drop ordinary words.
    # NOTE(review): matching is substring-based, so e.g. any token containing
    # 'inc' is dropped; kept that way for backward compatibility.
    markers = (
        'gmbh', 'mbh', 'inc', 'limited', 'ltd', 'llc', 'co.', 'lda',
        'a.s.', 's.a.', 'b.v.', 'n.v.', 's.l.u.',
        '(', ')', '.de', '.com', '.at',
        'kaffee',
    )

    keywords = [
        token
        for token in dict.fromkeys(t.lower() for t in tokens)
        if len(token) > 3 and not any(marker in token for marker in markers)
    ]
    # dict.fromkeys keeps the order stable and avoids a duplicate when the
    # company string itself is already one of the keywords.
    return list(dict.fromkeys(keywords + [company]))