-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathtelegram_text_extractor.py
More file actions
45 lines (32 loc) · 1.78 KB
/
telegram_text_extractor.py
File metadata and controls
45 lines (32 loc) · 1.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import json
def input_choice(choices, prompt, allow_blank=False):
    """Ask the user to pick one entry from ``choices``.

    The available options are printed on one line, then a single answer
    is read with ``input(prompt)``.

    Args:
        choices: Collection of strings the user may pick from. It is both
            joined for display and used for the membership test, so it
            has to be re-iterable (e.g. a list or a set).
        prompt: Text passed to ``input`` when asking for the answer.
        allow_blank: When true, an empty answer is accepted and returned
            as ``''``.

    Returns:
        The string the user entered, unmodified.

    Raises:
        ValueError: If the answer is neither one of ``choices`` nor an
            accepted blank answer.
    """
    print('Available choices: ' + ', '.join(choices))
    choice = input(prompt)
    if choice in choices or (allow_blank and choice == ''):
        return choice
    raise ValueError('This choice is not available: ' + choice)
def extract_text(node):
    """Flatten a nested text node into one plain string.

    A node may be:
      * a ``str`` - returned as is;
      * a ``dict`` - its ``'text'`` entry is flattened recursively;
      * a ``list`` - every element is flattened and the pieces are
        concatenated without a separator.

    Raises:
        ValueError: For a dict that has no ``'text'`` key, or for a node
            of any other type.
    """
    if isinstance(node, str):
        return node

    if isinstance(node, dict):
        if 'text' not in node:
            raise ValueError('Encountered dict without text field while extracting text: ' + str(node))
        # The 'text' value may itself be a str, a list or another dict.
        return extract_text(node['text'])

    if isinstance(node, list):
        return ''.join(extract_text(child) for child in node)

    raise ValueError('Encountered unexpected node extracting text: ' + str(node))
# Script body: read the export from result.json in the working directory,
# let the user pick a chat (and optionally one author), and write the
# selected messages' text to output.txt separated by blank lines.
with open('result.json', encoding='utf8') as input_file:
    telegram_export = json.load(input_file)

chats = telegram_export['chats']['list']

# Only chats with a non-null name can be offered; a null name cannot be
# joined into the prompt line or typed by the user.
available_chat_names = [chat['name'] for chat in chats if chat.get('name') is not None]
requested_chat_name = input_choice(available_chat_names, 'Enter requested chat name: ')
# input_choice has already validated the name, so a match exists; when
# several chats share the name, the first one is used.
requested_chat = next(chat for chat in chats if chat.get('name') == requested_chat_name)
print()

messages = requested_chat['messages']

# Messages without a 'from' key are skipped. A null 'from' is left out of
# the offered names because ', '.join() in input_choice would raise
# TypeError on it (presumably this occurs for deleted accounts - confirm
# against a real export).
available_author_names = {message['from'] for message in messages if message.get('from') is not None}
requested_author_name = input_choice(
    available_author_names,
    'Only export this person\'s messages (leave blank to export everything): ',
    allow_blank=True,
)

# A blank author means "export everything that has a 'from' key",
# including messages whose author is null.
extracted_text = '\n\n'.join(
    extract_text(message)
    for message in messages
    if 'from' in message and (requested_author_name == '' or message['from'] == requested_author_name)
)

with open('output.txt', 'w', encoding='utf8') as output_file:
    output_file.write(extracted_text)