-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathget_score.py
More file actions
executable file
·91 lines (66 loc) · 2.54 KB
/
get_score.py
File metadata and controls
executable file
·91 lines (66 loc) · 2.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import json
import os
import time
import urllib
from urllib.parse import urlparse
from youtube_transcript_api import YouTubeTranscriptApi
from QA import load_qa_model
from Summarization import load_summ_model
from wordembedding import *
# Expose only GPU index 2 to CUDA-aware libraries running in this process.
# NOTE(review): this assignment executes after the model modules above have
# been imported; if any of them initializes CUDA at import time the setting
# may come too late -- confirm, or move it above those imports.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
class Example:
    """A YouTube video together with its transcript and oEmbed metadata.

    Constructing an instance performs I/O: the transcript is read from
    ``survey_script/<id>.json`` when that file exists and is downloaded
    through ``YouTubeTranscriptApi`` otherwise; the title and author are
    always fetched from YouTube's oEmbed endpoint.
    """

    def __init__(self, id, questions):
        """Store the video id and questions, then load transcript and title.

        Args:
            id: YouTube video id (the value of the ``v=`` query parameter).
                The name shadows the builtin but is kept so that existing
                keyword callers keep working.
            questions: Questions associated with the video; stored as given.
        """
        self.id = id
        self.questions = questions
        self.answers = []  # starts empty; nothing in this class fills it
        self.score = 0
        self.summary = ''
        self.script = self.load_script()
        self.title, self.author = self.load_title()

    def load_script(self):
        """Return the transcript, preferring the local JSON cache.

        Returns:
            The parsed content of ``survey_script/<self.id>.json`` when that
            file exists, otherwise the result of ``download_script()``.
        """
        # Must be self.id: a bare ``id`` here is the builtin function, which
        # produced a path that never exists and silently disabled the cache.
        file_path = f"survey_script/{self.id}.json"
        if not os.path.exists(file_path):
            return self.download_script()
        # Explicit UTF-8 so non-ASCII (Korean) transcripts load the same way
        # regardless of the platform's default encoding.
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def download_script(self):
        """Download the Korean ('ko') transcript for this video.

        Returns:
            The transcript entries for ``self.id`` with the ``'duration'``
            key removed from every entry (entries are modified in place).
        """
        # The first element of the result is indexed by video id below; any
        # further elements are ignored here.
        transcripts = YouTubeTranscriptApi.get_transcripts([self.id], languages=['ko'])
        entries = transcripts[0][self.id]
        for entry in entries:
            entry.pop('duration', None)
        return entries

    def load_title(self):
        """Fetch the video's title and author from YouTube's oEmbed endpoint.

        Returns:
            ``[title, author_name]`` taken from the JSON response.

        Raises:
            Errors raised by ``urlopen`` (e.g. ``urllib.error.URLError``) and
            ``KeyError`` for a response lacking the expected keys propagate
            to the caller.
        """
        # Imported explicitly: a bare ``import urllib`` does not load the
        # ``request`` submodule, so ``urllib.request.urlopen`` only worked
        # when some other module happened to import it first.
        from urllib.parse import urlencode
        from urllib.request import urlopen

        params = {
            "format": "json",
            "url": f"https://www.youtube.com/watch?v={self.id}",
        }
        url = "https://www.youtube.com/oembed?" + urlencode(params)
        with urlopen(url) as response:
            data = json.loads(response.read().decode())
        return [data['title'], data['author_name']]

    def __str__(self):
        """Return a multi-line summary: author, title, script and questions."""
        return f"author: {self.author}\ntitle: {self.title}\nsub: {self.script}\nquestion: {self.questions}"
def load_models():
    """Load all models by calling each loader once, in a fixed order.

    The loaders are invoked as ``load_wm_model``, ``load_qa_model``,
    ``load_summ_model`` and ``load_sc_model``; ``load_qa_model`` must yield
    exactly two values (model and tokenizer).

    Returns:
        A five-element list:
        ``[wm_model, qa_model, qa_tokenizer, summ_model, sc_model]``.
    """
    loaded = []
    loaded.append(load_wm_model())
    qa_net, qa_tok = load_qa_model()
    loaded += [qa_net, qa_tok]
    loaded.append(load_summ_model())
    loaded.append(load_sc_model())
    return loaded
if __name__ == "__main__":
    # Load every model once; only summ_model and sc_model are used below.
    wm_model, qa_model, qa_tokenizer, summ_model, sc_model = load_models()

    # Videos to score; each Example fetches its transcript and title on
    # construction and starts with no questions.
    ids = ['0iOspqjA83g', 'vrAH1jfj3bU', 'FisxKZHJU18', '4puc2Ox9_vc', 'oPJ7d3Yvh88']
    examples = [Example(video_id, []) for video_id in ids]

    # Time only the scoring + summarization pass, not model/data loading.
    start = time.time()
    for example in examples:
        example.score = cosin_similar(example.title, example.script, sc_model)
        example.summary = summary_script(example.script, summ_model)
    elapsed = time.time() - start
    print(f"running time: {elapsed}")

    # Report the results per video.
    for example in examples:
        print('=' * 10)
        print(f'https://www.youtube.com/watch?v={example.id}')
        print(example.title)
        print(example.score)
        print('summary: ', example.summary)