-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcue_word.py
More file actions
70 lines (67 loc) · 2.14 KB
/
cue_word.py
File metadata and controls
70 lines (67 loc) · 2.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import nltk
import re
import math
import numpy
import copy
import json
from nltk.tokenize.texttiling import TextTilingTokenizer
from textblob import TextBlob
from nltk.corpus import wordnet
def read_cue_words(fname):
    """Load the set of cue words stored in a text file.

    Only the first whitespace-separated token of each line is kept; anything
    that follows it on the line is ignored.  Blank and whitespace-only lines
    are skipped rather than raising ``IndexError``.

    Args:
        fname: Path of the cue-word file to read.

    Returns:
        A set holding the first token of every non-blank line.
    """
    with open(fname) as f:
        # The ``with`` block closes the file, so no explicit close() is needed.
        split_lines = (line.split() for line in f)
        # ``if tokens`` drops lines that contain no token at all.
        return {tokens[0] for tokens in split_lines if tokens}
def filter_cue_words(hp, tokseqs):
    """Keep only the entries of ``hp`` that are adjacent to a cue word.

    An entry ``dt`` is kept when a cue word occurs in ``tokseqs[dt[1]]`` or,
    failing that, in ``tokseqs[dt[1] + 1]``.  The relative order of the kept
    entries is the same as in ``hp``.

    Args:
        hp: Iterable of indexable entries whose item ``1`` is an index into
            ``tokseqs``.
        tokseqs: Indexable collection of objects exposing ``wrdindex_list``,
            an iterable of indexable items whose item ``0`` is compared
            against the cue words (presumably NLTK TextTiling token
            sequences -- TODO confirm against the caller).

    Returns:
        A new list with the entries of ``hp`` that have a cue word nearby.

    Note:
        ``tokseqs[dt[1] + 1]`` is read whenever the first sequence holds no
        cue word, so ``dt[1]`` must not be the last valid index in that case.
    """
    # The "begin" and "end" cue lists both come from this one file, so it is
    # loaded once and used for both checks.
    cue_words = read_cue_words('results/chi2_cuewords')

    def has_cue(tokseq):
        # True as soon as one entry of the sequence is a cue word.
        return any(wi[0] in cue_words for wi in tokseq.wrdindex_list)

    # ``or`` short-circuits: the following sequence is only inspected when
    # the sequence at dt[1] contains no cue word.
    return [
        dt for dt in hp
        if has_cue(tokseqs[dt[1]]) or has_cue(tokseqs[dt[1] + 1])
    ]
def update_hp_by_cue_words(hp, tokseqs, cue_percent):
    """Scale down the entries of ``hp`` that are not adjacent to a cue word.

    An entry ``dt`` is copied through unchanged when a cue word occurs in
    ``tokseqs[dt[1]]`` or, failing that, in ``tokseqs[dt[1] + 1]``.
    Otherwise it is replaced by the tuple
    ``(dt[0] * (1 - cue_percent), dt[1])``.  The result has one entry per
    entry of ``hp``, in the same order.

    Args:
        hp: Iterable of indexable entries whose item ``0`` is a number and
            whose item ``1`` is an index into ``tokseqs``.
        tokseqs: Indexable collection of objects exposing ``wrdindex_list``,
            an iterable of indexable items whose item ``0`` is compared
            against the cue words (presumably NLTK TextTiling token
            sequences -- TODO confirm against the caller).
        cue_percent: Fraction removed from item ``0`` of entries that have no
            cue word nearby.

    Returns:
        A new list with the kept and rescaled entries.

    Note:
        ``tokseqs[dt[1] + 1]`` is read whenever the first sequence holds no
        cue word, so ``dt[1]`` must not be the last valid index in that case.
    """
    # The "begin" and "end" cue lists both come from this one file, so it is
    # loaded once and used for both checks.
    cue_words = read_cue_words('results/chi2_cuewords')

    def has_cue(tokseq):
        # True as soon as one entry of the sequence is a cue word.
        return any(wi[0] in cue_words for wi in tokseq.wrdindex_list)

    new_hp = []
    for dt in hp:
        # ``or`` short-circuits: the following sequence is only inspected
        # when the sequence at dt[1] contains no cue word.
        if has_cue(tokseqs[dt[1]]) or has_cue(tokseqs[dt[1] + 1]):
            new_hp.append(dt)
        else:
            new_hp.append((dt[0] * (1 - cue_percent), dt[1]))
    return new_hp