-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathword_source.py
More file actions
150 lines (128 loc) · 3.84 KB
/
word_source.py
File metadata and controls
150 lines (128 loc) · 3.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import random
from config import global_config as config
import nltk
from nltk.corpus import wordnet as wn
class WordsSource:
    ''' Interface defining required methods of a word source.

    Concrete sources subclass this and override `sampleWords`.
    '''

    def sampleWords(self, n):
        ''' Sample `n` words from the word source. This sampling may be random
        or biased in ways to improve content-distribution of words.

        Raises NotImplementedError unless overridden by a subclass.
        '''
        # `self` is required: without it, calling the method on an instance
        # fails with TypeError before this error can ever be raised.
        raise NotImplementedError("Please Implement this method")
class WordsFile:
    ''' Wrapper on file containing newline separated list of words. '''

    def __init__(self, filename):
        # Path of the word-list file; it is only opened when words are requested.
        self.filename = filename

    def getAllWords(self):
        ''' Fetches all words in file as in-memory list.

        Each line is stripped of surrounding whitespace and lower-cased.
        Blank (or whitespace-only) lines are skipped so that the result never
        contains empty-string "words". The file is re-read on every call.
        '''
        words = []
        with open(self.filename, 'r') as f:
            for line in f:
                word = line.strip().lower()
                # A blank line is a separator or trailing newline, not a word.
                if word:
                    words.append(word)
        return words
class WordsFromFile(WordsSource):
    ''' Word source backed by a file holding one word per line. '''

    def __init__(self):
        # Cached word list; stays None until the file is first read.
        self._words = None
        # Reader for the word-list file named by the global configuration.
        self._wordsFile = WordsFile(config.WORDS_FILE)

    def getAllWords(self):
        ''' Returns the cached word list, reading the file on first use. '''
        if self._words is not None:
            return self._words
        self._words = self._wordsFile.getAllWords()
        return self._words

    def refreshWords(self):
        ''' Re-reads the file and replaces the cached word list. '''
        self._words = self._wordsFile.getAllWords()

    def sampleWords(self, n):
        ''' Draws `n` distinct entries uniformly from the cached word list. '''
        return random.sample(self.getAllWords(), n)
class NounsFromNLTK(WordsSource):
    ''' Word source based off WordNet english word synset index. '''

    def __init__(self):
        # Cached list of single-word nouns; stays None until first use.
        self._words = None

    def getAllWords(self):
        ''' Lazily evaluated/cached method that returns stored list of all single-word nouns in WordNet. '''
        if self._words is None:
            # Store on the instance (not in a local) so the expensive WordNet
            # scan runs once and later calls return the cached list.
            self._words = self.getSingleWordNouns()
        return self._words

    def getSingleWordNouns(self):
        ''' Helper method of `getAllWords()` that fetches all single-word nouns in WordNet.

        Returns a de-duplicated list (in arbitrary order) of noun lemma names
        that contain no underscore and consist only of ASCII characters.
        '''
        # Runs on every call; intended to make sure the corpus is available.
        nltk.download('wordnet')
        nouns = set()
        for synset in wn.all_synsets(wn.NOUN):
            for name in synset.lemma_names():
                # Names containing '_' are treated as multi-word and skipped.
                if '_' in name:
                    continue
                try:
                    # Encoding is used purely as an ASCII-only check; the
                    # encoded bytes are discarded so words stay text.
                    name.encode('ascii')
                except UnicodeEncodeError:
                    continue
                nouns.add(str(name))
        return list(nouns)

    def refreshWords(self):
        ''' Force-refreshes in-memory list of words. '''
        self._words = self.getSingleWordNouns()

    def sampleWords(self, n):
        ''' Samples words uniformly from in-memory word list (of single-word nouns). '''
        words = self.getAllWords()
        return random.sample(words, n)
# Module-level shared word source, for global access.
# Constructing it only wraps the configured file path; the word list itself
# is read on the first getAllWords()/sampleWords() call.
WordsInMemory = WordsFromFile()
"""
Possible advanced game generation technique (for future):
# Take input list of words from file
# Use concept.io to build graph (in-memory)
# word relations, where words are nodes, and
# relations are edges
# Sample graph to find superset (of independent set of size k)
# which is of requisite size n.
"""
# Utility functions
def reservoirSample(iterator, K):
    ''' Draws up to `K` items at random from a stream using reservoir sampling.

    The first `K` items fill the reservoir in order; each later item may
    overwrite a randomly chosen slot. If the stream yields fewer than `K`
    items, all of them are returned in their original order.
    Currently unused, may be used in the future.
    '''
    reservoir = []
    for seen, element in enumerate(iterator, 1):
        if len(reservoir) < K:
            reservoir.append(element)
            continue
        # Pick a slot in [0, seen); the item is kept only if it lands in the reservoir.
        slot = int(random.random() * seen)
        if slot < K:
            reservoir[slot] = element
    return reservoir
# Default values
# Built-in list of 25 capitalised words. Nothing in the visible part of this
# module reads it; presumably a fallback for callers when no word source is
# configured -- verify against usage elsewhere.
DEFAULT_WORDS = [
"Europe",
"Cat",
"Bermuda",
"Jupiter",
"Dance",
"Pupil",
"Mail",
"Fair",
"Germany",
"Forest",
"Thumb",
"Press",
"Snow",
"Day",
"Washington",
"Fly",
"Head",
"Dog",
"Iron",
"Train",
"Beat",
"Nail",
"Charge",
"Bell",
"Alps"
]