Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 61 additions & 22 deletions corpustools/corpus/classes/lexicon.py
Original file line number Diff line number Diff line change
Expand Up @@ -845,8 +845,9 @@ class EnvironmentFilter(object):
Environments are strings of the form "[+feature,-feature]_[+feature]"
or "[+feature]_" or "a_b" or "_b"
"""
def __init__(self, corpus, env):

def __init__(self, corpus, env, long_distance = False, segment_set = None):
self.segment_set = segment_set
self.long_distance = long_distance
#there's a problem where some feature names have underscores in them
#so doing lhs,rhs=env.split('_') causes unpacking problems
#this is an awkward work-around that checks to see if either side of
Expand Down Expand Up @@ -897,8 +898,50 @@ def __init__(self, corpus, env):
self.rhs_string = rhs
self.rhs = [rhs]

def apply(self, sequence):
    """
    Collect the target segments of ``sequence`` whose context satisfies
    this filter.

    Parameters
    ----------
    sequence : indexable sequence of segments
        The tier to scan. It is indexed by position, and in the
        non-long-distance case it must also provide ``get_env(i)``.

    Returns
    -------
    list of tuple
        One ``(segment, self)`` pair per matching position, in the
        order the positions occur in ``sequence``.

    Notes
    -----
    If ``self.segment_set`` is None, every segment is treated as a
    potential target instead of raising a ``TypeError`` on the
    membership test.
    """
    targets = self.segment_set
    # Both flags are loop-invariant: a side is only checked when it is
    # specified and is not the bare '#' marker.
    check_lhs = bool(self.lhs) and self.lhs != '#'
    check_rhs = bool(self.rhs) and self.rhs != '#'

    results = []
    for i, seg in enumerate(sequence):
        if targets is not None and seg not in targets:
            continue  # not one of the segments being searched for
        if not self.long_distance:
            # FIXME! Transcription.get_env should just return an Environment
            # Basic local environment check on the immediate neighbours.
            env = Environment(*sequence.get_env(i))
            if env not in self:
                continue
        else:
            # Long-distance: any earlier segment may satisfy the left side
            # (an empty range at i == 0 means no match).
            if check_lhs and not any(sequence[j] in self.lhs
                                     for j in range(i - 1, -1, -1)):
                continue
            # ...and any later segment may satisfy the right side.
            if check_rhs and not any(sequence[j] in self.rhs
                                     for j in range(i + 1, len(sequence))):
                continue
        results.append((seg, self))
    return results

def __str__(self):
    """
    Return the environment as a ``lhs_rhs`` string.

    For a long-distance filter, each specified side is marked with a
    ``*`` next to the underscore (``lhs*_*rhs``), and an unspecified
    side is rendered as an empty string.
    """
    if not self.long_distance:
        return '_'.join([self.lhs_string, self.rhs_string])
    # The '*' sits between the context and the target slot on each side
    # that is actually specified.
    left = self.lhs_string + '*' if self.lhs else ''
    right = '*' + self.rhs_string if self.rhs else ''
    return '_'.join([left, right])

def __eq__(self, other):
if not hasattr(other,'lhs'):
Expand All @@ -915,14 +958,15 @@ def __hash__(self):
return hash((self.rhs_string, self.lhs_string))

def __contains__(self, item):
    """
    Test whether an ``Environment`` satisfies this filter.

    Anything that is not an ``Environment`` is never contained. A side
    of the filter that is empty places no restriction on the item;
    otherwise the item's segment on that side must be in the filter's
    collection for that side.
    """
    if not isinstance(item, Environment):
        return False
    if self.rhs and item.rhs not in self.rhs:
        return False
    if self.lhs and item.lhs not in self.lhs:
        return False
    return True

class Attribute(object):
Expand Down Expand Up @@ -1625,6 +1669,7 @@ def check_coverage(self):
return [x for x in self._inventory.keys() if x not in self.specifier]

def phonological_search(self,seg_list,envs=None, sequence_type = 'transcription',
long_distance = False,
call_back = None, stop_check = None):
"""
Perform a search of a corpus for segments, with the option of only
Expand Down Expand Up @@ -1665,7 +1710,9 @@ def phonological_search(self,seg_list,envs=None, sequence_type = 'transcription'
call_back(0,len(self))
cur = 0
if envs is not None:
envs = [EnvironmentFilter(self, env) for env in envs]
envs = [EnvironmentFilter(self, env, long_distance, seg_list) for env in envs]
else:
envs = [EnvironmentFilter(self, '#_#', True, seg_list)]
results = list()
for word in self:
if stop_check is not None and stop_check():
Expand All @@ -1675,17 +1722,9 @@ def phonological_search(self,seg_list,envs=None, sequence_type = 'transcription'
if cur % 20 == 0:
call_back(cur)
founds = list()
for pos,seg in enumerate(getattr(word, sequence_type)):
if not seg in seg_list:
continue
if envs is None:
founds.append((seg,''))
continue
word_env = word.get_env(pos, sequence_type)
for env in envs:
if word_env in env:
founds.append((seg,env))
break
tier = getattr(word, sequence_type)
for env in envs:
founds.extend(env.apply(tier))
if founds:
results.append((word, founds))
return results
Expand Down
14 changes: 9 additions & 5 deletions corpustools/gui/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def data(self, index, role=None):
else:
data = 'No'
elif isinstance(data,list):
data = ', '.join(data)
data = ', '.join(map(str,data))
else:
data = str(data)
except IndexError:
Expand Down Expand Up @@ -453,18 +453,22 @@ def __init__(self, header, summary_header, results, settings, parent=None):
self.summary_header = summary_header
self.columns = self.header

self.rows = results
self.allData = self.rows
self.rows = [x[1] for x in results] #Tuples of feature specification, result_line)
self.allData = results
self.summarized = False

def _summarize(self):
typefreq = defaultdict(float)
tokenfreq = defaultdict(float)
for line in self.allData:
features, line = line
segs = line[2]
envs = line[3]
for i,seg in enumerate(segs):
segenv = seg,envs[i]
if features is None:
segenv = seg,envs[i]
else:
segenv = features,envs[i]
typefreq[segenv] += 1
tokenfreq[segenv] += line[0].frequency

Expand All @@ -481,7 +485,7 @@ def setSummarized(self, b):
if self.summarized:
self._summarize()
else:
self.rows = self.allData
self.rows = [x[1] for x in self.allData]
self.columns = self.header
self.layoutChanged.emit()

Expand Down
23 changes: 19 additions & 4 deletions corpustools/gui/psgui.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,21 @@ def __init__(self, parent, corpus, showToolTips):
pslayout.addWidget(self.envWidget)


optionLayout = QVBoxLayout()
optionLayout = QFormLayout()

self.tierWidget = TierWidget(corpus,include_spelling=False)

optionLayout.addWidget(self.tierWidget)
optionLayout.addRow(self.tierWidget)

self.longDistanceCheck = QCheckBox()

optionLayout.addRow('Allow for intervening segments', self.longDistanceCheck)

self.forceSegmentsCheck = QCheckBox()

self.forceSegmentsCheck.setChecked(True)

optionLayout.addRow('Force summary by segments', self.forceSegmentsCheck)

optionFrame = QGroupBox('Options')

Expand Down Expand Up @@ -120,12 +130,17 @@ def generateKwargs(self):
"Missing information", "Please specify at least one {}.".format(targetType[:-1].lower()))
return
if targetType == 'Features':
self.features = targetList
targetList = targetList[1:-1]
kwargs['seg_list'] = self.corpus.features_to_segments(targetList)
else:
self.features = None
kwargs['seg_list'] = targetList
if self.forceSegmentsCheck.isChecked():
self.features = None
kwargs['corpus'] = self.corpus
kwargs['sequence_type'] = self.tierWidget.value()
kwargs['long_distance'] = self.longDistanceCheck.isChecked()
envs = self.envWidget.value()
if len(envs) > 0:
kwargs['envs'] = envs
Expand All @@ -152,5 +167,5 @@ def setResults(self,results):
envs = [str(x[1]) for x in f]
except IndexError:
envs = []
self.results.append([w, str(getattr(w,self.tierWidget.value())),segs,
envs])
self.results.append((self.features, [w, str(getattr(w,self.tierWidget.value())),segs,
envs]))
3 changes: 3 additions & 0 deletions corpustools/gui/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -834,8 +834,11 @@ def redo(self):
if self.dialog.update:
self.table.model().addRows(self.dialog.results)
else:

dataModel = PhonoSearchResultsModel(self.dialog.header,
self.dialog.summary_header,
self.dialog.results, self._parent.settings)
dataModel.setSummarized(self.summarized)
self.table.setModel(dataModel)
self.raise_()
self.activateWindow()
15 changes: 12 additions & 3 deletions tests/test_lexicon.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,9 +408,18 @@ def test_contains(self):
self.assertFalse(env2 in envfilt)

envfilt = EnvironmentFilter(self.corpus,'[+feature1]_[+feature1]')
self.assertTrue(env1 in envfilt)
self.assertFalse(env2 in envfilt)
self.assertFalse(env3 in envfilt)
assert(env1 in envfilt)
assert(env2 not in envfilt)
assert(env3 not in envfilt)

def test_apply(self):
    """
    ``EnvironmentFilter.apply`` should report the single target 'a' in
    the transcription of corpus entry 'c', both with and without the
    long-distance option.
    """
    pattern = '[-feature1]_[+feature1]'
    for allow_intervening in (False, True):
        envfilt = EnvironmentFilter(self.corpus, pattern, allow_intervening, ['a'])
        word = self.corpus['c']
        found = envfilt.apply(word.transcription)
        assert found == [('a', envfilt)]


def test_categories_spe(specified_test_corpus):
Expand Down