-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathkwic2.py
More file actions
executable file
·130 lines (101 loc) · 4.61 KB
/
kwic2.py
File metadata and controls
executable file
·130 lines (101 loc) · 4.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/bin/env python3
import fileinput
import sys
# Words that must not be used as index keywords; readInFile() stores them
# upper-cased so they can be compared case-insensitively.
excludedWords = []
# Lines to be indexed; readInFile() stores each line as a list of its
# whitespace-separated words.
lines2BIndexed = []
def readInFile():
    '''Read the input and fill the module-level word lists.

    Input is taken from fileinput.input(). A line consisting solely of
    "::" (ignoring trailing whitespace) acts as a section separator:

    * lines before the first separator are ignored,
    * lines after the first separator are appended, upper-cased, to
      excludedWords,
    * lines after the second separator are split on whitespace and the
      resulting word list is appended to lines2BIndexed,
    * lines after any further separator are ignored.
    '''
    section = 0
    for rawLine in fileinput.input():
        text = rawLine.rstrip()
        if text == "::":
            # Separator line: move on to the next section of the input.
            section += 1
        elif section == 1:
            # Exclusion words are stored upper-cased.
            excludedWords.append(text.upper())
        elif section == 2:
            # Lines to index are stored as lists of words.
            lines2BIndexed.append(text.split())
def compareTo(line1, word1, line2, word2):
    '''Order two word positions in lines2BIndexed.

    Each position is given as (line index, word index). Positions are
    ordered first by the upper-cased word, then by line index, then by
    word index within the line.

    Returns -1 if the first position sorts before the second, +1 if it
    sorts after, and 0 if both positions are identical.
    '''
    # Tuples compare element by element, which reproduces the
    # word / line / position precedence in a single comparison.
    firstKey = (lines2BIndexed[line1][word1].upper(), line1, word1)
    secondKey = (lines2BIndexed[line2][word2].upper(), line2, word2)
    return (firstKey > secondKey) - (firstKey < secondKey)
def getLowestNonIndexedWord(lineIndexPrevWord, wordIndexPrevWord):
    '''Return the position of the next word to index.

    Scans every word in lines2BIndexed, skipping words whose upper-cased
    form is in excludedWords, and picks the smallest position (according
    to compareTo) that sorts strictly after the previously indexed
    position.

    lineIndexPrevWord / wordIndexPrevWord: line and word index of the
    previously indexed word, or None / None when nothing has been
    indexed yet.

    Returns a two-element list [lineIndex, wordIndex], or [None, None]
    when no further word qualifies.
    '''
    # Build the lookup set once per call instead of scanning the list
    # for every word.
    excluded = set(excludedWords)
    lowest = [None, None]
    for lineIndex, line in enumerate(lines2BIndexed):
        for wordIndex, word in enumerate(line):
            if word.upper() in excluded:
                continue
            # Skip candidates that do not beat the best one found so far.
            if (lowest[0] is not None
                    and compareTo(lineIndex, wordIndex, lowest[0], lowest[1]) >= 0):
                continue
            # Skip candidates at or before the previously indexed word.
            if (lineIndexPrevWord is not None
                    and compareTo(lineIndex, wordIndex,
                                  lineIndexPrevWord, wordIndexPrevWord) <= 0):
                continue
            lowest = [lineIndex, wordIndex]
    return lowest
def splitLineIntoTriple(indexOfLine, indexOfWord):
    '''Split a line of lines2BIndexed into [prefix, keyword, suffix].

    indexOfLine: index of the line in lines2BIndexed.
    indexOfWord: index of the keyword within that line.

    The prefix is the words before the keyword joined by single spaces,
    the suffix is the words after it. Only the last 19 characters of the
    prefix and the first (30 - len(keyword)) characters of the suffix
    are displayed. When that cut-off would fall in the middle of a word,
    whole words are dropped from the far end until the text fits. When
    the cut-off falls on a word boundary the text is returned untrimmed
    and is left for the formatter to slice.

    A prefix or suffix in which no whole word fits becomes ''. A keyword
    of 30 or more characters leaves no room, so the suffix becomes ''.

    Returns a three-element list [prefix, keyword, suffix].
    '''
    words = lines2BIndexed[indexOfLine]
    prefix = ' '.join(words[:indexOfWord])
    keyword = words[indexOfWord]
    suffix = ' '.join(words[indexOfWord + 1:])

    # Drop leading words only when the 19-character window starts
    # in the middle of a word.
    if len(prefix) > 19 and prefix[-19] != ' ' and prefix[-20] != ' ':
        while len(prefix) > 19:
            # partition() yields '' once no space is left, so a single
            # over-long word empties the prefix instead of raising.
            prefix = prefix.partition(' ')[2]

    # Number of suffix characters that fit after the keyword.
    room = 30 - len(keyword)
    if room <= 0:
        # The keyword alone fills the available width.
        suffix = ''
    elif len(suffix) > room and suffix[room - 1] != ' ' and suffix[room] != ' ':
        while len(suffix) > room:
            # rpartition() yields '' once no space is left, so a single
            # over-long word empties the suffix instead of looping forever.
            suffix = suffix.rpartition(' ')[0]

    return [prefix, keyword, suffix]
def printFormattedTriple(triple):
    '''Format a [prefix, keyword, suffix] triple as one index line.

    Despite the name, nothing is printed; the formatted string is
    returned.

    Layout: 9 blanks, then the last 19 characters of the prefix
    right-aligned in a 19-character field, one blank, the upper-cased
    keyword, one blank, and as much of the suffix as fits in
    (30 - len(keyword)) characters so the line does not run past
    column 60. Trailing whitespace is stripped.

    A keyword longer than 30 characters leaves no room for the suffix,
    so the suffix is omitted entirely.
    '''
    prefix, keyword, suffix = triple[0], triple[1], triple[2]
    # Clamp at zero: a negative slice bound would count from the end of
    # the suffix and keep most of it instead of none of it.
    room = max(0, 30 - len(keyword))
    line = ''.join([
        " " * 9,
        prefix[-19:].rjust(19),
        " ",
        keyword.upper(),
        " ",
        suffix[:room],
    ])
    return line.rstrip()
def main():
    '''Read the input, then print one formatted line per indexed word.

    Words are emitted in the order defined by getLowestNonIndexedWord,
    which is asked repeatedly for the next position after the one just
    printed until it reports [None, None].
    '''
    readInFile()
    lineIndex, wordIndex = getLowestNonIndexedWord(None, None)
    while lineIndex is not None:
        triple = splitLineIntoTriple(lineIndex, wordIndex)
        print(printFormattedTriple(triple))
        lineIndex, wordIndex = getLowestNonIndexedWord(lineIndex, wordIndex)


# Run the indexer only when executed as a script.
if __name__ == '__main__':
    main()