-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathwordcount1.py
More file actions
125 lines (77 loc) · 1.6 KB
/
wordcount1.py
File metadata and controls
125 lines (77 loc) · 1.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# -*- coding: utf-8 -*-
"""
Created on Sat Nov 16 07:17:41 2019
@author: BenTemp
"""
import collections
# Input locations and report size used when the file is run as a script.
BOOK_PATH = '98-0.txt'
STOPWORDS_PATH = 'stopwords'
TOP_N = 20

# Characters deleted from the text before it is split into words.  The
# closing curly quote is listed so that it is handled like the opening one
# and does not stay glued to the last word of a quotation.
PUNCTUATION = '.,"“”'
_STRIP_PUNCTUATION = str.maketrans('', '', PUNCTUATION)


def tokenize(text):
    """Return the lower-cased words of *text* with punctuation removed.

    Every character in ``PUNCTUATION`` is deleted, then the text is split on
    runs of whitespace.  Splitting on whitespace runs (rather than on single
    spaces) means line breaks, repeated spaces and leading/trailing blanks
    never produce empty tokens, so no separate clean-up pass over the list
    is needed.
    """
    return text.lower().translate(_STRIP_PUNCTUATION).split()


def parse_stopwords(text):
    """Return the set of stop words listed one per line in *text*.

    Lines are lower-cased and stripped; blank lines are ignored.  A set is
    returned so that membership tests are constant-time.
    """
    stripped = (line.strip() for line in text.lower().splitlines())
    return {word for word in stripped if word}


def count_words(tokens, stopwords):
    """Count how often each token occurs, skipping those in *stopwords*.

    Returns a ``collections.Counter`` mapping word -> number of occurrences.
    """
    return collections.Counter(
        token for token in tokens if token not in stopwords
    )


def main(book_path=BOOK_PATH, stopwords_path=STOPWORDS_PATH, top_n=TOP_N):
    """Print the *top_n* most frequent non-stop-words found in *book_path*.

    Both files are opened with ``with`` so they are closed as soon as their
    contents have been read.  Each report line is printed as
    ``word :  count``.
    """
    with open(book_path, encoding="utf8") as book:
        tokens = tokenize(book.read())
    with open(stopwords_path, encoding="utf8") as stop_file:
        stopwords = parse_stopwords(stop_file.read())

    for word, count in count_words(tokens, stopwords).most_common(top_n):
        print(word, ": ", count)


if __name__ == "__main__":
    main()
"""
for i in x:
print(i)
if i in mydict:
print('key found')
file2=open('stopwordstest.txt')
sw=file2.read()
sw=sw.split("\n")
if x[10] in sw:
x.remove(x[10])
import collections
d = collections.Counter(mydict)
#print(d.most_common(10))
for word, count in d.most_common(10):
print(word, ": ", count)
: 4377
the : 4338
a : 1663
I : 1446
his : 1080
Mr. : 602
The : 586
he : 579
to : 579
said : 570
k=1
for i in x:
if i in sw:
x.remove(i)
if i=='the':
print('removed the word="the" :',i,',',k,'times')
k+=1
"""