-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhomework2.py
More file actions
59 lines (52 loc) · 1.67 KB
/
homework2.py
File metadata and controls
59 lines (52 loc) · 1.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import codecs
import re
import collections
import pandas
import nltk
from collections import Counter
from nltk import FreqDist
wordDict = Counter()
## Number 2 ##
## Lower-cases the whole of hackers.log, extracts every \w+ token with a
## regular expression, and prints the 100 most frequent (word, count) pairs
## using collections.Counter.
## NOTE(review): this section reads the log with the platform default
## encoding, while Number 4 decodes the same file as cp720 -- confirm which
## one is intended.
print("Displaying Number 2")
with open('/home/ligma/scripts/hackers.log') as log:
    lowered = log.read().lower()
tokens = re.findall(r'\w+', lowered)
top_hundred = Counter(tokens).most_common(100)
print(top_hundred)
print("There are the 100 most common words in the hackers.log")
##############################################################
## Number 3a ##
## Counts the whitespace-separated tokens in hackers.log that are exactly ">"
## and reports that count as a rough number of chat messages.
print("Displaying Number 3")
fname = '/home/ligma/scripts/hackers.log'
marker = ">"
with open(fname, 'r') as log:
    # Each line contributes the number of stand-alone ">" tokens it holds.
    message_total = sum(entry.split().count(marker) for entry in log)
print("Roughly the total amount of actual messages:")
print(message_total)
####################################################
print("Displaying Number 4 ")
## Number 4 ##
## Decodes hackers.log as cp720, adds every whitespace-separated token to the
## file-level wordDict counter, then prints each token with its count, most
## frequent first.
with codecs.open('/home/ligma/scripts/hackers.log', 'r', encoding='cp720') as log:
    wordDict.update(
        token
        for entry in log
        for token in entry.strip().split()
    )
for token, tally in wordDict.most_common():
    print(token, tally)
##################################################################
print("Displaying Number 6")
## Number 6 ##
## Prints every line of hackers.log that contains the text "https".
## The pattern is compiled once, outside the loop, and each line is matched
## a single time.
https_line = re.compile(r'^.*https.*$')
with open('/home/ligma/scripts/hackers.log') as log:
    for line in log:
        if https_line.search(line):
            # line still carries its trailing newline, so print() leaves a
            # blank line after each match.
            print(line)
## Number 1 ##
## Prints every line of hackers.log that contains the substring "joined".
with open('/home/ligma/scripts/hackers.log') as log:
    for entry in log:
        if "joined" not in entry:
            continue
        print(entry)