-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathavguser.py
More file actions
50 lines (38 loc) · 1.3 KB
/
avguser.py
File metadata and controls
50 lines (38 loc) · 1.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/python
import sys
def read_input(file):
    """Lazily yield each line of *file* with trailing whitespace removed.

    Args:
        file: Any iterable of strings, e.g. an open text file or
            ``sys.stdin``. It is consumed one item at a time.

    Yields:
        Each item with trailing whitespace (including the line terminator)
        stripped. Leading whitespace is preserved, and blank lines are
        yielded as empty strings rather than skipped.
    """
    for raw_line in file:
        yield raw_line.rstrip()
def main():
    """Run the mapper: emit ``userid<TAB>rating`` for every valid record.

    Lines are read from standard input. A line counts as a record only when
    splitting it on ``'::'`` yields exactly four fields, interpreted as
    ``userid::movie::rating::date``. The user id is written as the key and
    the rating as the value; the movie and date fields are not used. Lines
    with any other number of fields are skipped silently.

    For every record written to standard output, one counter update for
    group ``pyNetflix1`` / counter ``mapper`` is written to standard error.
    """
    for line in read_input(sys.stdin):
        # Split the line by double colon.
        fields = line.split('::')
        # Parse only if there are 4 values in a line.
        if len(fields) != 4:
            continue
        userid = fields[0]
        rating = fields[2]
        # Output key-value pair: key = user id, value = rating.
        # Written with sys.stdout.write (same bytes as the old print
        # statement) so the script runs on both Python 2 and Python 3.
        sys.stdout.write('%s\t%s\n' % (userid, rating))
        # Report progress to Hadoop. Streaming recognises counter updates on
        # stderr only in the form "reporter:counter:<group>,<counter>,<amount>";
        # the earlier "report:" prefix did not match that form.
        # NOTE(review): emitted once per written record -- the nesting of
        # this statement was inferred; confirm it should not run per line.
        sys.stderr.write("reporter:counter:pyNetflix1,mapper,1\n")
# Run the mapper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()