forked from P7h/IntroToHadoopAndMR__Udacity_Course
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathP2Q2_Reducer.py
More file actions
32 lines (22 loc) · 672 Bytes
/
P2Q2_Reducer.py
File metadata and controls
32 lines (22 loc) · 672 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#!/usr/bin/python
# Write a MapReduce program which will display the number of hits for each different file on the Web site.
import sys
def reduce_counts(lines):
    """Sum the counts of consecutive records that share a key.

    Each record is expected to look like ``key<TAB>count``. Totals are
    flushed whenever the key changes, so records with the same key must be
    adjacent in the input (presumably guaranteed by the MapReduce sort
    phase -- confirm against how the job is launched).

    Records that do not split into exactly two tab-separated fields, or
    whose count is not an integer, are skipped.

    Args:
        lines: iterable of text lines (for example ``sys.stdin``).

    Yields:
        ``(key, total)`` tuples, in the order the keys first appear.
    """
    current_key = None
    total = 0

    for line in lines:
        fields = line.strip().split("\t")
        if len(fields) != 2:
            # Something has gone wrong. Skip this line.
            continue

        key, raw_count = fields
        try:
            count = int(raw_count)
        except ValueError:
            # A non-numeric count is handled like any other malformed
            # record instead of aborting the whole reducer.
            continue

        if current_key is not None and key != current_key:
            # The key changed: the previous group is complete.
            yield current_key, total
            total = 0

        current_key = key
        total += count

    # Flush the final group; nothing is emitted when no valid record was seen.
    if current_key is not None:
        yield current_key, total


def main():
    """Read mapper output from stdin and write ``key<TAB>total`` lines to stdout."""
    for key, total in reduce_counts(sys.stdin):
        # Write the separator explicitly. The Python 2 statement
        # `print key, "\t", total` inserts a space before the tab, which
        # leaves a trailing space on every emitted key. sys.stdout.write
        # also behaves the same on Python 2 and Python 3.
        sys.stdout.write("%s\t%s\n" % (key, total))


if __name__ == "__main__":
    main()