# Inverted Index Map Reduce
# Key point: on the reducer side, the values collected for each key must be sorted and de-duplicated.
'''
Definition of Document
class Document:
    def __init__(self, id, content):
        self.id = id
        self.content = content
'''
class InvertedIndex:
    """Map/reduce job that builds an inverted index.

    The mapper emits ``(word, document id)`` pairs and the reducer turns the
    ids gathered for one word into a sorted list without duplicates.
    """

    # @param {Document} value is a document
    def mapper(self, _, value):
        """Yield one ``(word, id)`` pair for every token in the document.

        Args:
            _: Unused key slot of the mapper signature.
            value: Object exposing ``id`` and a string ``content`` attribute.

        Yields:
            ``(word, value.id)`` for each whitespace-separated token of
            ``value.content``, in order of appearance. A word that occurs
            several times is emitted several times; de-duplication is done
            in the reducer.
        """
        doc_id = value.id
        # split() with no argument splits on runs of whitespace and never
        # produces empty tokens, so blank content yields nothing.
        for word in value.content.split():
            yield word, doc_id

    # @param key is from mapper
    # @param values is a set of value with the same key
    def reducer(self, key, values):
        """Yield the key with its values de-duplicated and sorted ascending.

        Args:
            key: Key produced by the mapper.
            values: Iterable of values emitted for ``key``; it may contain
                duplicates. Items must be hashable and mutually orderable.

        Yields:
            A single ``(key, sorted_unique_values)`` pair, where the second
            element is a list.
        """
        # set() removes duplicates; sorted() accepts any iterable and
        # returns a new list, so no intermediate list() is needed.
        yield key, sorted(set(values))