def map(self, f, preservesPartitioning=False):
    """
    Return a new RDD by applying a function to each element of this RDD.

    C{f} is called once per element and its return value becomes the
    corresponding element of the new RDD. C{preservesPartitioning} is
    forwarded unchanged to C{PipelinedRDD}.

    >>> rdd = sc.parallelize(["b", "a", "c"])
    >>> sorted(rdd.map(lambda x: (x, 1)).collect())
    [('a', 1), ('b', 1), ('c', 1)]
    """
    # Per-partition adapter handed to PipelinedRDD. The first argument
    # (presumably the partition index -- confirm against PipelinedRDD) is
    # not needed for an element-wise map, so it is ignored.
    def func(split, iterator):
        # NOTE(review): imap is presumably itertools.imap, which would
        # stream results instead of building a list -- confirm the import.
        return imap(f, iterator)
    return PipelinedRDD(self, func, preservesPartitioning)