Skip to content

Commit 09b9980

Browse files
committed
made jrdd explicitly lazy
1 parent c608947 commit 09b9980

File tree

1 file changed

+9
-9
lines changed

1 file changed

+9
-9
lines changed

python/pyspark/rdd.py

Lines changed: 9 additions & 9 deletions
Original file line number · Diff line number · Diff line change
@@ -1394,20 +1394,25 @@ def __init__(self, d):
13941394
self.__dict__ = d
13951395
dict.__init__(self, d)
13961396

1397-
class SchemaRDD:
1397+
class SchemaRDD(RDD):
13981398

13991399
def __init__(self, jschema_rdd, sql_ctx):
14001400
self.sql_ctx = sql_ctx
14011401
self._sc = sql_ctx._sc
14021402
self._jschema_rdd = jschema_rdd
14031403

1404-
self._jrdd = self.toPython()._jrdd
14051404
self.is_cached = False
14061405
self.is_checkpointed = False
14071406
self.ctx = self.sql_ctx._sc
14081407
self._jrdd_deserializer = self.ctx.serializer
1409-
# TODO: Figure out how to make this lazy
1410-
#self._id = self._jrdd.id()
1408+
1409+
@property
1410+
def _jrdd(self):
1411+
return self.toPython()._jrdd
1412+
1413+
@property
1414+
def _id(self):
1415+
return self._jrdd.id()
14111416

14121417
def registerAsTable(self, name):
14131418
self._jschema_rdd.registerAsTable(name)
@@ -1416,11 +1421,6 @@ def toPython(self):
14161421
jrdd = self._jschema_rdd.javaToPython()
14171422
return RDD(jrdd, self._sc, self._sc.serializer).map(lambda d: Row(d))
14181423

1419-
customRDDDict = dict(RDD.__dict__)
1420-
del customRDDDict["__init__"]
1421-
1422-
SchemaRDD.__dict__.update(customRDDDict)
1423-
14241424
def _test():
14251425
import doctest
14261426
from pyspark.context import SparkContext

Comments (0)