Skip to content

Commit 09b9980

Browse files
committed
made jrdd explicitly lazy
1 parent c608947 commit 09b9980

File tree

1 file changed

+9
-9
lines changed

1 file changed

+9
-9
lines changed

python/pyspark/rdd.py

Lines changed: 9 additions & 9 deletions
Original file line number · Diff line number · Diff line change
@@ -1394,20 +1394,25 @@ def __init__(self, d):
13941394
self.__dict__ = d
13951395
dict.__init__(self, d)
13961396

1397-
class SchemaRDD:
1397+
class SchemaRDD(RDD):
13981398

13991399
def __init__(self, jschema_rdd, sql_ctx):
14001400
self.sql_ctx = sql_ctx
14011401
self._sc = sql_ctx._sc
14021402
self._jschema_rdd = jschema_rdd
14031403

1404-
self._jrdd = self.toPython()._jrdd
14051404
self.is_cached = False
14061405
self.is_checkpointed = False
14071406
self.ctx = self.sql_ctx._sc
14081407
self._jrdd_deserializer = self.ctx.serializer
1409-
# TODO: Figure out how to make this lazy
1410-
#self._id = self._jrdd.id()
1408+
1409+
@property
1410+
def _jrdd(self):
1411+
return self.toPython()._jrdd
1412+
1413+
@property
1414+
def _id(self):
1415+
return self._jrdd.id()
14111416

14121417
def registerAsTable(self, name):
14131418
self._jschema_rdd.registerAsTable(name)
@@ -1416,11 +1421,6 @@ def toPython(self):
14161421
jrdd = self._jschema_rdd.javaToPython()
14171422
return RDD(jrdd, self._sc, self._sc.serializer).map(lambda d: Row(d))
14181423

1419-
customRDDDict = dict(RDD.__dict__)
1420-
del customRDDDict["__init__"]
1421-
1422-
SchemaRDD.__dict__.update(customRDDDict)
1423-
14241424
def _test():
14251425
import doctest
14261426
from pyspark.context import SparkContext

Comments (0)