From e4417c884f220e7eb909723b0b3f5c96c731cf81 Mon Sep 17 00:00:00 2001 From: Jeff Zhang Date: Tue, 20 Oct 2015 17:11:26 +0800 Subject: [PATCH 1/2] [SPARK-11205] [PYSPARK] Delegate to scala DataFrame API rather than print in python --- python/pyspark/sql/dataframe.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 033b31983ffa..7e52d491817d 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -203,7 +203,7 @@ def printSchema(self): |-- name: string (nullable = true) """ - print(self._jdf.schema().treeString()) + self._jdf.printSchema() @since(1.3) def explain(self, extended=False): @@ -224,10 +224,7 @@ def explain(self, extended=False): == Physical Plan == ... """ - if extended: - print(self._jdf.queryExecution().toString()) - else: - print(self._jdf.queryExecution().executedPlan().toString()) + self._jdf.explain(extended) @since(1.3) def isLocal(self): @@ -253,7 +250,7 @@ def show(self, n=20, truncate=True): | 5| Bob| +---+-----+ """ - print(self._jdf.showString(n, truncate)) + self._jdf.show(n, truncate) def __repr__(self): return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in self.dtypes)) From 39f3fc0b73b76c521f9dda7e61b4548896ca742c Mon Sep 17 00:00:00 2001 From: Jeff Zhang Date: Wed, 21 Oct 2015 12:25:21 +0800 Subject: [PATCH 2/2] Match the outputs of scala api & python api --- python/pyspark/sql/dataframe.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 7e52d491817d..36fc6e0611dc 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -203,7 +203,7 @@ def printSchema(self): |-- name: string (nullable = true) """ - self._jdf.printSchema() + print(self._jdf.schema().treeString()) @since(1.3) def explain(self, extended=False): @@ -212,6 +212,7 @@ def explain(self, extended=False): :param extended: boolean, default ``False``. If ``False``, prints only the physical plan. >>> df.explain() + == Physical Plan == Scan PhysicalRDD[age#0,name#1] >>> df.explain(True) @@ -224,7 +225,10 @@ def explain(self, extended=False): == Physical Plan == ... """ - self._jdf.explain(extended) + if extended: + print(self._jdf.queryExecution().toString()) + else: + print(self._jdf.queryExecution().simpleString()) @since(1.3) def isLocal(self): @@ -250,7 +254,7 @@ def show(self, n=20, truncate=True): | 5| Bob| +---+-----+ """ - self._jdf.show(n, truncate) + print(self._jdf.showString(n, truncate)) def __repr__(self): return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in self.dtypes))