From 485e9ba1d1dc2df81130a8bd883105646247d449 Mon Sep 17 00:00:00 2001
From: 0x0FFF
Date: Tue, 8 Sep 2015 14:27:28 -0700
Subject: [PATCH 1/2] [SPARK-9014][SQL] Allow Python spark API to use built-in exponential operator

---
Reviewer note (below the "---" separator, so not part of the commit message):
  * Adds the module-level factory `_bin_func_op(name, reverse=False, doc=...)`
    to column.py. The method it returns looks `name` up on
    `sc._jvm.functions` and calls it with (self, other), or with
    (other, self) when `reverse=True`, wrapping the result in a `Column`.
  * In this revision a non-Column operand is coerced with `float(other)`.
  * Wires `Column.__pow__` / `Column.__rpow__` to the "pow" function and
    extends `test_column_operators` with `1 ** ci` and `ci ** 1`.

 python/pyspark/sql/column.py | 13 +++++++++++++
 python/pyspark/sql/tests.py  |  2 +-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index 56e75e8caee88..fbdef8f9721bc 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -91,6 +91,17 @@ def _(self):
     return _
 
 
+def _bin_func_op(name, reverse=False, doc="binary function"):
+    def _(self, other):
+        sc = SparkContext._active_spark_context
+        fn = getattr(sc._jvm.functions, name)
+        jc = other._jc if isinstance(other, Column) else float(other)
+        njc = fn(self._jc, jc) if not reverse else fn(jc, self._jc)
+        return Column(njc)
+    _.__doc__ = doc
+    return _
+
+
 def _bin_op(name, doc="binary operator"):
     """ Create a method for given binary operator
     """
@@ -151,6 +162,8 @@ def __init__(self, jc):
     __rdiv__ = _reverse_op("divide")
     __rtruediv__ = _reverse_op("divide")
     __rmod__ = _reverse_op("mod")
+    __pow__ = _bin_func_op("pow")
+    __rpow__ = _bin_func_op("pow", reverse=True)
 
     # logistic operators
     __eq__ = _bin_op("equalTo")
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index eb449e8679fa0..f2172b7a27d88 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -568,7 +568,7 @@ def test_column_operators(self):
         cs = self.df.value
         c = ci == cs
         self.assertTrue(isinstance((- ci - 1 - 2) % 3 * 2.5 / 3.5, Column))
-        rcc = (1 + ci), (1 - ci), (1 * ci), (1 / ci), (1 % ci)
+        rcc = (1 + ci), (1 - ci), (1 * ci), (1 / ci), (1 % ci), (1 ** ci), (ci ** 1)
         self.assertTrue(all(isinstance(c, Column) for c in rcc))
         cb = [ci == 5, ci != 0, ci > 3, ci < 4, ci >= 0, ci <= 7]
         self.assertTrue(all(isinstance(c, Column) for c in cb))

From aecc0c2fb2e4fddcec97e4cca9eac243df848acb Mon Sep 17 00:00:00 2001
From: 0x0FFF
Date: Fri, 11 Sep 2015 00:48:06 -0700
Subject: [PATCH 2/2] [SPARK-9014][SQL] Reverted to the previous option and replaced float() with lit()

---
Reviewer note (below the "---" separator, so not part of the commit message):
  * Single-line follow-up to patch 1/2: inside `_bin_func_op`, a non-Column
    operand is now passed through `_create_column_from_literal(other)`
    instead of being coerced with `float(other)`.

 python/pyspark/sql/column.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index fbdef8f9721bc..e035a2a20e4cf 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -95,7 +95,7 @@ def _bin_func_op(name, reverse=False, doc="binary function"):
     def _(self, other):
         sc = SparkContext._active_spark_context
         fn = getattr(sc._jvm.functions, name)
-        jc = other._jc if isinstance(other, Column) else float(other)
+        jc = other._jc if isinstance(other, Column) else _create_column_from_literal(other)
         njc = fn(self._jc, jc) if not reverse else fn(jc, self._jc)
         return Column(njc)
     _.__doc__ = doc