Skip to content

Commit f535802

Browse files
Davies Liu authored and rxin committed
[SPARK-6536] [PySpark] Column.inSet() in Python
```
>>> df[df.name.inSet("Bob", "Mike")].collect()
[Row(age=5, name=u'Bob')]
>>> df[df.age.inSet([1, 2, 3])].collect()
[Row(age=2, name=u'Alice')]
```

Author: Davies Liu <[email protected]>

Closes apache#5190 from davies/in and squashes the following commits:

6b73a47 [Davies Liu] Column.inSet() in Python
1 parent 276ef1c commit f535802

File tree

1 file changed

+17
-0
lines changed

1 file changed

+17
-0
lines changed

python/pyspark/sql/dataframe.py

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -985,6 +985,23 @@ def substr(self, startPos, length):
985985

986986
# Reuse substr as the handler for the __getslice__ slicing hook.
__getslice__ = substr
987987

988+
def inSet(self, *cols):
    """
    Build a boolean expression that evaluates to true when the value of
    this expression is contained by the evaluated values of the arguments.

    The candidates may be given as separate positional arguments or as a
    single list or set. Each candidate may be a :class:`Column` or any
    other value; non-Column values are wrapped with
    ``_create_column_from_literal``.

    >>> df[df.name.inSet("Bob", "Mike")].collect()
    [Row(age=5, name=u'Bob')]
    >>> df[df.age.inSet([1, 2, 3])].collect()
    [Row(age=2, name=u'Alice')]
    """
    # A lone list/set argument stands for the candidates themselves.
    candidates = cols
    if len(candidates) == 1 and isinstance(candidates[0], (list, set)):
        candidates = candidates[0]

    # Columns contribute their ``_jc``; everything else is wrapped as a literal.
    jvm_columns = []
    for item in candidates:
        if isinstance(item, Column):
            jvm_columns.append(item._jc)
        else:
            jvm_columns.append(_create_column_from_literal(item))

    ctx = SparkContext._active_spark_context
    java_list = ListConverter().convert(jvm_columns, ctx._gateway._gateway_client)
    # "in" is a Python keyword, so the method has to be looked up by name.
    in_method = getattr(self._jc, "in")
    return Column(in_method(ctx._jvm.PythonUtils.toSeq(java_list)))
1004+
9881005
# order
9891006
asc = _unary_op(
    "asc",
    "Returns a sort expression based on the ascending order of the given column name.",
)

0 commit comments

Comments
 (0)