@@ -2077,9 +2077,9 @@ def dtypes(self):
20772077 """Return all column names and their data types as a list.
20782078
20792079 >>> df.dtypes
2080- [(u 'age', 'IntegerType '), (u 'name', 'StringType ')]
2080+ [('age', 'integer '), ('name', 'string ')]
20812081 """
2082- return [(f .name , str ( f .dataType )) for f in self .schema ().fields ]
2082+ return [(str ( f .name ), f .dataType . jsonValue ( )) for f in self .schema ().fields ]
20832083
20842084 @property
20852085 def columns (self ):
@@ -2263,18 +2263,6 @@ def subtract(self, other):
22632263 """
22642264 return DataFrame (getattr (self ._jdf , "except" )(other ._jdf ), self .sql_ctx )
22652265
2266- def sample (self , withReplacement , fraction , seed = None ):
2267- """ Return a new DataFrame by sampling a fraction of rows.
2268-
2269- >>> df.sample(False, 0.5, 10).collect()
2270- [Row(age=2, name=u'Alice')]
2271- """
2272- if seed is None :
2273- jdf = self ._jdf .sample (withReplacement , fraction )
2274- else :
2275- jdf = self ._jdf .sample (withReplacement , fraction , seed )
2276- return DataFrame (jdf , self .sql_ctx )
2277-
22782266 def addColumn (self , colName , col ):
22792267 """ Return a new :class:`DataFrame` by adding a column.
22802268
@@ -2376,28 +2364,6 @@ def sum(self):
23762364 group."""
23772365
23782366
2379- SCALA_METHOD_MAPPINGS = {
2380- '=' : '$eq' ,
2381- '>' : '$greater' ,
2382- '<' : '$less' ,
2383- '+' : '$plus' ,
2384- '-' : '$minus' ,
2385- '*' : '$times' ,
2386- '/' : '$div' ,
2387- '!' : '$bang' ,
2388- '@' : '$at' ,
2389- '#' : '$hash' ,
2390- '%' : '$percent' ,
2391- '^' : '$up' ,
2392- '&' : '$amp' ,
2393- '~' : '$tilde' ,
2394- '?' : '$qmark' ,
2395- '|' : '$bar' ,
2396- '\\ ' : '$bslash' ,
2397- ':' : '$colon' ,
2398- }
2399-
2400-
24012367def _create_column_from_literal (literal ):
24022368 sc = SparkContext ._active_spark_context
24032369 return sc ._jvm .Dsl .lit (literal )
@@ -2416,23 +2382,18 @@ def _to_java_column(col):
24162382 return jcol
24172383
24182384
2419- def _scalaMethod (name ):
2420- """ Translate operators into methodName in Scala
2421-
2422- >>> _scalaMethod('+')
2423- '$plus'
2424- >>> _scalaMethod('>=')
2425- '$greater$eq'
2426- >>> _scalaMethod('cast')
2427- 'cast'
2428- """
2429- return '' .join (SCALA_METHOD_MAPPINGS .get (c , c ) for c in name )
2430-
2431-
24322385def _unary_op (name , doc = "unary operator" ):
24332386 """ Create a method for given unary operator """
24342387 def _ (self ):
2435- jc = getattr (self ._jc , _scalaMethod (name ))()
2388+ jc = getattr (self ._jc , name )()
2389+ return Column (jc , self .sql_ctx )
2390+ _ .__doc__ = doc
2391+ return _
2392+
2393+
2394+ def _dsl_op (name , doc = '' ):
2395+ def _ (self ):
2396+ jc = getattr (self ._sc ._jvm .Dsl , name )(self ._jc )
24362397 return Column (jc , self .sql_ctx )
24372398 _ .__doc__ = doc
24382399 return _
@@ -2443,7 +2404,7 @@ def _bin_op(name, doc="binary operator"):
24432404 """
24442405 def _ (self , other ):
24452406 jc = other ._jc if isinstance (other , Column ) else other
2446- njc = getattr (self ._jc , _scalaMethod ( name ) )(jc )
2407+ njc = getattr (self ._jc , name )(jc )
24472408 return Column (njc , self .sql_ctx )
24482409 _ .__doc__ = doc
24492410 return _
@@ -2454,7 +2415,7 @@ def _reverse_op(name, doc="binary operator"):
24542415 """
24552416 def _ (self , other ):
24562417 jother = _create_column_from_literal (other )
2457- jc = getattr (jother , _scalaMethod ( name ) )(self ._jc )
2418+ jc = getattr (jother , name )(self ._jc )
24582419 return Column (jc , self .sql_ctx )
24592420 _ .__doc__ = doc
24602421 return _
@@ -2481,34 +2442,33 @@ def __init__(self, jc, sql_ctx=None):
24812442 super (Column , self ).__init__ (jc , sql_ctx )
24822443
24832444 # arithmetic operators
2484- __neg__ = _unary_op ("unary_-" )
2485- __add__ = _bin_op ("+" )
2486- __sub__ = _bin_op ("-" )
2487- __mul__ = _bin_op ("*" )
2488- __div__ = _bin_op ("/" )
2489- __mod__ = _bin_op ("%" )
2490- __radd__ = _bin_op ("+" )
2491- __rsub__ = _reverse_op ("-" )
2492- __rmul__ = _bin_op ("*" )
2493- __rdiv__ = _reverse_op ("/" )
2494- __rmod__ = _reverse_op ("%" )
2495- __abs__ = _unary_op ("abs" )
2445+ __neg__ = _dsl_op ("negate" )
2446+ __add__ = _bin_op ("plus" )
2447+ __sub__ = _bin_op ("minus" )
2448+ __mul__ = _bin_op ("multiply" )
2449+ __div__ = _bin_op ("divide" )
2450+ __mod__ = _bin_op ("mod" )
2451+ __radd__ = _bin_op ("plus" )
2452+ __rsub__ = _reverse_op ("minus" )
2453+ __rmul__ = _bin_op ("multiply" )
2454+ __rdiv__ = _reverse_op ("divide" )
2455+ __rmod__ = _reverse_op ("mod" )
24962456
24972457 # logistic operators
2498- __eq__ = _bin_op ("=== " )
2499- __ne__ = _bin_op ("!== " )
2500- __lt__ = _bin_op ("< " )
2501- __le__ = _bin_op ("<= " )
2502- __ge__ = _bin_op (">= " )
2503- __gt__ = _bin_op ("> " )
2458+ __eq__ = _bin_op ("equalTo " )
2459+ __ne__ = _bin_op ("notEqual " )
2460+ __lt__ = _bin_op ("lt " )
2461+ __le__ = _bin_op ("leq " )
2462+ __ge__ = _bin_op ("geq " )
2463+ __gt__ = _bin_op ("gt " )
25042464
25052465 # `and`, `or`, `not` cannot be overloaded in Python,
25062466 # so use bitwise operators as boolean operators
2507- __and__ = _bin_op ('&& ' )
2508- __or__ = _bin_op ('|| ' )
2509- __invert__ = _unary_op ( 'unary_! ' )
2510- __rand__ = _bin_op ("&& " )
2511- __ror__ = _bin_op ("|| " )
2467+ __and__ = _bin_op ('and ' )
2468+ __or__ = _bin_op ('or ' )
2469+ __invert__ = _dsl_op ( 'not ' )
2470+ __rand__ = _bin_op ("and " )
2471+ __ror__ = _bin_op ("or " )
25122472
25132473 # container operators
25142474 __contains__ = _bin_op ("contains" )
0 commit comments