Skip to content

Commit b0192d3

Browse files
committed
Added Long, Double and Boolean as usable types + unit test
1 parent f98a422 commit b0192d3

File tree

2 files changed

+13
-7
lines changed

2 files changed

+13
-7
lines changed

python/pyspark/context.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,13 @@ def __init__(self, sparkContext):
492492
Traceback (most recent call last):
493493
...
494494
ValueError:...
495+
496+
>>> allTypes = sc.parallelize([{"int" : 1, "string" : "string", "double" : 1.0, "long": 1L,
497+
... "boolean" : True}])
498+
>>> srdd = sqlCtx.applySchema(allTypes).map(lambda x: (x.int, x.string, x.double, x.long,
499+
... x.boolean))
500+
>>> srdd.collect()[0]
501+
(1, u'string', 1.0, 1, True)
495502
"""
496503
self._sc = sparkContext
497504
self._jsc = self._sc._jsc

sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -243,18 +243,17 @@ class SQLContext(@transient val sparkContext: SparkContext)
243243
def debugExec() = DebugQuery(executedPlan).execute().collect()
244244
}
245245

246+
// TODO: We only support primitive types, add support for nested types. Difficult because java
247+
// objects don't have classTags
246248
def applySchema(rdd: RDD[Map[String, _]]): SchemaRDD = {
247249
val schema = rdd.first.map { case (fieldName, obj) =>
248250
val dataType = obj.getClass match {
249251
case c: Class[_] if c == classOf[java.lang.String] => StringType
250252
case c: Class[_] if c == classOf[java.lang.Integer] => IntegerType
251-
// case c: Class[_] if c == java.lang.Short.TYPE => ShortType
252-
// case c: Class[_] if c == java.lang.Integer.TYPE => IntegerType
253-
// case c: Class[_] if c == java.lang.Long.TYPE => LongType
254-
// case c: Class[_] if c == java.lang.Double.TYPE => DoubleType
255-
// case c: Class[_] if c == java.lang.Byte.TYPE => ByteType
256-
// case c: Class[_] if c == java.lang.Float.TYPE => FloatType
257-
// case c: Class[_] if c == java.lang.Boolean.TYPE => BooleanType
253+
case c: Class[_] if c == classOf[java.lang.Long] => LongType
254+
case c: Class[_] if c == classOf[java.lang.Double] => DoubleType
255+
case c: Class[_] if c == classOf[java.lang.Boolean] => BooleanType
256+
case c => throw new Exception(s"Object of type $c cannot be used")
258257
}
259258
AttributeReference(fieldName, dataType, true)()
260259
}.toSeq

0 commit comments

Comments
 (0)