Commit 122d1e7

Address comments.

1 parent: 03bfd95

6 files changed: 78 additions & 73 deletions

python/pyspark/sql.py

Lines changed: 38 additions & 16 deletions

@@ -26,13 +26,16 @@
     "ShortType", "ArrayType", "MapType", "StructField", "StructType",
     "SQLContext", "HiveContext", "LocalHiveContext", "TestHiveContext", "SchemaRDD", "Row"]
 
+
 class PrimitiveTypeSingleton(type):
     _instances = {}
+
     def __call__(cls):
         if cls not in cls._instances:
             cls._instances[cls] = super(PrimitiveTypeSingleton, cls).__call__()
         return cls._instances[cls]
 
+
 class StringType(object):
     """Spark SQL StringType

@@ -44,6 +47,7 @@ class StringType(object):
     def __repr__(self):
         return "StringType"
 
+
 class BinaryType(object):
     """Spark SQL BinaryType

@@ -55,6 +59,7 @@ class BinaryType(object):
     def __repr__(self):
         return "BinaryType"
 
+
 class BooleanType(object):
     """Spark SQL BooleanType

@@ -66,6 +71,7 @@ class BooleanType(object):
     def __repr__(self):
         return "BooleanType"
 
+
 class TimestampType(object):
     """Spark SQL TimestampType

@@ -77,6 +83,7 @@ class TimestampType(object):
     def __repr__(self):
         return "TimestampType"
 
+
 class DecimalType(object):
     """Spark SQL DecimalType

@@ -88,6 +95,7 @@ class DecimalType(object):
     def __repr__(self):
         return "DecimalType"
 
+
 class DoubleType(object):
     """Spark SQL DoubleType

@@ -99,13 +107,15 @@ class DoubleType(object):
     def __repr__(self):
         return "DoubleType"
 
+
 class FloatType(object):
     """Spark SQL FloatType
 
     For now, please use L{DoubleType} instead of using L{FloatType}.
     Because query evaluation is done in Scala, java.lang.Double will be be used
     for Python float numbers. Because the underlying JVM type of FloatType is
-    java.lang.Float (in Java) and Float (in scala), there will be a java.lang.ClassCastException
+    java.lang.Float (in Java) and Float (in scala), and we are trying to cast the type,
+    there will be a java.lang.ClassCastException
     if FloatType (Python) is used.
 
     """

@@ -114,13 +124,15 @@ class FloatType(object):
     def __repr__(self):
         return "FloatType"
 
+
 class ByteType(object):
     """Spark SQL ByteType
 
     For now, please use L{IntegerType} instead of using L{ByteType}.
     Because query evaluation is done in Scala, java.lang.Integer will be be used
     for Python int numbers. Because the underlying JVM type of ByteType is
-    java.lang.Byte (in Java) and Byte (in scala), there will be a java.lang.ClassCastException
+    java.lang.Byte (in Java) and Byte (in scala), and we are trying to cast the type,
+    there will be a java.lang.ClassCastException
     if ByteType (Python) is used.
 
     """

@@ -129,6 +141,7 @@ class ByteType(object):
     def __repr__(self):
         return "ByteType"
 
+
 class IntegerType(object):
     """Spark SQL IntegerType

@@ -140,6 +153,7 @@ class IntegerType(object):
     def __repr__(self):
         return "IntegerType"
 
+
 class LongType(object):
     """Spark SQL LongType

@@ -152,13 +166,15 @@ class LongType(object):
     def __repr__(self):
         return "LongType"
 
+
 class ShortType(object):
     """Spark SQL ShortType
 
     For now, please use L{IntegerType} instead of using L{ShortType}.
     Because query evaluation is done in Scala, java.lang.Integer will be be used
     for Python int numbers. Because the underlying JVM type of ShortType is
-    java.lang.Short (in Java) and Short (in scala), there will be a java.lang.ClassCastException
+    java.lang.Short (in Java) and Short (in scala), and we are trying to cast the type,
+    there will be a java.lang.ClassCastException
     if ShortType (Python) is used.
 
     """

@@ -167,6 +183,7 @@ class ShortType(object):
     def __repr__(self):
         return "ShortType"
 
+
 class ArrayType(object):
     """Spark SQL ArrayType

@@ -196,9 +213,9 @@ def __repr__(self):
             str(self.containsNull).lower() + ")"
 
     def __eq__(self, other):
-        return (isinstance(other, self.__class__) and \
-                self.elementType == other.elementType and \
-                self.containsNull == other.containsNull)
+        return (isinstance(other, self.__class__) and
+                self.elementType == other.elementType and
+                self.containsNull == other.containsNull)
 
     def __ne__(self, other):
         return not self.__eq__(other)

@@ -238,14 +255,15 @@ def __repr__(self):
             str(self.valueContainsNull).lower() + ")"
 
     def __eq__(self, other):
-        return (isinstance(other, self.__class__) and \
-                self.keyType == other.keyType and \
-                self.valueType == other.valueType and \
-                self.valueContainsNull == other.valueContainsNull)
+        return (isinstance(other, self.__class__) and
+                self.keyType == other.keyType and
+                self.valueType == other.valueType and
+                self.valueContainsNull == other.valueContainsNull)
 
     def __ne__(self, other):
         return not self.__eq__(other)
 
+
 class StructField(object):
     """Spark SQL StructField

@@ -278,14 +296,15 @@ def __repr__(self):
             str(self.nullable).lower() + ")"
 
     def __eq__(self, other):
-        return (isinstance(other, self.__class__) and \
-                self.name == other.name and \
-                self.dataType == other.dataType and \
-                self.nullable == other.nullable)
+        return (isinstance(other, self.__class__) and
+                self.name == other.name and
+                self.dataType == other.dataType and
+                self.nullable == other.nullable)
 
     def __ne__(self, other):
         return not self.__eq__(other)
 
+
 class StructType(object):
     """Spark SQL StructType

@@ -315,12 +334,13 @@ def __repr__(self):
             ",".join([field.__repr__() for field in self.fields]) + "))"
 
     def __eq__(self, other):
-        return (isinstance(other, self.__class__) and \
-                self.fields == other.fields)
+        return (isinstance(other, self.__class__) and
+                self.fields == other.fields)
 
     def __ne__(self, other):
         return not self.__eq__(other)
 
+
 def _parse_datatype_list(datatype_list_string):
     """Parses a list of comma separated data types.

@@ -348,6 +368,7 @@ def _parse_datatype_list(datatype_list_string):
         datatype_list.append(_parse_datatype_string(datatype_string))
     return datatype_list
 
+
 def _parse_datatype_string(datatype_string):
     """Parses the given data type string.

@@ -472,6 +493,7 @@ def _parse_datatype_string(datatype_string):
     fields = _parse_datatype_list(field_list_string)
     return StructType(fields)
 
+
 class SQLContext:
     """Main entry point for SparkSQL functionality.
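Aside: the PrimitiveTypeSingleton metaclass added above caches one instance per class, so constructing a primitive type repeatedly yields the same object. A minimal standalone sketch of the pattern (DemoType is a hypothetical stand-in for StringType, IntegerType, etc.; the Python 2 __metaclass__ syntax matches PySpark of this era):

    class PrimitiveTypeSingleton(type):
        """Metaclass that caches a single instance per class."""
        _instances = {}

        def __call__(cls):
            if cls not in cls._instances:
                cls._instances[cls] = super(PrimitiveTypeSingleton, cls).__call__()
            return cls._instances[cls]


    class DemoType(object):
        """Hypothetical stand-in for one of the primitive type classes."""
        __metaclass__ = PrimitiveTypeSingleton


    print(DemoType() is DemoType())  # True: the second call returns the cached instance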

sql/core/src/main/java/org/apache/spark/sql/api/java/types/DataType.java

Lines changed: 0 additions & 22 deletions

@@ -87,9 +87,6 @@ public abstract class DataType {
   /**
    * Creates an ArrayType by specifying the data type of elements ({@code elementType}).
    * The field of {@code containsNull} is set to {@code false}.
-   *
-   * @param elementType
-   * @return
    */
   public static ArrayType createArrayType(DataType elementType) {
     if (elementType == null) {

@@ -102,9 +99,6 @@ public static ArrayType createArrayType(DataType elementType) {
   /**
    * Creates an ArrayType by specifying the data type of elements ({@code elementType}) and
    * whether the array contains null values ({@code containsNull}).
-   * @param elementType
-   * @param containsNull
-   * @return
    */
   public static ArrayType createArrayType(DataType elementType, boolean containsNull) {
     if (elementType == null) {

@@ -117,10 +111,6 @@ public static ArrayType createArrayType(DataType elementType, boolean containsNu
   /**
    * Creates a MapType by specifying the data type of keys ({@code keyType}) and values
    * ({@code keyType}). The field of {@code valueContainsNull} is set to {@code true}.
-   *
-   * @param keyType
-   * @param valueType
-   * @return
    */
   public static MapType createMapType(DataType keyType, DataType valueType) {
     if (keyType == null) {

@@ -137,10 +127,6 @@ public static MapType createMapType(DataType keyType, DataType valueType) {
   /**
    * Creates a MapType by specifying the data type of keys ({@code keyType}), the data type of
    * values ({@code keyType}), and whether values contain any null value
    * ({@code valueContainsNull}).
-   * @param keyType
-   * @param valueType
-   * @param valueContainsNull
-   * @return
    */
   public static MapType createMapType(
       DataType keyType,

@@ -159,10 +145,6 @@ public static MapType createMapType(
   /**
    * Creates a StructField by specifying the name ({@code name}), data type ({@code dataType}) and
    * whether values of this field can be null values ({@code nullable}).
-   * @param name
-   * @param dataType
-   * @param nullable
-   * @return
    */
   public static StructField createStructField(String name, DataType dataType, boolean nullable) {
     if (name == null) {

@@ -177,17 +159,13 @@ public static StructField createStructField(String name, DataType dataType, bool
 
   /**
    * Creates a StructType with the given list of StructFields ({@code fields}).
-   * @param fields
-   * @return
    */
   public static StructType createStructType(List<StructField> fields) {
     return createStructType(fields.toArray(new StructField[0]));
   }
 
   /**
    * Creates a StructType with the given StructField array ({@code fields}).
-   * @param fields
-   * @return
    */
   public static StructType createStructType(StructField[] fields) {
     if (fields == null) {
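Aside: these Java factory methods are the counterparts of the Python constructors touched in python/pyspark/sql.py above. A rough Python sketch of the same schema-building calls — StructType taking a list of fields is grounded in the sql.py diff, but the positional argument order is an assumption carried over from the Java signatures:

    from pyspark.sql import (ArrayType, IntegerType, MapType, StringType,
                             StructField, StructType)

    # StructField(name, dataType, nullable) -- order assumed from createStructField.
    schema = StructType([
        StructField("name", StringType(), False),
        StructField("age", IntegerType(), True),
    ])

    # Flags passed explicitly, matching the defaults the javadoc describes
    # (containsNull = false for arrays, valueContainsNull = true for map values).
    arr = ArrayType(StringType(), False)
    m = MapType(StringType(), IntegerType(), True)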

sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala

Lines changed: 20 additions & 0 deletions

@@ -94,6 +94,26 @@ class SQLContext(@transient val sparkContext: SparkContext)
   * Creates a [[SchemaRDD]] from an [[RDD]] containing [[Row]]s by applying a schema to this RDD.
   * It is important to make sure that the structure of every [[Row]] of the provided RDD matches
   * the provided schema. Otherwise, there will be runtime exception.
+  * Example:
+  * {{{
+  *  import org.apache.spark.sql._
+  *  val sqlContext = new org.apache.spark.sql.SQLContext(sc)
+  *
+  *  val schema =
+  *    StructType(
+  *      StructField("name", StringType, false) ::
+  *      StructField("age", IntegerType, true) :: Nil)
+  *
+  *  val people = sc.textFile("examples/src/main/resources/people.txt").map(_.split(",")).map(p => Row(p(0), p(1).trim.toInt))
+  *  val peopleSchemaRDD = sqlContext.applySchema(people, schema)
+  *  peopleSchemaRDD.printSchema
+  *  // root
+  *  // |-- name: string (nullable = false)
+  *  // |-- age: integer (nullable = true)
+  *
+  *  peopleSchemaRDD.registerAsTable("people")
+  *  sqlContext.sql("select name from people").collect.foreach(println)
+  * }}}
   *
   * @group userf
   */
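Aside: a hypothetical PySpark rendering of the scaladoc example above, assuming the Python side exposes an equivalent applySchema(rdd, schema); this hunk only documents the Scala method, so the Python call and an in-scope SparkContext `sc` are both assumptions:

    from pyspark.sql import SQLContext, StructType, StructField, StringType, IntegerType

    sqlContext = SQLContext(sc)  # sc: an existing SparkContext

    schema = StructType([
        StructField("name", StringType(), False),
        StructField("age", IntegerType(), True),
    ])

    lines = sc.textFile("examples/src/main/resources/people.txt")
    people = lines.map(lambda l: l.split(",")).map(lambda p: (p[0], int(p[1].strip())))

    peopleSchemaRDD = sqlContext.applySchema(people, schema)  # assumed API
    peopleSchemaRDD.registerAsTable("people")
    for row in sqlContext.sql("SELECT name FROM people").collect():
        print(row)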

sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala

Lines changed: 3 additions & 4 deletions

@@ -268,10 +268,9 @@ private[sql] object JsonRDD extends Logging {
       // the ObjectMapper will take the last value associated with this duplicate key.
       // For example: for {"key": 1, "key":2}, we will get "key"->2.
       val mapper = new ObjectMapper()
-      iter.map {
-        record =>
-          val parsed = scalafy(mapper.readValue(record, classOf[java.util.Map[String, Any]]))
-          parsed.asInstanceOf[Map[String, Any]]
+      iter.map { record =>
+        val parsed = scalafy(mapper.readValue(record, classOf[java.util.Map[String, Any]]))
+        parsed.asInstanceOf[Map[String, Any]]
       }
     })
   }
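Aside: the comment in this hunk notes that Jackson's ObjectMapper keeps the last value it sees for a duplicate key. Python's standard json module behaves the same way, which gives a one-liner sanity check of the semantics described:

    import json

    # Last value wins for a duplicated key, matching the ObjectMapper
    # behavior described in the comment above.
    print(json.loads('{"key": 1, "key": 2}'))  # {'key': 2}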

sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala

Lines changed: 2 additions & 2 deletions

@@ -116,7 +116,7 @@ private[parquet] object ParquetTypesConverter extends Logging {
       case ParquetOriginalType.LIST => { // TODO: check enums!
         assert(groupType.getFieldCount == 1)
         val field = groupType.getFields.apply(0)
-        ArrayType(toDataType(field), false)
+        ArrayType(toDataType(field), containsNull = false)
       }
       case ParquetOriginalType.MAP => {
         assert(

@@ -147,7 +147,7 @@ private[parquet] object ParquetTypesConverter extends Logging {
         MapType(keyType, valueType)
       } else if (correspondsToArray(groupType)) { // ArrayType
         val elementType = toDataType(groupType.getFields.apply(0))
-        ArrayType(elementType, false)
+        ArrayType(elementType, containsNull = false)
       } else { // everything else: StructType
         val fields = groupType
           .getFields
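Aside: the change above names the boolean flag at the call site so a bare `false` stays readable. The same idea carries to Python with a keyword argument, assuming ArrayType's constructor names its parameter containsNull like the attribute compared in __eq__ in the sql.py diff:

    from pyspark.sql import ArrayType, StringType

    # Keyword argument makes the bare boolean self-documenting.
    ArrayType(StringType(), containsNull=False)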
