Skip to content

Commit 16be3e5

Browse files
committed
This commit contains three changes:
* Expose `DataType`s in the sql package (internal details are private to sql).
* Introduce `createSchemaRDD` to create a `SchemaRDD` from an `RDD` with a provided schema (represented by a `StructType`) and a provided function to construct `Row`.
* Add a function `simpleString` to every `DataType`. Also, the schema represented by a `StructType` can be visualized by `printSchema`.
1 parent b520b64 commit 16be3e5

File tree

10 files changed

+221
-107
lines changed

10 files changed

+221
-107
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Row.scala

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ object Row {
3232
* }}}
3333
*/
3434
def unapplySeq(row: Row): Some[Seq[Any]] = Some(row)
35+
36+
/**
37+
* Construct a [[Row]] with the given values.
38+
*/
39+
def apply(values: Any*): Row = new GenericRow(values.toArray)
3540
}
3641

3742
/**

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/WrapDynamic.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ import scala.language.dynamics
2121

2222
import org.apache.spark.sql.catalyst.types.DataType
2323

24-
case object DynamicType extends DataType
24+
case object DynamicType extends DataType {
25+
def simpleString: String = "dynamic"
26+
}
2527

2628
case class WrapDynamic(children: Seq[Attribute]) extends Expression {
2729
type EvaluatedType = DynamicRow

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala

Lines changed: 3 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -125,52 +125,11 @@ abstract class QueryPlan[PlanType <: TreeNode[PlanType]] extends TreeNode[PlanTy
125125
}.toSeq
126126
}
127127

128-
protected def generateSchemaString(schema: Seq[Attribute]): String = {
129-
val builder = new StringBuilder
130-
builder.append("root\n")
131-
val prefix = " |"
132-
schema.foreach { attribute =>
133-
val name = attribute.name
134-
val dataType = attribute.dataType
135-
dataType match {
136-
case fields: StructType =>
137-
builder.append(s"$prefix-- $name: $StructType\n")
138-
generateSchemaString(fields, s"$prefix |", builder)
139-
case ArrayType(fields: StructType) =>
140-
builder.append(s"$prefix-- $name: $ArrayType[$StructType]\n")
141-
generateSchemaString(fields, s"$prefix |", builder)
142-
case ArrayType(elementType: DataType) =>
143-
builder.append(s"$prefix-- $name: $ArrayType[$elementType]\n")
144-
case _ => builder.append(s"$prefix-- $name: $dataType\n")
145-
}
146-
}
147-
148-
builder.toString()
149-
}
150-
151-
protected def generateSchemaString(
152-
schema: StructType,
153-
prefix: String,
154-
builder: StringBuilder): StringBuilder = {
155-
schema.fields.foreach {
156-
case StructField(name, fields: StructType, _) =>
157-
builder.append(s"$prefix-- $name: $StructType\n")
158-
generateSchemaString(fields, s"$prefix |", builder)
159-
case StructField(name, ArrayType(fields: StructType), _) =>
160-
builder.append(s"$prefix-- $name: $ArrayType[$StructType]\n")
161-
generateSchemaString(fields, s"$prefix |", builder)
162-
case StructField(name, ArrayType(elementType: DataType), _) =>
163-
builder.append(s"$prefix-- $name: $ArrayType[$elementType]\n")
164-
case StructField(name, fieldType: DataType, _) =>
165-
builder.append(s"$prefix-- $name: $fieldType\n")
166-
}
167-
168-
builder
169-
}
128+
def schema: StructType = StructType.fromAttributes(output)
170129

171130
/** Returns the output schema in the tree format. */
172-
def schemaString: String = generateSchemaString(output)
131+
def formattedSchemaString: String = schema.formattedSchemaString
173132

174133
/** Prints out the schema in the tree format */
175-
def printSchema(): Unit = println(schemaString)
134+
def printSchema(): Unit = println(formattedSchemaString)
176135
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala

Lines changed: 149 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ object DataType extends RegexParsers {
6262
"true" ^^^ true |
6363
"false" ^^^ false
6464

65-
6665
protected lazy val structType: Parser[DataType] =
6766
"StructType\\([A-zA-z]*\\(".r ~> repsep(structField, ",") <~ "))" ^^ {
6867
case fields => new StructType(fields)
@@ -93,47 +92,56 @@ abstract class DataType {
9392
}
9493

9594
def isPrimitive: Boolean = false
95+
96+
def simpleString: String
9697
}
9798

98-
case object NullType extends DataType
99+
case object NullType extends DataType {
100+
def simpleString: String = "null"
101+
}
99102

100103
trait PrimitiveType extends DataType {
101104
override def isPrimitive = true
102105
}
103106

104107
abstract class NativeType extends DataType {
105-
type JvmType
106-
@transient val tag: TypeTag[JvmType]
107-
val ordering: Ordering[JvmType]
108+
private[sql] type JvmType
109+
@transient private[sql] val tag: TypeTag[JvmType]
110+
private[sql] val ordering: Ordering[JvmType]
108111

109-
@transient val classTag = {
112+
@transient private[sql] val classTag = {
110113
val mirror = runtimeMirror(Utils.getSparkClassLoader)
111114
ClassTag[JvmType](mirror.runtimeClass(tag.tpe))
112115
}
113116
}
114117

115118
case object StringType extends NativeType with PrimitiveType {
116-
type JvmType = String
117-
@transient lazy val tag = typeTag[JvmType]
118-
val ordering = implicitly[Ordering[JvmType]]
119+
private[sql] type JvmType = String
120+
@transient private[sql] lazy val tag = typeTag[JvmType]
121+
private[sql] val ordering = implicitly[Ordering[JvmType]]
122+
def simpleString: String = "string"
119123
}
120124
case object BinaryType extends DataType with PrimitiveType {
121-
type JvmType = Array[Byte]
125+
private[sql] type JvmType = Array[Byte]
126+
def simpleString: String = "binary"
122127
}
123128
case object BooleanType extends NativeType with PrimitiveType {
124-
type JvmType = Boolean
125-
@transient lazy val tag = typeTag[JvmType]
126-
val ordering = implicitly[Ordering[JvmType]]
129+
private[sql] type JvmType = Boolean
130+
@transient private[sql] lazy val tag = typeTag[JvmType]
131+
private[sql] val ordering = implicitly[Ordering[JvmType]]
132+
def simpleString: String = "boolean"
127133
}
128134

129135
case object TimestampType extends NativeType {
130-
type JvmType = Timestamp
136+
private[sql] type JvmType = Timestamp
131137

132-
@transient lazy val tag = typeTag[JvmType]
138+
@transient private[sql] lazy val tag = typeTag[JvmType]
133139

134-
val ordering = new Ordering[JvmType] {
140+
private[sql] val ordering = new Ordering[JvmType] {
135141
def compare(x: Timestamp, y: Timestamp) = x.compareTo(y)
136142
}
143+
144+
def simpleString: String = "timestamp"
137145
}
138146

139147
abstract class NumericType extends NativeType with PrimitiveType {
@@ -142,7 +150,7 @@ abstract class NumericType extends NativeType with PrimitiveType {
142150
// type parameter and add a numeric annotation (i.e., [JvmType : Numeric]). This gets
143151
// desugared by the compiler into an argument to the object's constructor. This means there is no
144152
// longer a no-argument constructor and thus the JVM cannot serialize the object anymore.
145-
val numeric: Numeric[JvmType]
153+
private[sql] val numeric: Numeric[JvmType]
146154
}
147155

148156
/** Matcher for any expressions that evaluate to [[IntegralType]]s */
@@ -154,39 +162,43 @@ object IntegralType {
154162
}
155163

156164
abstract class IntegralType extends NumericType {
157-
val integral: Integral[JvmType]
165+
private[sql] val integral: Integral[JvmType]
158166
}
159167

160168
case object LongType extends IntegralType {
161-
type JvmType = Long
162-
@transient lazy val tag = typeTag[JvmType]
163-
val numeric = implicitly[Numeric[Long]]
164-
val integral = implicitly[Integral[Long]]
165-
val ordering = implicitly[Ordering[JvmType]]
169+
private[sql] type JvmType = Long
170+
@transient private[sql] lazy val tag = typeTag[JvmType]
171+
private[sql] val numeric = implicitly[Numeric[Long]]
172+
private[sql] val integral = implicitly[Integral[Long]]
173+
private[sql] val ordering = implicitly[Ordering[JvmType]]
174+
def simpleString: String = "long"
166175
}
167176

168177
case object IntegerType extends IntegralType {
169-
type JvmType = Int
170-
@transient lazy val tag = typeTag[JvmType]
171-
val numeric = implicitly[Numeric[Int]]
172-
val integral = implicitly[Integral[Int]]
173-
val ordering = implicitly[Ordering[JvmType]]
178+
private[sql] type JvmType = Int
179+
@transient private[sql] lazy val tag = typeTag[JvmType]
180+
private[sql] val numeric = implicitly[Numeric[Int]]
181+
private[sql] val integral = implicitly[Integral[Int]]
182+
private[sql] val ordering = implicitly[Ordering[JvmType]]
183+
def simpleString: String = "integer"
174184
}
175185

176186
case object ShortType extends IntegralType {
177-
type JvmType = Short
178-
@transient lazy val tag = typeTag[JvmType]
179-
val numeric = implicitly[Numeric[Short]]
180-
val integral = implicitly[Integral[Short]]
181-
val ordering = implicitly[Ordering[JvmType]]
187+
private[sql] type JvmType = Short
188+
@transient private[sql] lazy val tag = typeTag[JvmType]
189+
private[sql] val numeric = implicitly[Numeric[Short]]
190+
private[sql] val integral = implicitly[Integral[Short]]
191+
private[sql] val ordering = implicitly[Ordering[JvmType]]
192+
def simpleString: String = "short"
182193
}
183194

184195
case object ByteType extends IntegralType {
185-
type JvmType = Byte
186-
@transient lazy val tag = typeTag[JvmType]
187-
val numeric = implicitly[Numeric[Byte]]
188-
val integral = implicitly[Integral[Byte]]
189-
val ordering = implicitly[Ordering[JvmType]]
196+
private[sql] type JvmType = Byte
197+
@transient private[sql] lazy val tag = typeTag[JvmType]
198+
private[sql] val numeric = implicitly[Numeric[Byte]]
199+
private[sql] val integral = implicitly[Integral[Byte]]
200+
private[sql] val ordering = implicitly[Ordering[JvmType]]
201+
def simpleString: String = "byte"
190202
}
191203

192204
/** Matcher for any expressions that evaluate to [[FractionalType]]s */
@@ -197,47 +209,127 @@ object FractionalType {
197209
}
198210
}
199211
abstract class FractionalType extends NumericType {
200-
val fractional: Fractional[JvmType]
212+
private[sql] val fractional: Fractional[JvmType]
201213
}
202214

203215
case object DecimalType extends FractionalType {
204-
type JvmType = BigDecimal
205-
@transient lazy val tag = typeTag[JvmType]
206-
val numeric = implicitly[Numeric[BigDecimal]]
207-
val fractional = implicitly[Fractional[BigDecimal]]
208-
val ordering = implicitly[Ordering[JvmType]]
216+
private[sql] type JvmType = BigDecimal
217+
@transient private[sql] lazy val tag = typeTag[JvmType]
218+
private[sql] val numeric = implicitly[Numeric[BigDecimal]]
219+
private[sql] val fractional = implicitly[Fractional[BigDecimal]]
220+
private[sql] val ordering = implicitly[Ordering[JvmType]]
221+
def simpleString: String = "decimal"
209222
}
210223

211224
case object DoubleType extends FractionalType {
212-
type JvmType = Double
213-
@transient lazy val tag = typeTag[JvmType]
214-
val numeric = implicitly[Numeric[Double]]
215-
val fractional = implicitly[Fractional[Double]]
216-
val ordering = implicitly[Ordering[JvmType]]
225+
private[sql] type JvmType = Double
226+
@transient private[sql] lazy val tag = typeTag[JvmType]
227+
private[sql] val numeric = implicitly[Numeric[Double]]
228+
private[sql] val fractional = implicitly[Fractional[Double]]
229+
private[sql] val ordering = implicitly[Ordering[JvmType]]
230+
def simpleString: String = "double"
217231
}
218232

219233
case object FloatType extends FractionalType {
220-
type JvmType = Float
221-
@transient lazy val tag = typeTag[JvmType]
222-
val numeric = implicitly[Numeric[Float]]
223-
val fractional = implicitly[Fractional[Float]]
224-
val ordering = implicitly[Ordering[JvmType]]
234+
private[sql] type JvmType = Float
235+
@transient private[sql] lazy val tag = typeTag[JvmType]
236+
private[sql] val numeric = implicitly[Numeric[Float]]
237+
private[sql] val fractional = implicitly[Fractional[Float]]
238+
private[sql] val ordering = implicitly[Ordering[JvmType]]
239+
def simpleString: String = "float"
225240
}
226241

227-
case class ArrayType(elementType: DataType) extends DataType
242+
case class ArrayType(elementType: DataType) extends DataType {
243+
private[sql] def buildFormattedString(prefix: String, builder: StringBuilder): Unit = {
244+
builder.append(s"${prefix}-- element: ${elementType.simpleString}\n")
245+
elementType match {
246+
case array: ArrayType =>
247+
array.buildFormattedString(s"$prefix |", builder)
248+
case struct: StructType =>
249+
struct.buildFormattedString(s"$prefix |", builder)
250+
case map: MapType =>
251+
map.buildFormattedString(s"$prefix |", builder)
252+
case _ =>
253+
}
254+
}
228255

229-
case class StructField(name: String, dataType: DataType, nullable: Boolean)
256+
def simpleString: String = "array"
257+
}
258+
259+
case class StructField(name: String, dataType: DataType, nullable: Boolean) {
260+
261+
private[sql] def buildFormattedString(prefix: String, builder: StringBuilder): Unit = {
262+
builder.append(s"${prefix}-- ${name}: ${dataType.simpleString} (nullable = ${nullable})\n")
263+
dataType match {
264+
case array: ArrayType =>
265+
array.buildFormattedString(s"$prefix |", builder)
266+
case struct: StructType =>
267+
struct.buildFormattedString(s"$prefix |", builder)
268+
case map: MapType =>
269+
map.buildFormattedString(s"$prefix |", builder)
270+
case _ =>
271+
}
272+
}
273+
}
230274

231275
object StructType {
232276
def fromAttributes(attributes: Seq[Attribute]): StructType = {
233277
StructType(attributes.map(a => StructField(a.name, a.dataType, a.nullable)))
234278
}
235279

280+
private def validateFields(fields: Seq[StructField]): Boolean =
281+
fields.map(field => field.name).distinct.size == fields.size
282+
236283
// def apply(fields: Seq[StructField]) = new StructType(fields.toIndexedSeq)
237284
}
238285

239286
case class StructType(fields: Seq[StructField]) extends DataType {
287+
require(StructType.validateFields(fields), "Found fields with the same name.")
288+
240289
def toAttributes = fields.map(f => AttributeReference(f.name, f.dataType, f.nullable)())
290+
291+
def formattedSchemaString: String = {
292+
val builder = new StringBuilder
293+
builder.append("root\n")
294+
val prefix = " |"
295+
fields.foreach(field => field.buildFormattedString(prefix, builder))
296+
297+
builder.toString()
298+
}
299+
300+
def printSchema(): Unit = println(formattedSchemaString)
301+
302+
private[sql] def buildFormattedString(prefix: String, builder: StringBuilder): Unit = {
303+
fields.foreach(field => field.buildFormattedString(prefix, builder))
304+
}
305+
306+
def simpleString: String = "struct"
241307
}
242308

243-
case class MapType(keyType: DataType, valueType: DataType) extends DataType
309+
case class MapType(keyType: DataType, valueType: DataType) extends DataType {
310+
private[sql] def buildFormattedString(prefix: String, builder: StringBuilder): Unit = {
311+
builder.append(s"${prefix}-- key: ${keyType.simpleString}\n")
312+
keyType match {
313+
case array: ArrayType =>
314+
array.buildFormattedString(s"$prefix |", builder)
315+
case struct: StructType =>
316+
struct.buildFormattedString(s"$prefix |", builder)
317+
case map: MapType =>
318+
map.buildFormattedString(s"$prefix |", builder)
319+
case _ =>
320+
}
321+
322+
builder.append(s"${prefix}-- value: ${valueType.simpleString}\n")
323+
valueType match {
324+
case array: ArrayType =>
325+
array.buildFormattedString(s"$prefix |", builder)
326+
case struct: StructType =>
327+
struct.buildFormattedString(s"$prefix |", builder)
328+
case map: MapType =>
329+
map.buildFormattedString(s"$prefix |", builder)
330+
case _ =>
331+
}
332+
}
333+
334+
def simpleString: String = "map"
335+
}

0 commit comments

Comments
 (0)