diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index fd92e526e1529..76ad69f9fe92c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -338,6 +338,7 @@ class DataFrameReader private[sql](sqlContext: SQLContext) extends Logging { * @since 1.4.0 */ def json(jsonRDD: RDD[String]): DataFrame = { + option("caseSensitiveAnalysis", sqlContext.conf.caseSensitiveAnalysis) val parsedOptions: JSONOptions = new JSONOptions(extraOptions.toMap) val schema = userSpecifiedSchema.getOrElse { InferSchema.infer(jsonRDD, sqlContext.conf.columnNameOfCorruptRecord, parsedOptions) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/InferSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/InferSchema.scala index 0937a213c984f..d56df927bd764 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/InferSchema.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/InferSchema.scala @@ -99,8 +99,10 @@ private[sql] object InferSchema { case START_OBJECT => val builder = Seq.newBuilder[StructField] while (nextUntil(parser, END_OBJECT)) { + val tn = parser.getCurrentName + val name = if (configOptions.caseSensitiveAnalysis) tn else tn.toLowerCase builder += StructField( - parser.getCurrentName, + name, inferField(parser, configOptions), nullable = true) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONOptions.scala index e59dbd6b3d438..aaf8702e0e2a7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONOptions.scala @@ -49,6 +49,9 @@ private[sql] class JSONOptions( val allowBackslashEscapingAnyCharacter = parameters.get("allowBackslashEscapingAnyCharacter").map(_.toBoolean).getOrElse(false) val compressionCodec = parameters.get("compression").map(CompressionCodecs.getCodecClassName) + val caseSensitiveAnalysis = + parameters.get("caseSensitiveAnalysis").map(_.toBoolean).getOrElse(true) + /** Sets config options on a Jackson [[JsonFactory]]. */ def setJacksonOptions(factory: JsonFactory): Unit = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 55153cda31e0a..60554203df59e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -1340,4 +1340,12 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { // another invalid table name test as below intercept[AnalysisException](df.registerTempTable("table!#")) } + + test("SPARK-13493: Enable case sensitiveness in json schema inference") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + val data = List("{'field': 1}", "{'field': 2}", "{'field': 3}", "{'FIELD': 4}") + val df = sqlContext.read.json(sc.parallelize(data)) + assert(df.schema.map(_.name) === Seq("field")) + } + } }