
Commit dfe4bd7

Author: Andrew Or
Merge branch 'master' of github.com:apache/spark into rest
2 parents: b9e2a08 + 6d3b7cb

File tree

7 files changed: +66 -40 lines changed

core/pom.xml

Lines changed: 7 additions & 0 deletions
@@ -132,6 +132,13 @@
       <artifactId>jetty-servlet</artifactId>
       <scope>compile</scope>
     </dependency>
+    <!-- Because we mark jetty as provided and shade it, its dependency
+         orbit is ignored, so we explicitly list it here (see SPARK-5557). -->
+    <dependency>
+      <groupId>org.eclipse.jetty.orbit</groupId>
+      <artifactId>javax.servlet</artifactId>
+      <version>${orbit.version}</version>
+    </dependency>
 
     <dependency>
       <groupId>org.apache.commons</groupId>

pom.xml

Lines changed: 2 additions & 8 deletions
@@ -135,8 +135,8 @@
     <parquet.version>1.6.0rc3</parquet.version>
     <jblas.version>1.2.3</jblas.version>
     <jetty.version>8.1.14.v20131031</jetty.version>
+    <orbit.version>3.0.0.v201112011016</orbit.version>
     <chill.version>0.5.0</chill.version>
-    <kryo.version>2.24.0</kryo.version>
     <ivy.version>2.4.0</ivy.version>
     <oro.version>2.0.8</oro.version>
     <codahale.metrics.version>3.1.0</codahale.metrics.version>

@@ -342,13 +342,7 @@
       </exclusion>
     </exclusions>
   </dependency>
-  <!-- Bump kryo version (included via chill) due to SPARK-5607 -->
-  <dependency>
-    <groupId>com.esotericsoftware.kryo</groupId>
-    <artifactId>kryo</artifactId>
-    <version>${kryo.version}</version>
-  </dependency>
-
+
   <!-- Shaded deps marked as provided. These are promoted to compile scope
        in the modules where we want the shaded classes to appear in the
        associated jar. -->

project/SparkBuild.scala

Lines changed: 4 additions & 1 deletion
@@ -374,7 +374,10 @@ object Unidoc {
       ),
       "-group", "Spark SQL", packageList("sql.api.java", "sql.api.java.types", "sql.hive.api.java"),
       "-noqualifier", "java.lang"
-    )
+    ),
+
+    // Group similar methods together based on the @group annotation.
+    scalacOptions in (ScalaUnidoc, unidoc) ++= Seq("-groups")
   )
 }

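For context, the ScalaDoc "-groups" flag only takes effect for members whose doc comments carry @group tags. A minimal sketch, assuming a hypothetical class that is not part of this commit, of what such tags look like:

  /** A toy class used only to illustrate ScalaDoc grouping. */
  class ExampleOps {

    /** Selects a subset of columns.
      * @group dfops
      */
    def select(cols: String*): ExampleOps = this

    /** Returns all column names.
      * @group basic
      */
    def columns: Seq[String] = Nil
  }

With "-groups" enabled, the generated docs list select under the "dfops" group and columns under "basic" instead of one flat member list.
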
sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala

Lines changed: 3 additions & 1 deletion
@@ -53,7 +53,9 @@ private[sql] class DataFrameImpl protected[sql](
   def this(sqlContext: SQLContext, logicalPlan: LogicalPlan) = {
     this(sqlContext, {
       val qe = sqlContext.executePlan(logicalPlan)
-      qe.analyzed // This should force analysis and throw errors if there are any
+      if (sqlContext.conf.dataFrameEagerAnalysis) {
+        qe.analyzed // This should force analysis and throw errors if there are any
+      }
       qe
     })
   }
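
In effect, the auxiliary constructor now forces analysis only when spark.sql.dataframe.eagerAnalysis is true. A hedged sketch of the observable difference, modelled directly on the test added to DataFrameSuite below (TestSQLContext and testData are that suite's fixtures):

  // Eager analysis (the default): a bad column name fails as soon as the
  // DataFrame is constructed, because the constructor calls qe.analyzed.
  TestSQLContext.setConf(SQLConf.DATAFRAME_EAGER_ANALYSIS, "true")
  intercept[Exception] { testData.select('nonExistentName) }

  // With the flag off, construction succeeds and the analysis error only
  // surfaces once the plan is analyzed or executed later.
  TestSQLContext.setConf(SQLConf.DATAFRAME_EAGER_ANALYSIS, "false")
  testData.select('nonExistentName)   // no error at this point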

sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala

Lines changed: 6 additions & 0 deletions
@@ -52,6 +52,9 @@ private[spark] object SQLConf {
   // This is used to set the default data source
   val DEFAULT_DATA_SOURCE_NAME = "spark.sql.default.datasource"
 
+  // Whether to perform eager analysis on a DataFrame.
+  val DATAFRAME_EAGER_ANALYSIS = "spark.sql.dataframe.eagerAnalysis"
+
   object Deprecated {
     val MAPRED_REDUCE_TASKS = "mapred.reduce.tasks"
   }

@@ -173,6 +176,9 @@ private[sql] class SQLConf extends Serializable {
   private[spark] def defaultDataSourceName: String =
     getConf(DEFAULT_DATA_SOURCE_NAME, "org.apache.spark.sql.parquet")
 
+  private[spark] def dataFrameEagerAnalysis: Boolean =
+    getConf(DATAFRAME_EAGER_ANALYSIS, "true").toBoolean
+
   /** ********************** SQLConf functionality methods ************ */
 
   /** Set Spark SQL configuration properties. */

sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala

Lines changed: 14 additions & 3 deletions
@@ -17,19 +17,23 @@
 
 package org.apache.spark.sql
 
+import scala.language.postfixOps
+
 import org.apache.spark.sql.Dsl._
 import org.apache.spark.sql.types._
-
-/* Implicits */
+import org.apache.spark.sql.test.TestSQLContext
 import org.apache.spark.sql.test.TestSQLContext.logicalPlanToSparkQuery
 import org.apache.spark.sql.test.TestSQLContext.implicits._
 
-import scala.language.postfixOps
 
 class DataFrameSuite extends QueryTest {
   import org.apache.spark.sql.TestData._
 
   test("analysis error should be eagerly reported") {
+    val oldSetting = TestSQLContext.conf.dataFrameEagerAnalysis
+    // Eager analysis.
+    TestSQLContext.setConf(SQLConf.DATAFRAME_EAGER_ANALYSIS, "true")
+
     intercept[Exception] { testData.select('nonExistentName) }
     intercept[Exception] {
       testData.groupBy('key).agg(Map("nonExistentName" -> "sum"))

@@ -40,6 +44,13 @@ class DataFrameSuite extends QueryTest {
     intercept[Exception] {
       testData.groupBy($"abcd").agg(Map("key" -> "sum"))
     }
+
+    // No more eager analysis once the flag is turned off
+    TestSQLContext.setConf(SQLConf.DATAFRAME_EAGER_ANALYSIS, "false")
+    testData.select('nonExistentName)
+
+    // Set the flag back to original value before this test.
+    TestSQLContext.setConf(SQLConf.DATAFRAME_EAGER_ANALYSIS, oldSetting.toString)
   }
 
   test("table scan") {

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala

Lines changed: 30 additions & 27 deletions
@@ -17,13 +17,10 @@
 
 package org.apache.spark.sql.hive.execution
 
-import org.apache.spark.sql.QueryTest
-
-import org.apache.spark.sql.Row
+import org.apache.spark.sql.hive.HiveShim
 import org.apache.spark.sql.hive.test.TestHive._
 import org.apache.spark.sql.types._
-import org.apache.spark.util.Utils
-import org.apache.spark.sql.hive.HiveShim
+import org.apache.spark.sql.{QueryTest, Row, SQLConf}
 
 case class Nested1(f1: Nested2)
 case class Nested2(f2: Nested3)

@@ -109,28 +106,34 @@ class SQLQuerySuite extends QueryTest {
     )
 
     if (HiveShim.version == "0.13.1") {
-      sql(
-        """CREATE TABLE ctas5
-          | STORED AS parquet AS
-          | SELECT key, value
-          | FROM src
-          | ORDER BY key, value""".stripMargin).collect
-
-      checkExistence(sql("DESC EXTENDED ctas5"), true,
-        "name:key", "type:string", "name:value", "ctas5",
-        "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
-        "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat",
-        "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe",
-        "MANAGED_TABLE"
-      )
-
-      val default = getConf("spark.sql.hive.convertMetastoreParquet", "true")
-      // use the Hive SerDe for parquet tables
-      sql("set spark.sql.hive.convertMetastoreParquet = false")
-      checkAnswer(
-        sql("SELECT key, value FROM ctas5 ORDER BY key, value"),
-        sql("SELECT key, value FROM src ORDER BY key, value").collect().toSeq)
-      sql(s"set spark.sql.hive.convertMetastoreParquet = $default")
+      val origUseParquetDataSource = conf.parquetUseDataSourceApi
+      try {
+        setConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, "false")
+        sql(
+          """CREATE TABLE ctas5
+            | STORED AS parquet AS
+            | SELECT key, value
+            | FROM src
+            | ORDER BY key, value""".stripMargin).collect()
+
+        checkExistence(sql("DESC EXTENDED ctas5"), true,
+          "name:key", "type:string", "name:value", "ctas5",
+          "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
+          "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat",
+          "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe",
+          "MANAGED_TABLE"
+        )
+
+        val default = getConf("spark.sql.hive.convertMetastoreParquet", "true")
+        // use the Hive SerDe for parquet tables
+        sql("set spark.sql.hive.convertMetastoreParquet = false")
+        checkAnswer(
+          sql("SELECT key, value FROM ctas5 ORDER BY key, value"),
+          sql("SELECT key, value FROM src ORDER BY key, value").collect().toSeq)
+        sql(s"set spark.sql.hive.convertMetastoreParquet = $default")
+      } finally {
+        setConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, origUseParquetDataSource.toString)
+      }
     }
   }
 

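The save/set/try-finally/restore dance above is easy to get wrong by hand. A hedged sketch of how it could be factored into a helper; the name withSQLConf is hypothetical and not part of this commit, while getConf/setConf are the SQLContext methods the test already uses via TestHive._:

  // Runs `body` with the given SQL conf key temporarily set to `value`,
  // restoring the previous value afterwards even if `body` throws.
  def withSQLConf(key: String, value: String)(body: => Unit): Unit = {
    val original = getConf(key, value)  // fall back to the new value if the key was unset
    setConf(key, value)
    try body finally setConf(key, original)
  }

  // Usage, mirroring the change above:
  withSQLConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, "false") {
    sql("SELECT key, value FROM ctas5 ORDER BY key, value").collect()
  }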