Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
84 commits
Select commit Hold shift + click to select a range
01e4cdf
Merge remote-tracking branch 'upstream/master'
gatorsmile Nov 13, 2015
6835704
Merge remote-tracking branch 'upstream/master'
gatorsmile Nov 14, 2015
9180687
Merge remote-tracking branch 'upstream/master'
gatorsmile Nov 14, 2015
b38a21e
SPARK-11633
gatorsmile Nov 17, 2015
d2b84af
Merge remote-tracking branch 'upstream/master' into joinMakeCopy
gatorsmile Nov 17, 2015
fda8025
Merge remote-tracking branch 'upstream/master'
gatorspark Nov 17, 2015
ac0dccd
Merge branch 'master' of https://github.com/gatorsmile/spark
gatorspark Nov 17, 2015
6e0018b
Merge remote-tracking branch 'upstream/master'
Nov 20, 2015
0546772
converge
gatorsmile Nov 20, 2015
b37a64f
converge
gatorsmile Nov 20, 2015
c2a872c
Merge remote-tracking branch 'upstream/master'
gatorsmile Jan 6, 2016
ab6dbd7
Merge remote-tracking branch 'upstream/master'
gatorsmile Jan 6, 2016
4276356
Merge remote-tracking branch 'upstream/master'
gatorsmile Jan 6, 2016
2dab708
Merge remote-tracking branch 'upstream/master'
gatorsmile Jan 7, 2016
0458770
Merge remote-tracking branch 'upstream/master'
gatorsmile Jan 8, 2016
1debdfa
Merge remote-tracking branch 'upstream/master'
gatorsmile Jan 9, 2016
763706d
Merge remote-tracking branch 'upstream/master'
gatorsmile Jan 14, 2016
4de6ec1
Merge remote-tracking branch 'upstream/master'
gatorsmile Jan 18, 2016
9422a4f
Merge remote-tracking branch 'upstream/master'
gatorsmile Jan 19, 2016
52bdf48
Merge remote-tracking branch 'upstream/master'
gatorsmile Jan 20, 2016
1e95df3
Merge remote-tracking branch 'upstream/master'
gatorsmile Jan 23, 2016
fab24cf
Merge remote-tracking branch 'upstream/master'
gatorsmile Feb 1, 2016
8b2e33b
Merge remote-tracking branch 'upstream/master'
gatorsmile Feb 5, 2016
2ee1876
Merge remote-tracking branch 'upstream/master'
gatorsmile Feb 11, 2016
b9f0090
Merge remote-tracking branch 'upstream/master'
gatorsmile Feb 12, 2016
ade6f7e
Merge remote-tracking branch 'upstream/master'
gatorsmile Feb 15, 2016
9fd63d2
Merge remote-tracking branch 'upstream/master'
gatorsmile Feb 19, 2016
5199d49
Merge remote-tracking branch 'upstream/master'
gatorsmile Feb 22, 2016
404214c
Merge remote-tracking branch 'upstream/master'
gatorsmile Feb 23, 2016
c001dd9
Merge remote-tracking branch 'upstream/master'
gatorsmile Feb 25, 2016
59daa48
Merge remote-tracking branch 'upstream/master'
gatorsmile Mar 5, 2016
41d5f64
Merge remote-tracking branch 'upstream/master'
gatorsmile Mar 7, 2016
472a6e3
Merge remote-tracking branch 'upstream/master'
gatorsmile Mar 10, 2016
0fba10a
Merge remote-tracking branch 'upstream/master'
gatorsmile Mar 12, 2016
cbf73b3
Merge remote-tracking branch 'upstream/master'
gatorsmile Mar 21, 2016
c08f561
Merge remote-tracking branch 'upstream/master'
gatorsmile Mar 22, 2016
474df88
Merge remote-tracking branch 'upstream/master'
gatorsmile Mar 22, 2016
3d9828d
Merge remote-tracking branch 'upstream/master'
gatorsmile Mar 24, 2016
72d2361
Merge remote-tracking branch 'upstream/master'
gatorsmile Mar 26, 2016
07afea5
Merge remote-tracking branch 'upstream/master'
gatorsmile Mar 29, 2016
8bf2007
Merge remote-tracking branch 'upstream/master'
gatorsmile Mar 30, 2016
87a165b
Merge remote-tracking branch 'upstream/master'
gatorsmile Mar 31, 2016
b9359cd
Merge remote-tracking branch 'upstream/master'
gatorsmile Apr 1, 2016
65bd090
Merge remote-tracking branch 'upstream/master'
gatorsmile Apr 5, 2016
babf2da
Merge remote-tracking branch 'upstream/master'
gatorsmile Apr 5, 2016
9e09469
Merge remote-tracking branch 'upstream/master'
gatorsmile Apr 6, 2016
50a8e4a
Merge remote-tracking branch 'upstream/master'
gatorsmile Apr 6, 2016
f3337fa
Merge remote-tracking branch 'upstream/master'
gatorsmile Apr 10, 2016
09cc36d
Merge remote-tracking branch 'upstream/master'
gatorsmile Apr 12, 2016
83a1915
Merge remote-tracking branch 'upstream/master'
gatorsmile Apr 14, 2016
0483145
Merge remote-tracking branch 'upstream/master'
gatorsmile Apr 19, 2016
236a5f4
Merge remote-tracking branch 'upstream/master'
gatorsmile Apr 20, 2016
08aaa4d
Merge remote-tracking branch 'upstream/master'
gatorsmile Apr 21, 2016
64f704e
Merge remote-tracking branch 'upstream/master'
gatorsmile Apr 24, 2016
006ea2d
Merge remote-tracking branch 'upstream/master'
gatorsmile Apr 26, 2016
0c0dc8a
Merge remote-tracking branch 'upstream/master'
gatorsmile Apr 27, 2016
7c4b2f0
Merge remote-tracking branch 'upstream/master'
gatorsmile May 1, 2016
38f3af9
Merge remote-tracking branch 'upstream/master'
gatorsmile May 1, 2016
8089c6f
Merge remote-tracking branch 'upstream/master'
gatorsmile May 4, 2016
a6c7518
Merge remote-tracking branch 'upstream/master'
gatorsmile May 4, 2016
546c1db
Merge remote-tracking branch 'upstream/master'
gatorsmile May 4, 2016
e2ece35
Merge remote-tracking branch 'upstream/master'
gatorsmile May 5, 2016
13c04be
Merge remote-tracking branch 'upstream/master'
gatorsmile May 6, 2016
ac88fc1
Merge remote-tracking branch 'upstream/master'
gatorsmile May 6, 2016
154d3df
Merge remote-tracking branch 'upstream/master'
gatorsmile May 10, 2016
412e88a
Merge remote-tracking branch 'upstream/master'
gatorsmile May 10, 2016
c570065
Merge remote-tracking branch 'upstream/master'
gatorsmile May 11, 2016
ac03674
Merge remote-tracking branch 'upstream/master'
gatorsmile May 11, 2016
650cdcc
Merge remote-tracking branch 'upstream/master'
gatorsmile May 15, 2016
29d16c1
Merge remote-tracking branch 'upstream/master'
gatorsmile May 20, 2016
8d02eea
Merge remote-tracking branch 'upstream/master'
gatorsmile May 22, 2016
948340b
fix.
gatorsmile May 24, 2016
f6a4517
code clean
gatorsmile May 25, 2016
26d72ae
better output
gatorsmile May 25, 2016
c313ffc
style fix.
gatorsmile May 25, 2016
3ac2b93
address comments.
gatorsmile May 25, 2016
76f4f80
address comments.
gatorsmile May 26, 2016
b9e12f8
fix test case.
gatorsmile May 26, 2016
c1217e1
Merge remote-tracking branch 'upstream/master' into runSQLAgainstFile
gatorsmile May 26, 2016
c752518
Merge remote-tracking branch 'upstream/master'
gatorsmile May 26, 2016
db0f48c
Merge remote-tracking branch 'upstream/master'
gatorsmile May 27, 2016
11387e2
Merge branch 'runSQLAgainstFile' into runSQLAgainstFileNew
gatorsmile May 27, 2016
e5c08f2
address comments
gatorsmile May 28, 2016
9fae469
address comments.
gatorsmile Jun 1, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -131,28 +131,20 @@ case class DataSource(
// Found the data source using fully qualified path
dataSource
case Failure(error) =>
if (error.isInstanceOf[ClassNotFoundException]) {
val className = error.getMessage
if (spark2RemovedClasses.contains(className)) {
throw new ClassNotFoundException(s"$className is removed in Spark 2.0. " +
"Please check if your library is compatible with Spark 2.0")
}
}
if (provider.startsWith("org.apache.spark.sql.hive.orc")) {
throw new ClassNotFoundException(
"The ORC data source must be used with Hive support enabled.", error)
if (provider.toLowerCase == "orc" ||
provider.startsWith("org.apache.spark.sql.hive.orc")) {
throw new AnalysisException(
"The ORC data source must be used with Hive support enabled")
} else if (provider.toLowerCase == "avro" ||
provider == "com.databricks.spark.avro") {
throw new AnalysisException(
s"Failed to find data source: ${provider.toLowerCase}. Please use Spark " +
"package http://spark-packages.org/package/databricks/spark-avro")
} else {
if (provider == "avro" || provider == "com.databricks.spark.avro") {
throw new ClassNotFoundException(
s"Failed to find data source: $provider. Please use Spark package " +
"http://spark-packages.org/package/databricks/spark-avro",
error)
} else {
throw new ClassNotFoundException(
s"Failed to find data source: $provider. Please find packages at " +
"http://spark-packages.org",
error)
}
throw new ClassNotFoundException(
s"Failed to find data source: $provider. Please find packages at " +
"http://spark-packages.org",
error)
}
}
} catch {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

package org.apache.spark.sql.execution.datasources

import scala.util.control.NonFatal

import org.apache.spark.sql.{AnalysisException, SaveMode, SparkSession}
import org.apache.spark.sql.catalyst.analysis._
import org.apache.spark.sql.catalyst.catalog.SessionCatalog
Expand All @@ -28,7 +30,7 @@ import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.sources.{BaseRelation, InsertableRelation}

/**
* Try to replaces [[UnresolvedRelation]]s with [[ResolvedDataSource]].
* Try to replace [[UnresolvedRelation]]s with [[ResolveDataSource]].
*/
private[sql] class ResolveDataSource(sparkSession: SparkSession) extends Rule[LogicalPlan] {
def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
Expand All @@ -38,6 +40,16 @@ private[sql] class ResolveDataSource(sparkSession: SparkSession) extends Rule[Lo
sparkSession,
paths = u.tableIdentifier.table :: Nil,
className = u.tableIdentifier.database.get)

val notSupportDirectQuery = try {
!classOf[FileFormat].isAssignableFrom(dataSource.providingClass)
} catch {
case NonFatal(e) => false
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When would this happen ?

Should true be returned here ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tedyu If people use select * from db_name.table_name, it will throw an exception. Still need to continue for such cases.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks Ryan.

}
if (notSupportDirectQuery) {
throw new AnalysisException("Unsupported data source type for direct query on files: " +
s"${u.tableIdentifier.database.get}")
}
val plan = LogicalRelation(dataSource.resolveRelation())
u.alias.map(a => SubqueryAlias(u.alias.get, plan)).getOrElse(plan)
} catch {
Expand Down
53 changes: 47 additions & 6 deletions sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1838,20 +1838,61 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
df)
})

val e1 = intercept[AnalysisException] {
var e = intercept[AnalysisException] {
sql("select * from in_valid_table")
}
assert(e1.message.contains("Table or view not found"))
assert(e.message.contains("Table or view not found"))

val e2 = intercept[AnalysisException] {
e = intercept[AnalysisException] {
sql("select * from no_db.no_table").show()
}
assert(e2.message.contains("Table or view not found"))
assert(e.message.contains("Table or view not found"))

val e3 = intercept[AnalysisException] {
e = intercept[AnalysisException] {
sql("select * from json.invalid_file")
}
assert(e3.message.contains("Path does not exist"))
assert(e.message.contains("Path does not exist"))

e = intercept[AnalysisException] {
sql(s"select id from `org.apache.spark.sql.hive.orc`.`file_path`")
}
assert(e.message.contains("The ORC data source must be used with Hive support enabled"))

e = intercept[AnalysisException] {
sql(s"select id from `com.databricks.spark.avro`.`file_path`")
}
assert(e.message.contains("Failed to find data source: com.databricks.spark.avro. " +
"Please use Spark package http://spark-packages.org/package/databricks/spark-avro"))

// data source type is case insensitive
e = intercept[AnalysisException] {
sql(s"select id from Avro.`file_path`")
}
assert(e.message.contains("Failed to find data source: avro. Please use Spark package " +
"http://spark-packages.org/package/databricks/spark-avro"))

e = intercept[AnalysisException] {
sql(s"select id from avro.`file_path`")
}
assert(e.message.contains("Failed to find data source: avro. Please use Spark package " +
"http://spark-packages.org/package/databricks/spark-avro"))

e = intercept[AnalysisException] {
sql(s"select id from `org.apache.spark.sql.sources.HadoopFsRelationProvider`.`file_path`")
}
assert(e.message.contains("Table or view not found: " +
"`org.apache.spark.sql.sources.HadoopFsRelationProvider`.`file_path`"))

e = intercept[AnalysisException] {
sql(s"select id from `Jdbc`.`file_path`")
}
assert(e.message.contains("Unsupported data source type for direct query on files: Jdbc"))

e = intercept[AnalysisException] {
sql(s"select id from `org.apache.spark.sql.execution.datasources.jdbc`.`file_path`")
}
assert(e.message.contains("Unsupported data source type for direct query on files: " +
"org.apache.spark.sql.execution.datasources.jdbc"))
}

test("SortMergeJoin returns wrong results when using UnsafeRows") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.spark.sql.sources

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.{AnalysisException, SQLContext}
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types.{StringType, StructField, StructType}

Expand All @@ -42,9 +42,10 @@ class DDLSourceLoadSuite extends DataSourceTest with SharedSQLContext {
}

test("should fail to load ORC without Hive Support") {
intercept[ClassNotFoundException] {
val e = intercept[AnalysisException] {
spark.read.format("orc").load()
}
assert(e.message.contains("The ORC data source must be used with Hive support enabled"))
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.spark.sql.sources

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.execution.datasources.DataSource

class ResolvedDataSourceSuite extends SparkFunSuite {
Expand Down Expand Up @@ -60,13 +61,22 @@ class ResolvedDataSourceSuite extends SparkFunSuite {
classOf[org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat])
}

// Verifies that the short name "csv" and the legacy Databricks package name
// "com.databricks.spark.csv" both resolve to the built-in CSVFileFormat.
test("csv") {
assert(
getProvidingClass("csv") ===
classOf[org.apache.spark.sql.execution.datasources.csv.CSVFileFormat])
assert(
getProvidingClass("com.databricks.spark.csv") ===
classOf[org.apache.spark.sql.execution.datasources.csv.CSVFileFormat])
}

test("error message for unknown data sources") {
val error1 = intercept[ClassNotFoundException] {
val error1 = intercept[AnalysisException] {
getProvidingClass("avro")
}
assert(error1.getMessage.contains("spark-packages"))

val error2 = intercept[ClassNotFoundException] {
val error2 = intercept[AnalysisException] {
getProvidingClass("com.databricks.spark.avro")
}
assert(error2.getMessage.contains("spark-packages"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1184,11 +1184,12 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
}
}

test("run sql directly on files") {
test("run sql directly on files - parquet") {
val df = spark.range(100).toDF()
withTempPath(f => {
df.write.parquet(f.getCanonicalPath)
checkAnswer(sql(s"select id from parquet.`${f.getCanonicalPath}`"),
// data source type is case insensitive
checkAnswer(sql(s"select id from Parquet.`${f.getCanonicalPath}`"),
df)
checkAnswer(sql(s"select id from `org.apache.spark.sql.parquet`.`${f.getCanonicalPath}`"),
df)
Expand All @@ -1197,6 +1198,49 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
})
}

// Writes a 100-row DataFrame as ORC, then checks that SQL can query the file
// path directly via the `orc` provider (case-insensitive short name, fully
// qualified class name, and with a table alias).
test("run sql directly on files - orc") {
val df = spark.range(100).toDF()
withTempPath(f => {
df.write.orc(f.getCanonicalPath)
// data source type is case insensitive
checkAnswer(sql(s"select id from ORC.`${f.getCanonicalPath}`"),
df)
checkAnswer(sql(s"select id from `org.apache.spark.sql.hive.orc`.`${f.getCanonicalPath}`"),
df)
checkAnswer(sql(s"select a.id from orc.`${f.getCanonicalPath}` as a"),
df)
})
}

// Writes a 100-row DataFrame as CSV, then checks direct-on-file SQL through
// the `csv` provider. CSV has no header here, so the single column comes back
// as `_c0` (a string) and must be cast to int before comparing.
test("run sql directly on files - csv") {
val df = spark.range(100).toDF()
withTempPath(f => {
df.write.csv(f.getCanonicalPath)
// data source type is case insensitive
checkAnswer(sql(s"select cast(_c0 as int) id from CSV.`${f.getCanonicalPath}`"),
df)
checkAnswer(
sql(s"select cast(_c0 as int) id from `com.databricks.spark.csv`.`${f.getCanonicalPath}`"),
df)
checkAnswer(sql(s"select cast(a._c0 as int) id from csv.`${f.getCanonicalPath}` as a"),
df)
})
}

// Writes a 100-row DataFrame as JSON, then checks direct-on-file SQL through
// the `json` provider (mixed-case short name, fully qualified class name, and
// with a table alias).
test("run sql directly on files - json") {
val df = spark.range(100).toDF()
withTempPath(f => {
df.write.json(f.getCanonicalPath)
// data source type is case insensitive
checkAnswer(sql(s"select id from jsoN.`${f.getCanonicalPath}`"),
df)
checkAnswer(sql(s"select id from `org.apache.spark.sql.json`.`${f.getCanonicalPath}`"),
df)
checkAnswer(sql(s"select a.id from json.`${f.getCanonicalPath}` as a"),
df)
})
}

test("SPARK-8976 Wrong Result for Rollup #1") {
checkAnswer(sql(
"SELECT count(*) AS cnt, key % 5, grouping_id() FROM src GROUP BY key%5 WITH ROLLUP"),
Expand Down