Commit a3ca551 (2 parents: e0afca3 + c07838b)

Merge remote-tracking branch 'upstream/master' into UDAF

Conflicts:
    sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala


41 files changed: 213 additions, 65 deletions

project/MimaExcludes.scala

Lines changed: 47 additions & 0 deletions

@@ -104,6 +104,53 @@ object MimaExcludes {
             // SPARK-7422 add argmax for sparse vectors
             ProblemFilters.exclude[MissingMethodProblem](
               "org.apache.spark.mllib.linalg.Vector.argmax")
+          ) ++ Seq(
+            // SPARK-8906 Move all internal data source classes into execution.datasources
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.ResolvedDataSource"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.PreInsertCastAndRename$"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.CreateTableUsingAsSelect$"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.InsertIntoDataSource$"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.SqlNewHadoopPartition"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.PartitioningUtils$PartitionValues$"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.DefaultWriterContainer"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.PartitioningUtils$PartitionValues"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.RefreshTable$"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.CreateTempTableUsing$"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.PartitionSpec"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.DynamicPartitionWriterContainer"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.CreateTableUsingAsSelect"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.SqlNewHadoopRDD$"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.DescribeCommand$"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.PartitioningUtils$"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.SqlNewHadoopRDD"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.PreInsertCastAndRename"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.Partition$"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.LogicalRelation$"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.PartitioningUtils"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.LogicalRelation"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.Partition"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.BaseWriterContainer"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.PreWriteCheck"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.CreateTableUsing"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.RefreshTable"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.SqlNewHadoopRDD$NewHadoopMapPartitionsWithSplitRDD"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.DataSourceStrategy$"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.CreateTempTableUsing"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.CreateTempTableUsingAsSelect$"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.CreateTempTableUsingAsSelect"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.CreateTableUsing$"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.ResolvedDataSource$"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.PreWriteCheck$"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.InsertIntoDataSource"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.InsertIntoHadoopFsRelation"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.DDLParser"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.CaseInsensitiveMap"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.DataSourceStrategy"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.SqlNewHadoopRDD$NewHadoopMapPartitionsWithSplitRDD$"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.PartitionSpec$"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.DescribeCommand"),
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.DDLException")
           )
 
         case v if v.startsWith("1.4") =>
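
For context: MiMa (the Migration Manager) reports every class that disappears from a previously published package as a binary incompatibility, so moving these internal classes out of org.apache.spark.sql.sources means each old name must be silenced explicitly. A minimal sketch of the shape of such an exclusion list, using MiMa's com.typesafe.tools.mima.core API (the ExampleExcludes object name is illustrative, not part of the Spark file):

import com.typesafe.tools.mima.core._

object ExampleExcludes {
  // Each filter suppresses one reported incompatibility. MissingClassProblem
  // covers a class that vanished from its old package; MissingMethodProblem
  // covers a removed or relocated method.
  val excludes: Seq[ProblemFilter] = Seq(
    ProblemFilters.exclude[MissingClassProblem](
      "org.apache.spark.sql.sources.LogicalRelation"),
    ProblemFilters.exclude[MissingMethodProblem](
      "org.apache.spark.mllib.linalg.Vector.argmax")
  )
}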

sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala

Lines changed: 1 addition & 1 deletion

@@ -38,8 +38,8 @@ import org.apache.spark.sql.catalyst.plans.logical.{Filter, _}
 import org.apache.spark.sql.catalyst.plans.{Inner, JoinType}
 import org.apache.spark.sql.catalyst.{CatalystTypeConverters, ScalaReflection, SqlParser}
 import org.apache.spark.sql.execution.{EvaluatePython, ExplainCommand, LogicalRDD}
+import org.apache.spark.sql.execution.datasources.CreateTableUsingAsSelect
 import org.apache.spark.sql.json.JacksonGenerator
-import org.apache.spark.sql.sources.CreateTableUsingAsSelect
 import org.apache.spark.sql.types._
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.util.Utils

sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala

Lines changed: 2 additions & 2 deletions

@@ -20,16 +20,16 @@ package org.apache.spark.sql
 import java.util.Properties
 
 import org.apache.hadoop.fs.Path
-import org.apache.spark.{Logging, Partition}
 
+import org.apache.spark.{Logging, Partition}
 import org.apache.spark.annotation.Experimental
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.execution.datasources.{ResolvedDataSource, LogicalRelation}
 import org.apache.spark.sql.jdbc.{JDBCPartition, JDBCPartitioningInfo, JDBCRelation}
 import org.apache.spark.sql.json.JSONRelation
 import org.apache.spark.sql.parquet.ParquetRelation2
-import org.apache.spark.sql.sources.{LogicalRelation, ResolvedDataSource}
 import org.apache.spark.sql.types.StructType
 
 /**

sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala

Lines changed: 1 addition & 1 deletion

@@ -22,8 +22,8 @@ import java.util.Properties
 import org.apache.spark.annotation.Experimental
 import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
 import org.apache.spark.sql.catalyst.plans.logical.InsertIntoTable
+import org.apache.spark.sql.execution.datasources.{CreateTableUsingAsSelect, ResolvedDataSource}
 import org.apache.spark.sql.jdbc.{JDBCWriteDetails, JdbcUtils}
-import org.apache.spark.sql.sources.{ResolvedDataSource, CreateTableUsingAsSelect}
 
 
 /**

sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala

Lines changed: 5 additions & 4 deletions

@@ -39,8 +39,9 @@ import org.apache.spark.sql.catalyst.optimizer.{DefaultOptimizer, Optimizer}
 import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
 import org.apache.spark.sql.catalyst.{InternalRow, ParserDialect, _}
-import org.apache.spark.sql.execution.{Filter, _}
-import org.apache.spark.sql.sources._
+import org.apache.spark.sql.execution._
+import org.apache.spark.sql.execution.datasources._
+import org.apache.spark.sql.sources.BaseRelation
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.UTF8String
 import org.apache.spark.util.Utils
@@ -146,11 +147,11 @@ class SQLContext(@transient val sparkContext: SparkContext)
     new Analyzer(catalog, functionRegistry, conf) {
       override val extendedResolutionRules =
         ExtractPythonUDFs ::
-        sources.PreInsertCastAndRename ::
+        PreInsertCastAndRename ::
         Nil
 
       override val extendedCheckRules = Seq(
-        sources.PreWriteCheck(catalog)
+        datasources.PreWriteCheck(catalog)
       )
     }
 
sql/core/src/main/scala/org/apache/spark/sql/execution/GeneratedAggregate.scala

Lines changed: 13 additions & 1 deletion

@@ -266,7 +266,18 @@ case class GeneratedAggregate(
 
     val joinedRow = new JoinedRow3
 
-    if (groupingExpressions.isEmpty) {
+    if (!iter.hasNext) {
+      // This is an empty input, so return early so that we do not allocate data structures
+      // that won't be cleaned up (see SPARK-8357).
+      if (groupingExpressions.isEmpty) {
+        // This is a global aggregate, so return an empty aggregation buffer.
+        val resultProjection = resultProjectionBuilder()
+        Iterator(resultProjection(newAggregationBuffer(EmptyRow)))
+      } else {
+        // This is a grouped aggregate, so return an empty iterator.
+        Iterator[InternalRow]()
+      }
+    } else if (groupingExpressions.isEmpty) {
       // TODO: Codegening anything other than the updateProjection is probably over kill.
       val buffer = newAggregationBuffer(EmptyRow).asInstanceOf[MutableRow]
       var currentRow: InternalRow = null
@@ -280,6 +291,7 @@ case class GeneratedAggregate(
       val resultProjection = resultProjectionBuilder()
       Iterator(resultProjection(buffer))
     } else if (unsafeEnabled) {
+      assert(iter.hasNext, "There should be at least one row for this path")
       log.info("Using Unsafe-based aggregator")
       val aggregationMap = new UnsafeFixedWidthAggregationMap(
         newAggregationBuffer,
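
The new branch short-circuits empty partitions before any aggregation state is built: a global aggregate must still emit one row computed from an empty buffer, while a grouped aggregate emits nothing at all. A self-contained sketch of the same control flow on plain Scala collections (all names here are illustrative; Spark's version operates on InternalRow and codegen projections):

def sumPerGroup(iter: Iterator[(String, Int)], grouped: Boolean): Iterator[Long] = {
  if (!iter.hasNext) {
    // Empty input: return early so no hash map is ever allocated
    // (the motivation behind SPARK-8357).
    if (grouped) Iterator.empty // grouped aggregate: no groups, no rows
    else Iterator(0L)           // global aggregate: one row from the empty buffer
  } else if (grouped) {
    val sums = scala.collection.mutable.Map.empty[String, Long]
    iter.foreach { case (k, v) => sums(k) = sums.getOrElse(k, 0L) + v }
    sums.valuesIterator
  } else {
    Iterator(iter.map(_._2.toLong).sum)
  }
}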

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala

Lines changed: 2 additions & 3 deletions

@@ -17,6 +17,7 @@
 
 package org.apache.spark.sql.execution
 
+import org.apache.spark.sql.{SQLContext, Strategy, execution}
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression2}
@@ -25,12 +26,10 @@ import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.logical.{BroadcastHint, LogicalPlan}
 import org.apache.spark.sql.catalyst.plans.physical._
 import org.apache.spark.sql.columnar.{InMemoryColumnarTableScan, InMemoryRelation}
-import org.apache.spark.sql.execution.aggregate.{FinalAndCompleteAggregate2Sort, Aggregate2Sort}
 import org.apache.spark.sql.execution.{DescribeCommand => RunnableDescribeCommand, aggregate}
+import org.apache.spark.sql.execution.datasources.{CreateTableUsing, CreateTempTableUsing, DescribeCommand => LogicalDescribeCommand, _}
 import org.apache.spark.sql.parquet._
-import org.apache.spark.sql.sources.{CreateTableUsing, CreateTempTableUsing, DescribeCommand => LogicalDescribeCommand, _}
 import org.apache.spark.sql.types._
-import org.apache.spark.sql.{SQLContext, Strategy, execution}
 
 private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
   self: SQLContext#SparkPlanner =>
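
This hunk mostly regroups imports after the move (the data-source planning classes now come from execution.datasources). For orientation, a planning strategy maps one logical plan node to candidate physical plans, returning Nil when it does not apply; a minimal sketch of that contract (the NoOpStrategy object is a placeholder, not code from this commit):

import org.apache.spark.sql.Strategy
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.SparkPlan

object NoOpStrategy extends Strategy {
  // A real strategy such as DataSourceStrategy pattern-matches specific
  // logical operators and returns their physical counterparts; returning
  // Nil signals "this strategy does not apply to this node".
  def apply(plan: LogicalPlan): Seq[SparkPlan] = Nil
}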

sql/core/src/main/scala/org/apache/spark/sql/sources/SqlNewHadoopRDD.scala renamed to sql/core/src/main/scala/org/apache/spark/sql/execution/SqlNewHadoopRDD.scala

Lines changed: 4 additions & 5 deletions

@@ -15,24 +15,23 @@
  * limitations under the License.
  */
 
-package org.apache.spark.sql.sources
+package org.apache.spark.sql.execution
 
 import java.text.SimpleDateFormat
 import java.util.Date
 
+import org.apache.spark.{Partition => SparkPartition, _}
 import org.apache.hadoop.conf.{Configurable, Configuration}
 import org.apache.hadoop.io.Writable
 import org.apache.hadoop.mapreduce._
 import org.apache.hadoop.mapreduce.lib.input.{CombineFileSplit, FileSplit}
-import org.apache.spark.broadcast.Broadcast
-
-import org.apache.spark.{Partition => SparkPartition, _}
 import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.executor.DataReadMethod
 import org.apache.spark.mapreduce.SparkHadoopMapReduceUtil
-import org.apache.spark.rdd.{RDD, HadoopRDD}
 import org.apache.spark.rdd.NewHadoopRDD.NewHadoopMapPartitionsWithSplitRDD
+import org.apache.spark.rdd.{HadoopRDD, RDD}
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.util.{SerializableConfiguration, Utils}
 

sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala renamed to sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala

Lines changed: 5 additions & 6 deletions

@@ -15,22 +15,21 @@
  * limitations under the License.
  */
 
-package org.apache.spark.sql.sources
+package org.apache.spark.sql.execution.datasources
 
 import org.apache.spark.{Logging, TaskContext}
 import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.rdd.{MapPartitionsRDD, RDD, UnionRDD}
-import org.apache.spark.sql._
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions
+import org.apache.spark.sql.catalyst.{InternalRow, expressions}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.planning.PhysicalOperation
 import org.apache.spark.sql.catalyst.plans.logical
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.sources._
 import org.apache.spark.sql.types.{StringType, StructType}
-import org.apache.spark.sql.{SaveMode, Strategy, execution, sources}
-import org.apache.spark.util.{SerializableConfiguration, Utils}
+import org.apache.spark.sql.{SaveMode, Strategy, execution, sources, _}
 import org.apache.spark.unsafe.types.UTF8String
+import org.apache.spark.util.{SerializableConfiguration, Utils}
 
 /**
  * A Strategy for planning scans over data sources defined using the sources API.

sql/core/src/main/scala/org/apache/spark/sql/sources/LogicalRelation.scala renamed to sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala

Lines changed: 4 additions & 3 deletions

@@ -14,11 +14,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.spark.sql.sources
+package org.apache.spark.sql.execution.datasources
 
 import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation
-import org.apache.spark.sql.catalyst.expressions.{AttributeReference, AttributeMap}
-import org.apache.spark.sql.catalyst.plans.logical.{Statistics, LeafNode, LogicalPlan}
+import org.apache.spark.sql.catalyst.expressions.{AttributeMap, AttributeReference}
+import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics}
+import org.apache.spark.sql.sources.BaseRelation
 
 /**
  * Used to link a [[BaseRelation]] in to a logical query plan.
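
After the move, LogicalRelation lives in execution.datasources and explicitly imports the one type that remains public API: org.apache.spark.sql.sources.BaseRelation. That split is the point of SPARK-8906: user-facing extension points stay in sources, while internal plumbing moves out. A hedged sketch of the minimal BaseRelation a third-party data source still implements against the public package (the class name and schema are illustrative):

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.sources.BaseRelation
import org.apache.spark.sql.types.{StringType, StructField, StructType}

// The smallest possible relation: it only reports which SQLContext it
// belongs to and what shape its rows have. Actual scan behaviour comes
// from mixing in traits such as TableScan (not shown here).
class ExampleRelation(override val sqlContext: SQLContext) extends BaseRelation {
  override def schema: StructType =
    StructType(StructField("value", StringType) :: Nil)
}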
