
Commit aa70660

framework of CachedColumnarRDD
1 parent 6e37fa2 commit aa70660

File tree

1 file changed: +62 −0 lines changed

Lines changed: 62 additions & 0 deletions
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.execution.columnar

import scala.reflect.ClassTag

import org.apache.spark.{Dependency, Partition, SparkContext, TaskContext}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow

/** A partition that carries the column statistics collected for its cached data. */
private[columnar] class CachedColumnarRDDPartition(
    partitionIndex: Int,
    val columnStats: Array[InternalRow]) extends Partition {

  override def index: Int = partitionIndex
}

private[columnar] class CachedColumnarRDD[T: ClassTag](
    @transient private var _sc: SparkContext,
    @transient private var deps: Seq[Dependency[_]],
    dataRDD: RDD[T],
    partitionStats: Array[Array[InternalRow]]) extends RDD[T](_sc, deps) {

  /**
   * :: DeveloperApi ::
   * Implemented by subclasses to compute a given partition.
   */
  override def compute(split: Partition, context: TaskContext): Iterator[T] = {
    // Placeholder in this framework-only commit: no rows are produced yet.
    Iterator()
  }

  /**
   * Implemented by subclasses to return the set of partitions in this RDD. This method will only
   * be called once, so it is safe to implement a time-consuming computation in it.
   *
   * The partitions in this array must satisfy the following property:
   * `rdd.partitions.zipWithIndex.forall { case (partition, index) => partition.index == index }`
   */
  override protected def getPartitions: Array[Partition] = {
    // One partition per stats entry, each keeping its own column statistics.
    partitionStats.zipWithIndex.map {
      case (statsRow, index) =>
        new CachedColumnarRDDPartition(index, statsRow)
    }
  }
}
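
For orientation, here is a minimal usage sketch (not part of the commit) of how the two classes fit together. It assumes code living in the same org.apache.spark.sql.execution.columnar package (both classes are private[columnar]), an existing SparkContext named sc, and toy per-partition statistics rows; the pruning step at the end is only indicated, not implemented.

import org.apache.spark.OneToOneDependency
import org.apache.spark.sql.catalyst.InternalRow

// Two partitions of cached rows, plus one illustrative stats row per partition
// (here a toy (min, max) pair for a single column).
val dataRDD = sc.parallelize(Seq(InternalRow(1), InternalRow(2)), 2)
val stats: Array[Array[InternalRow]] =
  Array(Array(InternalRow(1, 1)), Array(InternalRow(2, 2)))

val cached = new CachedColumnarRDD[InternalRow](
  sc, Seq(new OneToOneDependency(dataRDD)), dataRDD, stats)

// getPartitions materializes one CachedColumnarRDDPartition per stats entry,
// so a scan can consult columnStats before deciding to read a partition.
cached.partitions.foreach { p =>
  val columnStats = p.asInstanceOf[CachedColumnarRDDPartition].columnStats
  // ...a partition-pruning decision based on columnStats would go here...
}

Note that compute still returns an empty iterator in this commit, so the sketch only exercises partition construction, not reading of cached data.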
