Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
f0a0063
add a property 'spark.sql.inMemoryColumnarStorage.codegen' to control…
kiszk Mar 25, 2016
8368eb5
add CachedBatch.column() to prepare a columnar storage that is access…
kiszk Mar 25, 2016
11659fc
add utility methods for putting a primitive value to ByteBuffer
kiszk Mar 25, 2016
8d77142
add ByteBufferColumnVector, which wraps ByteBuffer for columnar stora…
kiszk Mar 25, 2016
7d6ad33
add APIs to get a CachedBatch
kiszk Mar 25, 2016
df2c9a3
add decompress() method
kiszk Mar 25, 2016
dafaa7f
add decompress() method
kiszk Mar 25, 2016
0771188
add ColumnVectorReference class
kiszk Mar 25, 2016
47a88e5
Do not call createHashMap() until actually executed at first
kiszk Mar 25, 2016
2b5bc2f
generate two versions of Java code for row-oriented and column-orient…
kiszk Mar 25, 2016
1f1e685
apply SPARK-14092 to generated code for columnar storage
kiszk Mar 25, 2016
568dc47
fix compilation error at runtime
kiszk Mar 25, 2016
5b5f037
drop debug print
kiszk Mar 25, 2016
6542af7
merge with the latest
kiszk Mar 31, 2016
c47dac8
rebase
kiszk Apr 10, 2016
55d3330
fix scalastyle error
kiszk Apr 12, 2016
7bef54f
fix scalastyle error
kiszk Apr 12, 2016
0d62d64
make source code shorter
kiszk Apr 13, 2016
7c06222
avoid memory leak due to twice allocations of hashMap
kiszk Apr 13, 2016
36af6ab
fix assertion error
kiszk Apr 13, 2016
eef56e4
make decompress() simple
kiszk Apr 14, 2016
6b47fb4
support nulls in decompress() for PassThru
kiszk Apr 16, 2016
9f30d2f
make it simple
kiszk Apr 16, 2016
78a8258
Walk over an iterator when a loop is finished since an aggregation is…
kiszk Apr 16, 2016
5f93490
remove unused code
kiszk Apr 17, 2016
a8708ab
Duplicate ByteBuffer in a column for CachedBatch
kiszk Apr 17, 2016
d456df7
Move string constants to WholeStageCodegen
kiszk Apr 17, 2016
257227f
avoid generating code for CachedBatch since these routines generate c…
kiszk Apr 17, 2016
598b9ff
Make code simple
kiszk Apr 17, 2016
9a79a57
Add benchmark suites with results
kiszk Apr 17, 2016
9943698
Reduce a conditional branch and cast in generated Java code
kiszk Apr 17, 2016
1b7e26f
rebase
kiszk Apr 19, 2016
7426f6f
add new test suites for decompress() and codegen for Dataframe.cache()
kiszk Apr 19, 2016
8b192b5
bug fixes
kiszk Apr 19, 2016
f48c947
support accumulators in InMemoryColumnarTableScan
kiszk Apr 19, 2016
5f773c5
fix scalastyle and eol code
kiszk Apr 19, 2016
afdcf04
remove non-primitive field from CachedBatch to avoid performance over…
kiszk Apr 21, 2016
e921199
simplify Whole stage codegen by moving code for CachedBatch to other …
kiszk Apr 22, 2016
03a0e9d
resolved conflicts
kiszk Apr 23, 2016
06a665f
fix build error
kiszk Apr 23, 2016
116e78a
Support only float/double
kiszk May 4, 2016
a12f3ca
fix test failures
kiszk May 4, 2016
ff3e770
move most of implementations into InMemoryTableScanExec
kiszk May 8, 2016
6ec8d6a
fix build error
kiszk May 9, 2016
9a02ce5
rebase
kiszk Jun 10, 2016
697890c
rebase
kiszk Jun 29, 2016
d035c42
update benchmark
kiszk Jul 7, 2016
54aabef
fixed scala type error
kiszk Jul 7, 2016
4775db2
remove unnecessary final attribute
kiszk Jul 9, 2016
fea9f34
update
kiszk Jul 9, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,28 @@ object BindReferences extends Logging {
}.asInstanceOf[A] // Kind of a hack, but safe. TODO: Tighten return type when possible.
}
}

/**
 * A leaf expression that refers to a single value inside a ColumnVector.
 *
 * `columnVar` is the name of the variable that keeps the ColumnVector, and `ordinal` is the
 * row index into that ColumnVector. Both are spliced verbatim into the generated Java code.
 */
case class ColumnVectorReference(
    columnVar: String, ordinal: String, dataType: DataType, nullable: Boolean)
  extends LeafExpression {

  override def toString: String = {
    val typeName = dataType.simpleString
    s"columnVector[$columnVar, $ordinal, $typeName]"
  }

  // Interpreted evaluation always yields null; the value is only read via doGenCode.
  override def eval(input: InternalRow): Any = null

  /**
   * Emits the Java statement(s) that load the referenced value into `ev.value`.
   *
   * For a nullable reference, a null check against the column vector is emitted first and
   * the type's default value is used when the slot is null. For a non-nullable reference
   * the value is read directly and `isNull` is fixed to the literal "false".
   */
  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val javaType = ctx.javaType(dataType)
    val value = ctx.getValue(columnVar, dataType, ordinal)
    if (!nullable) {
      val directRead = s"$javaType ${ev.value} = $value;"
      ev.copy(code = directRead, isNull = "false")
    } else {
      val nullDecl = s"boolean ${ev.isNull} = $columnVar.isNullAt($ordinal);"
      val fallback = ctx.defaultValue(dataType)
      val valueDecl = s"$javaType ${ev.value} = ${ev.isNull} ? $fallback : ($value);"
      // Leading newline and single separator newline match the original multi-line literal.
      ev.copy(code = "\n" + nullDecl + "\n" + valueDecl)
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -123,11 +123,12 @@ class CodegenContext {
*
* They will be kept as member variables in generated classes like `SpecificProjection`.
*/
val mutableStates: mutable.ArrayBuffer[(String, String, String)] =
mutable.ArrayBuffer.empty[(String, String, String)]
val mutableStates: mutable.ArrayBuffer[(String, String, String, String)] =
mutable.ArrayBuffer.empty[(String, String, String, String)]

def addMutableState(javaType: String, variableName: String, initCode: String): Unit = {
mutableStates += ((javaType, variableName, initCode))
def addMutableState(javaType: String, variableName: String, initCode: String,
    cleanupCode: String = ""): Unit = {
  // Each state is recorded as a (javaType, variableName, initCode, cleanupCode) tuple;
  // cleanupCode defaults to the empty string when the caller supplies none.
  val state = (javaType, variableName, initCode, cleanupCode)
  mutableStates += state
}

/**
Expand All @@ -149,7 +150,7 @@ class CodegenContext {
def declareMutableStates(): String = {
// It's possible that we add same mutable state twice, e.g. the `mergeExpressions` in
// `TypedAggregateExpression`, we should call `distinct` here to remove the duplicated ones.
mutableStates.distinct.map { case (javaType, variableName, _) =>
mutableStates.distinct.map { case (javaType, variableName, _, _) =>
s"private $javaType $variableName;"
}.mkString("\n")
}
Expand All @@ -160,6 +161,9 @@ class CodegenContext {
mutableStates.distinct.map(_._3).mkString("\n")
}

/**
 * Returns the cleanup code of the registered mutable states, one snippet per line.
 *
 * The same mutable state may be added more than once, so the states are de-duplicated
 * with `distinct`, as is done when declaring and initializing them; otherwise the cleanup
 * code of such a state would be emitted several times. States registered without cleanup
 * code (the empty-string default) contribute nothing to the result.
 */
def cleanupMutableStates(): String = {
  mutableStates.distinct
    .map { case (_, _, _, cleanupCode) => cleanupCode }
    .filter(_.nonEmpty)
    .mkString("\n")
}
/**
* Holding all the functions those will be added into generated class.
*/
Expand Down Expand Up @@ -206,6 +210,10 @@ class CodegenContext {
/** The variable name of the input row in generated code. */
final var INPUT_ROW = "i"

// Mutable code-generation flags; all three start at the defaults shown here.
// NOTE(review): presumably `isRow` selects row-oriented (true) versus column-oriented
// generated code — confirm against the code that reads it.
var isRow = true
// NOTE(review): looks like a switch for generating code over columnar storage; off by
// default — confirm where it is set.
var enableColumnCodeGen = false
// NOTE(review): presumably the name of the input iterator variable in generated code;
// empty until assigned — verify against callers.
var iteratorInput = ""

/**
* The map from a variable name to its next ID.
*/
Expand Down
Loading