Commit 32cfd3e
[SPARK-24361][SQL] Polish code block manipulation API
## What changes were proposed in this pull request?
Current code block manipulation API is immature and hacky. We need a formal API to manipulate code blocks.
The basic idea is making `JavaCode` as `TreeNode`. So we can use familiar `transform` API to manipulate code blocks and expressions in code blocks.
For example, we can replace `SimpleExprValue` in a code block like this:
```scala
code.transformExprValues {
case SimpleExprValue("1 + 1", _) => aliasedParam
}
```
The example use case is splitting code to methods.
For example, we have an `ExprCode` containing generated code. But it is too long and we need to split it as method. Because statement-based expressions can't be directly passed into. We need to transform them as variables first:
```scala
def getExprValues(block: Block): Set[ExprValue] = block match {
case c: CodeBlock =>
c.blockInputs.collect {
case e: ExprValue => e
}.toSet
case _ => Set.empty
}
def currentCodegenInputs(ctx: CodegenContext): Set[ExprValue] = {
// Collects current variables in ctx.currentVars and ctx.INPUT_ROW.
// It looks roughly like...
ctx.currentVars.flatMap { v =>
getExprValues(v.code) ++ Set(v.value, v.isNull)
}.toSet + ctx.INPUT_ROW
}
// A code block of an expression contains too long code, making it as method
if (eval.code.length > 1024) {
val setIsNull = if (!eval.isNull.isInstanceOf[LiteralValue]) {
...
} else {
""
}
// Pick up variables and statements necessary to pass in.
val currentVars = currentCodegenInputs(ctx)
val varsPassIn = getExprValues(eval.code).intersect(currentVars)
val aliasedExprs = HashMap.empty[SimpleExprValue, VariableValue]
// Replace statement-based expressions which can't be directly passed in the method.
val newCode = eval.code.transform {
case block =>
block.transformExprValues {
case s: SimpleExprValue(_, javaType) if varsPassIn.contains(s) =>
if (aliasedExprs.contains(s)) {
aliasedExprs(s)
} else {
val aliasedVariable = JavaCode.variable(ctx.freshName("aliasedVar"), javaType)
aliasedExprs += s -> aliasedVariable
varsPassIn += aliasedVariable
aliasedVariable
}
}
}
val params = varsPassIn.filter(!_.isInstanceOf[SimpleExprValue])).map { variable =>
s"${variable.javaType.getName} ${variable.variableName}"
}.mkString(", ")
val funcName = ctx.freshName("nodeName")
val javaType = CodeGenerator.javaType(dataType)
val newValue = JavaCode.variable(ctx.freshName("value"), dataType)
val funcFullName = ctx.addNewFunction(funcName,
s"""
|private $javaType $funcName($params) {
| $newCode
| $setIsNull
| return ${eval.value};
|}
""".stripMargin))
eval.value = newValue
val args = varsPassIn.filter(!_.isInstanceOf[SimpleExprValue])).map { variable =>
s"${variable.variableName}"
}
// Create a code block to assign statements to aliased variables.
val createVariables = aliasedExprs.foldLeft(EmptyBlock) { (block, (statement, variable)) =>
block + code"${statement.javaType.getName} $variable = $statement;"
}
eval.code = createVariables + code"$javaType $newValue = $funcFullName($args);"
}
```
## How was this patch tested?
Added unite tests.
Author: Liang-Chi Hsieh <[email protected]>
Closes #21405 from viirya/codeblock-api.1 parent 4be9f0c commit 32cfd3e
File tree
2 files changed
+104
-19
lines changed- sql/catalyst/src
- main/scala/org/apache/spark/sql/catalyst/expressions/codegen
- test/scala/org/apache/spark/sql/catalyst/expressions/codegen
2 files changed
+104
-19
lines changedLines changed: 36 additions & 12 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
22 | 22 | | |
23 | 23 | | |
24 | 24 | | |
| 25 | + | |
25 | 26 | | |
26 | 27 | | |
27 | 28 | | |
| |||
118 | 119 | | |
119 | 120 | | |
120 | 121 | | |
121 | | - | |
| 122 | + | |
122 | 123 | | |
123 | 124 | | |
124 | | - | |
125 | | - | |
126 | | - | |
127 | 125 | | |
128 | 126 | | |
129 | 127 | | |
| |||
148 | 146 | | |
149 | 147 | | |
150 | 148 | | |
| 149 | + | |
| 150 | + | |
| 151 | + | |
| 152 | + | |
| 153 | + | |
| 154 | + | |
| 155 | + | |
| 156 | + | |
| 157 | + | |
| 158 | + | |
| 159 | + | |
| 160 | + | |
| 161 | + | |
| 162 | + | |
| 163 | + | |
| 164 | + | |
| 165 | + | |
| 166 | + | |
| 167 | + | |
| 168 | + | |
| 169 | + | |
| 170 | + | |
| 171 | + | |
| 172 | + | |
| 173 | + | |
| 174 | + | |
| 175 | + | |
| 176 | + | |
151 | 177 | | |
152 | 178 | | |
153 | 179 | | |
154 | 180 | | |
155 | 181 | | |
| 182 | + | |
| 183 | + | |
156 | 184 | | |
157 | 185 | | |
158 | 186 | | |
| |||
219 | 247 | | |
220 | 248 | | |
221 | 249 | | |
222 | | - | |
223 | | - | |
224 | | - | |
225 | | - | |
226 | | - | |
227 | | - | |
| 250 | + | |
| 251 | + | |
228 | 252 | | |
229 | 253 | | |
230 | 254 | | |
| |||
239 | 263 | | |
240 | 264 | | |
241 | 265 | | |
242 | | - | |
| 266 | + | |
243 | 267 | | |
244 | | - | |
| 268 | + | |
245 | 269 | | |
246 | 270 | | |
247 | 271 | | |
| |||
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala
Lines changed: 68 additions & 7 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
65 | 65 | | |
66 | 66 | | |
67 | 67 | | |
68 | | - | |
| 68 | + | |
| 69 | + | |
| 70 | + | |
69 | 71 | | |
70 | 72 | | |
71 | 73 | | |
| |||
94 | 96 | | |
95 | 97 | | |
96 | 98 | | |
97 | | - | |
| 99 | + | |
| 100 | + | |
| 101 | + | |
98 | 102 | | |
99 | 103 | | |
100 | 104 | | |
| |||
107 | 111 | | |
108 | 112 | | |
109 | 113 | | |
110 | | - | |
| 114 | + | |
111 | 115 | | |
112 | 116 | | |
113 | 117 | | |
| |||
120 | 124 | | |
121 | 125 | | |
122 | 126 | | |
123 | | - | |
124 | | - | |
125 | | - | |
| 127 | + | |
| 128 | + | |
| 129 | + | |
| 130 | + | |
126 | 131 | | |
127 | | - | |
128 | 132 | | |
129 | 133 | | |
130 | 134 | | |
| |||
133 | 137 | | |
134 | 138 | | |
135 | 139 | | |
| 140 | + | |
| 141 | + | |
| 142 | + | |
| 143 | + | |
| 144 | + | |
| 145 | + | |
| 146 | + | |
| 147 | + | |
| 148 | + | |
| 149 | + | |
| 150 | + | |
| 151 | + | |
| 152 | + | |
| 153 | + | |
| 154 | + | |
| 155 | + | |
| 156 | + | |
| 157 | + | |
| 158 | + | |
| 159 | + | |
| 160 | + | |
| 161 | + | |
| 162 | + | |
| 163 | + | |
| 164 | + | |
| 165 | + | |
| 166 | + | |
| 167 | + | |
| 168 | + | |
| 169 | + | |
| 170 | + | |
| 171 | + | |
| 172 | + | |
| 173 | + | |
| 174 | + | |
| 175 | + | |
| 176 | + | |
| 177 | + | |
| 178 | + | |
| 179 | + | |
| 180 | + | |
| 181 | + | |
| 182 | + | |
| 183 | + | |
| 184 | + | |
| 185 | + | |
| 186 | + | |
| 187 | + | |
| 188 | + | |
| 189 | + | |
| 190 | + | |
| 191 | + | |
| 192 | + | |
| 193 | + | |
| 194 | + | |
| 195 | + | |
| 196 | + | |
136 | 197 | | |
0 commit comments