Skip to content

Commit 9fe3adc

Browse files
chenghao-intelrxin
authored andcommitted
[SPARK-8248][SQL] string function: length
Author: Cheng Hao <[email protected]> Closes apache#6724 from chenghao-intel/length and squashes the following commits: aaa3c31 [Cheng Hao] revert the additional change 97148a9 [Cheng Hao] remove the codegen testing temporally ae08003 [Cheng Hao] update the comments 1eb1fd1 [Cheng Hao] simplify the code as commented 3e92d32 [Cheng Hao] use the selectExpr in unit test intead of SQLQuery 3c729aa [Cheng Hao] fix bug for constant null value in codegen 3641f06 [Cheng Hao] keep the length() method for registered function 8e30171 [Cheng Hao] update the code as comment db604ae [Cheng Hao] Add code gen support 548d2ef [Cheng Hao] register the length() 09a0738 [Cheng Hao] add length support
1 parent 4e42842 commit 9fe3adc

File tree

6 files changed

+82
-5
lines changed

6 files changed

+82
-5
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -89,14 +89,10 @@ object FunctionRegistry {
8989
expression[CreateArray]("array"),
9090
expression[Coalesce]("coalesce"),
9191
expression[Explode]("explode"),
92-
expression[Lower]("lower"),
93-
expression[Substring]("substr"),
94-
expression[Substring]("substring"),
9592
expression[Rand]("rand"),
9693
expression[Randn]("randn"),
9794
expression[CreateStruct]("struct"),
9895
expression[Sqrt]("sqrt"),
99-
expression[Upper]("upper"),
10096

10197
// Math functions
10298
expression[Acos]("acos"),
@@ -132,7 +128,14 @@ object FunctionRegistry {
132128
expression[Last]("last"),
133129
expression[Max]("max"),
134130
expression[Min]("min"),
135-
expression[Sum]("sum")
131+
expression[Sum]("sum"),
132+
133+
// string functions
134+
expression[Lower]("lower"),
135+
expression[StringLength]("length"),
136+
expression[Substring]("substr"),
137+
expression[Substring]("substring"),
138+
expression[Upper]("upper")
136139
)
137140

138141
val builtin: FunctionRegistry = {

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,9 @@ abstract class LeafExpression extends Expression with trees.LeafNode[Expression]
212212
abstract class UnaryExpression extends Expression with trees.UnaryNode[Expression] {
213213
self: Product =>
214214

215+
override def foldable: Boolean = child.foldable
216+
override def nullable: Boolean = child.nullable
217+
215218
/**
216219
* Called by unary expressions to generate a code block that returns null if its parent returns
217220
* null, and if not null, use `f` to generate the expression.

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,3 +294,24 @@ object Substring {
294294
apply(str, pos, Literal(Integer.MAX_VALUE))
295295
}
296296
}
297+
298+
/**
299+
* A function that returns the length of the given string expression.
300+
*/
301+
case class StringLength(child: Expression) extends UnaryExpression with ExpectsInputTypes {
302+
override def dataType: DataType = IntegerType
303+
override def expectedChildTypes: Seq[DataType] = Seq(StringType)
304+
305+
override def eval(input: Row): Any = {
306+
val string = child.eval(input)
307+
if (string == null) null else string.asInstanceOf[UTF8String].length
308+
}
309+
310+
override def toString: String = s"length($child)"
311+
312+
override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
313+
defineCodeGen(ctx, ev, c => s"($c).length()")
314+
}
315+
}
316+
317+

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringFunctionsSuite.scala

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,4 +215,16 @@ class StringFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
215215
evaluate("abbbbc" rlike regEx, create_row("**"))
216216
}
217217
}
218+
219+
test("length for string") {
220+
val regEx = 'a.string.at(0)
221+
checkEvaluation(StringLength(Literal("abc")), 3, create_row("abdef"))
222+
checkEvaluation(StringLength(regEx), 5, create_row("abdef"))
223+
checkEvaluation(StringLength(regEx), 0, create_row(""))
224+
checkEvaluation(StringLength(regEx), null, create_row(null))
225+
// TODO: there is currently a bug in codegen, so this check is temporarily disabled
226+
// checkEvaluation(StringLength(Literal.create(null, StringType)), null, create_row("abdef"))
227+
}
228+
229+
218230
}

sql/core/src/main/scala/org/apache/spark/sql/functions.scala

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ import org.apache.spark.util.Utils
3737
* @groupname normal_funcs Non-aggregate functions
3838
* @groupname math_funcs Math functions
3939
* @groupname window_funcs Window functions
40+
* @groupname string_funcs String functions
4041
* @groupname Ungrouped Support functions for DataFrames.
4142
* @since 1.3.0
4243
*/
@@ -1317,6 +1318,23 @@ object functions {
13171318
*/
13181319
def toRadians(columnName: String): Column = toRadians(Column(columnName))
13191320

1321+
//////////////////////////////////////////////////////////////////////////////////////////////
1322+
// String functions
1323+
//////////////////////////////////////////////////////////////////////////////////////////////
1324+
1325+
/**
1326+
* Computes the length of a given string value
1327+
* @group string_funcs
1328+
* @since 1.5.0
1329+
*/
1330+
def strlen(e: Column): Column = StringLength(e.expr)
1331+
1332+
/**
1333+
* Computes the length of a given string column
1334+
* @group string_funcs
1335+
* @since 1.5.0
1336+
*/
1337+
def strlen(columnName: String): Column = strlen(Column(columnName))
13201338

13211339
//////////////////////////////////////////////////////////////////////////////////////////////
13221340
//////////////////////////////////////////////////////////////////////////////////////////////

sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,4 +109,24 @@ class DataFrameFunctionsSuite extends QueryTest {
109109
testData2.select(bitwiseNOT($"a")),
110110
testData2.collect().toSeq.map(r => Row(~r.getInt(0))))
111111
}
112+
113+
test("length") {
114+
checkAnswer(
115+
nullStrings.select(strlen($"s"), strlen("s")),
116+
nullStrings.collect().toSeq.map { r =>
117+
val v = r.getString(1)
118+
val l = if (v == null) null else v.length
119+
Row(l, l)
120+
})
121+
122+
checkAnswer(
123+
nullStrings.selectExpr("length(s)"),
124+
nullStrings.collect().toSeq.map { r =>
125+
val v = r.getString(1)
126+
val l = if (v == null) null else v.length
127+
Row(l)
128+
})
129+
}
130+
131+
112132
}

0 commit comments

Comments
 (0)