Skip to content

Commit 2857b55

Browse files
committed
Merge remote-tracking branch 'upstream/master' into UDAF
2 parents 4435f20 + c4e98ff commit 2857b55

File tree

21 files changed

+337
-131
lines changed

21 files changed

+337
-131
lines changed

build/mvn

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,17 @@ install_scala() {
112112
# the environment
113113
ZINC_PORT=${ZINC_PORT:-"3030"}
114114

115+
# Check for the `--force` flag dictating that `mvn` should be downloaded
116+
# regardless of whether the system already has a `mvn` install
117+
if [ "$1" == "--force" ]; then
118+
FORCE_MVN=1
119+
shift
120+
fi
121+
115122
# Install Maven if necessary
116123
MVN_BIN="$(command -v mvn)"
117124

118-
if [ ! "$MVN_BIN" ]; then
125+
if [ ! "$MVN_BIN" -o -n "$FORCE_MVN" ]; then
119126
install_mvn
120127
fi
121128

@@ -139,5 +146,7 @@ fi
139146
# Set any `mvn` options if not already present
140147
export MAVEN_OPTS=${MAVEN_OPTS:-"$_COMPILE_JVM_OPTS"}
141148

149+
echo "Using \`mvn\` from path: $MVN_BIN"
150+
142151
# Last, call the `mvn` command as usual
143152
${MVN_BIN} "$@"

docs/configuration.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -665,7 +665,7 @@ Apart from these, the following properties are also available, and may be useful
665665
<td>
666666
Initial size of Kryo's serialization buffer. Note that there will be one buffer
667667
<i>per core</i> on each worker. This buffer will grow up to
668-
<code>spark.kryoserializer.buffer.max.mb</code> if needed.
668+
<code>spark.kryoserializer.buffer.max</code> if needed.
669669
</td>
670670
</tr>
671671
<tr>

launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,12 @@
2525

2626
import static org.apache.spark.launcher.CommandBuilderUtils.*;
2727

28-
/**
28+
/**
2929
* Launcher for Spark applications.
30-
* <p/>
30+
* <p>
3131
* Use this class to start Spark applications programmatically. The class uses a builder pattern
3232
* to allow clients to configure the Spark application and launch it as a child process.
33+
* </p>
3334
*/
3435
public class SparkLauncher {
3536

launcher/src/main/java/org/apache/spark/launcher/package-info.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,17 @@
1717

1818
/**
1919
* Library for launching Spark applications.
20-
* <p/>
20+
*
21+
* <p>
2122
* This library allows applications to launch Spark programmatically. There's only one entry
2223
* point to the library - the {@link org.apache.spark.launcher.SparkLauncher} class.
23-
* <p/>
24+
* </p>
25+
*
26+
* <p>
2427
* To launch a Spark application, just instantiate a {@link org.apache.spark.launcher.SparkLauncher}
2528
* and configure the application to run. For example:
26-
*
29+
* </p>
30+
*
2731
* <pre>
2832
* {@code
2933
* import org.apache.spark.launcher.SparkLauncher;

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionals.scala

Lines changed: 36 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -230,24 +230,31 @@ case class CaseKeyWhen(key: Expression, branches: Seq[Expression]) extends CaseW
230230
}
231231
}
232232

233+
private def evalElse(input: InternalRow): Any = {
234+
if (branchesArr.length % 2 == 0) {
235+
null
236+
} else {
237+
branchesArr(branchesArr.length - 1).eval(input)
238+
}
239+
}
240+
233241
/** Written in imperative fashion for performance considerations. */
234242
override def eval(input: InternalRow): Any = {
235243
val evaluatedKey = key.eval(input)
236-
val len = branchesArr.length
237-
var i = 0
238-
// If all branches fail and an elseVal is not provided, the whole statement
239-
// defaults to null, according to Hive's semantics.
240-
while (i < len - 1) {
241-
if (threeValueEquals(evaluatedKey, branchesArr(i).eval(input))) {
242-
return branchesArr(i + 1).eval(input)
244+
// If key is null, we can just return the else part or null if there is no else.
245+
// If key is not null but doesn't match any when part, we need to return
246+
// the else part or null if there is no else, according to Hive's semantics.
247+
if (evaluatedKey != null) {
248+
val len = branchesArr.length
249+
var i = 0
250+
while (i < len - 1) {
251+
if (evaluatedKey == branchesArr(i).eval(input)) {
252+
return branchesArr(i + 1).eval(input)
253+
}
254+
i += 2
243255
}
244-
i += 2
245256
}
246-
var res: Any = null
247-
if (i == len - 1) {
248-
res = branchesArr(i).eval(input)
249-
}
250-
return res
257+
evalElse(input)
251258
}
252259

253260
override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
@@ -261,8 +268,7 @@ case class CaseKeyWhen(key: Expression, branches: Seq[Expression]) extends CaseW
261268
s"""
262269
if (!$got) {
263270
${cond.code}
264-
if (!${keyEval.isNull} && !${cond.isNull}
265-
&& ${ctx.genEqual(key.dataType, keyEval.primitive, cond.primitive)}) {
271+
if (!${cond.isNull} && ${ctx.genEqual(key.dataType, keyEval.primitive, cond.primitive)}) {
266272
$got = true;
267273
${res.code}
268274
${ev.isNull} = ${res.isNull};
@@ -290,19 +296,13 @@ case class CaseKeyWhen(key: Expression, branches: Seq[Expression]) extends CaseW
290296
boolean ${ev.isNull} = true;
291297
${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)};
292298
${keyEval.code}
293-
$cases
299+
if (!${keyEval.isNull}) {
300+
$cases
301+
}
294302
$other
295303
"""
296304
}
297305

298-
private def threeValueEquals(l: Any, r: Any) = {
299-
if (l == null || r == null) {
300-
false
301-
} else {
302-
l == r
303-
}
304-
}
305-
306306
override def toString: String = {
307307
s"CASE $key" + branches.sliding(2, 2).map {
308308
case Seq(cond, value) => s" WHEN $cond THEN $value"
@@ -311,7 +311,11 @@ case class CaseKeyWhen(key: Expression, branches: Seq[Expression]) extends CaseW
311311
}
312312
}
313313

314-
case class Least(children: Expression*) extends Expression {
314+
/**
315+
* A function that returns the least value of all parameters, skipping null values.
316+
* It takes at least 2 parameters, and returns null iff all parameters are null.
317+
*/
318+
case class Least(children: Seq[Expression]) extends Expression {
315319
require(children.length > 1, "LEAST requires at least 2 arguments, got " + children.length)
316320

317321
override def nullable: Boolean = children.forall(_.nullable)
@@ -356,12 +360,16 @@ case class Least(children: Expression*) extends Expression {
356360
${evalChildren.map(_.code).mkString("\n")}
357361
boolean ${ev.isNull} = true;
358362
${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)};
359-
${(0 until children.length).map(updateEval).mkString("\n")}
363+
${children.indices.map(updateEval).mkString("\n")}
360364
"""
361365
}
362366
}
363367

364-
case class Greatest(children: Expression*) extends Expression {
368+
/**
369+
* A function that returns the greatest value of all parameters, skipping null values.
370+
* It takes at least 2 parameters, and returns null iff all parameters are null.
371+
*/
372+
case class Greatest(children: Seq[Expression]) extends Expression {
365373
require(children.length > 1, "GREATEST requires at least 2 arguments, got " + children.length)
366374

367375
override def nullable: Boolean = children.forall(_.nullable)
@@ -406,7 +414,7 @@ case class Greatest(children: Expression*) extends Expression {
406414
${evalChildren.map(_.code).mkString("\n")}
407415
boolean ${ev.isNull} = true;
408416
${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)};
409-
${(0 until children.length).map(updateEval).mkString("\n")}
417+
${children.indices.map(updateEval).mkString("\n")}
410418
"""
411419
}
412420
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ConditionalExpressionSuite.scala

Lines changed: 40 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -144,35 +144,35 @@ class ConditionalExpressionSuite extends SparkFunSuite with ExpressionEvalHelper
144144
val c3 = 'a.string.at(2)
145145
val c4 = 'a.string.at(3)
146146
val c5 = 'a.string.at(4)
147-
checkEvaluation(Least(c4, c3, c5), "a", row)
148-
checkEvaluation(Least(c1, c2), 1, row)
149-
checkEvaluation(Least(c1, c2, Literal(-1)), -1, row)
150-
checkEvaluation(Least(c4, c5, c3, c3, Literal("a")), "a", row)
151-
152-
checkEvaluation(Least(Literal(null), Literal(null)), null, InternalRow.empty)
153-
checkEvaluation(Least(Literal(-1.0), Literal(2.5)), -1.0, InternalRow.empty)
154-
checkEvaluation(Least(Literal(-1), Literal(2)), -1, InternalRow.empty)
147+
checkEvaluation(Least(Seq(c4, c3, c5)), "a", row)
148+
checkEvaluation(Least(Seq(c1, c2)), 1, row)
149+
checkEvaluation(Least(Seq(c1, c2, Literal(-1))), -1, row)
150+
checkEvaluation(Least(Seq(c4, c5, c3, c3, Literal("a"))), "a", row)
151+
152+
checkEvaluation(Least(Seq(Literal(null), Literal(null))), null, InternalRow.empty)
153+
checkEvaluation(Least(Seq(Literal(-1.0), Literal(2.5))), -1.0, InternalRow.empty)
154+
checkEvaluation(Least(Seq(Literal(-1), Literal(2))), -1, InternalRow.empty)
155155
checkEvaluation(
156-
Least(Literal((-1.0).toFloat), Literal(2.5.toFloat)), (-1.0).toFloat, InternalRow.empty)
156+
Least(Seq(Literal((-1.0).toFloat), Literal(2.5.toFloat))), (-1.0).toFloat, InternalRow.empty)
157157
checkEvaluation(
158-
Least(Literal(Long.MaxValue), Literal(Long.MinValue)), Long.MinValue, InternalRow.empty)
159-
checkEvaluation(Least(Literal(1.toByte), Literal(2.toByte)), 1.toByte, InternalRow.empty)
158+
Least(Seq(Literal(Long.MaxValue), Literal(Long.MinValue))), Long.MinValue, InternalRow.empty)
159+
checkEvaluation(Least(Seq(Literal(1.toByte), Literal(2.toByte))), 1.toByte, InternalRow.empty)
160160
checkEvaluation(
161-
Least(Literal(1.toShort), Literal(2.toByte.toShort)), 1.toShort, InternalRow.empty)
162-
checkEvaluation(Least(Literal("abc"), Literal("aaaa")), "aaaa", InternalRow.empty)
163-
checkEvaluation(Least(Literal(true), Literal(false)), false, InternalRow.empty)
161+
Least(Seq(Literal(1.toShort), Literal(2.toByte.toShort))), 1.toShort, InternalRow.empty)
162+
checkEvaluation(Least(Seq(Literal("abc"), Literal("aaaa"))), "aaaa", InternalRow.empty)
163+
checkEvaluation(Least(Seq(Literal(true), Literal(false))), false, InternalRow.empty)
164164
checkEvaluation(
165-
Least(
165+
Least(Seq(
166166
Literal(BigDecimal("1234567890987654321123456")),
167-
Literal(BigDecimal("1234567890987654321123458"))),
167+
Literal(BigDecimal("1234567890987654321123458")))),
168168
BigDecimal("1234567890987654321123456"), InternalRow.empty)
169169
checkEvaluation(
170-
Least(Literal(Date.valueOf("2015-01-01")), Literal(Date.valueOf("2015-07-01"))),
170+
Least(Seq(Literal(Date.valueOf("2015-01-01")), Literal(Date.valueOf("2015-07-01")))),
171171
Date.valueOf("2015-01-01"), InternalRow.empty)
172172
checkEvaluation(
173-
Least(
173+
Least(Seq(
174174
Literal(Timestamp.valueOf("2015-07-01 08:00:00")),
175-
Literal(Timestamp.valueOf("2015-07-01 10:00:00"))),
175+
Literal(Timestamp.valueOf("2015-07-01 10:00:00")))),
176176
Timestamp.valueOf("2015-07-01 08:00:00"), InternalRow.empty)
177177
}
178178

@@ -183,35 +183,36 @@ class ConditionalExpressionSuite extends SparkFunSuite with ExpressionEvalHelper
183183
val c3 = 'a.string.at(2)
184184
val c4 = 'a.string.at(3)
185185
val c5 = 'a.string.at(4)
186-
checkEvaluation(Greatest(c4, c5, c3), "c", row)
187-
checkEvaluation(Greatest(c2, c1), 2, row)
188-
checkEvaluation(Greatest(c1, c2, Literal(2)), 2, row)
189-
checkEvaluation(Greatest(c4, c5, c3, Literal("ccc")), "ccc", row)
190-
191-
checkEvaluation(Greatest(Literal(null), Literal(null)), null, InternalRow.empty)
192-
checkEvaluation(Greatest(Literal(-1.0), Literal(2.5)), 2.5, InternalRow.empty)
193-
checkEvaluation(Greatest(Literal(-1), Literal(2)), 2, InternalRow.empty)
186+
checkEvaluation(Greatest(Seq(c4, c5, c3)), "c", row)
187+
checkEvaluation(Greatest(Seq(c2, c1)), 2, row)
188+
checkEvaluation(Greatest(Seq(c1, c2, Literal(2))), 2, row)
189+
checkEvaluation(Greatest(Seq(c4, c5, c3, Literal("ccc"))), "ccc", row)
190+
191+
checkEvaluation(Greatest(Seq(Literal(null), Literal(null))), null, InternalRow.empty)
192+
checkEvaluation(Greatest(Seq(Literal(-1.0), Literal(2.5))), 2.5, InternalRow.empty)
193+
checkEvaluation(Greatest(Seq(Literal(-1), Literal(2))), 2, InternalRow.empty)
194194
checkEvaluation(
195-
Greatest(Literal((-1.0).toFloat), Literal(2.5.toFloat)), 2.5.toFloat, InternalRow.empty)
195+
Greatest(Seq(Literal((-1.0).toFloat), Literal(2.5.toFloat))), 2.5.toFloat, InternalRow.empty)
196+
checkEvaluation(Greatest(
197+
Seq(Literal(Long.MaxValue), Literal(Long.MinValue))), Long.MaxValue, InternalRow.empty)
196198
checkEvaluation(
197-
Greatest(Literal(Long.MaxValue), Literal(Long.MinValue)), Long.MaxValue, InternalRow.empty)
198-
checkEvaluation(Greatest(Literal(1.toByte), Literal(2.toByte)), 2.toByte, InternalRow.empty)
199+
Greatest(Seq(Literal(1.toByte), Literal(2.toByte))), 2.toByte, InternalRow.empty)
199200
checkEvaluation(
200-
Greatest(Literal(1.toShort), Literal(2.toByte.toShort)), 2.toShort, InternalRow.empty)
201-
checkEvaluation(Greatest(Literal("abc"), Literal("aaaa")), "abc", InternalRow.empty)
202-
checkEvaluation(Greatest(Literal(true), Literal(false)), true, InternalRow.empty)
201+
Greatest(Seq(Literal(1.toShort), Literal(2.toByte.toShort))), 2.toShort, InternalRow.empty)
202+
checkEvaluation(Greatest(Seq(Literal("abc"), Literal("aaaa"))), "abc", InternalRow.empty)
203+
checkEvaluation(Greatest(Seq(Literal(true), Literal(false))), true, InternalRow.empty)
203204
checkEvaluation(
204-
Greatest(
205+
Greatest(Seq(
205206
Literal(BigDecimal("1234567890987654321123456")),
206-
Literal(BigDecimal("1234567890987654321123458"))),
207+
Literal(BigDecimal("1234567890987654321123458")))),
207208
BigDecimal("1234567890987654321123458"), InternalRow.empty)
208-
checkEvaluation(
209-
Greatest(Literal(Date.valueOf("2015-01-01")), Literal(Date.valueOf("2015-07-01"))),
209+
checkEvaluation(Greatest(
210+
Seq(Literal(Date.valueOf("2015-01-01")), Literal(Date.valueOf("2015-07-01")))),
210211
Date.valueOf("2015-07-01"), InternalRow.empty)
211212
checkEvaluation(
212-
Greatest(
213+
Greatest(Seq(
213214
Literal(Timestamp.valueOf("2015-07-01 08:00:00")),
214-
Literal(Timestamp.valueOf("2015-07-01 10:00:00"))),
215+
Literal(Timestamp.valueOf("2015-07-01 10:00:00")))),
215216
Timestamp.valueOf("2015-07-01 10:00:00"), InternalRow.empty)
216217
}
217218

sql/core/src/main/scala/org/apache/spark/sql/functions.scala

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1073,7 +1073,8 @@ object functions {
10731073
def floor(columnName: String): Column = floor(Column(columnName))
10741074

10751075
/**
1076-
* Returns the greatest value of the list of values.
1076+
* Returns the greatest value of the list of values, skipping null values.
1077+
* This function takes at least 2 parameters. It will return null iff all parameters are null.
10771078
*
10781079
* @group normal_funcs
10791080
* @since 1.5.0
@@ -1082,11 +1083,12 @@ object functions {
10821083
def greatest(exprs: Column*): Column = if (exprs.length < 2) {
10831084
sys.error("GREATEST takes at least 2 parameters")
10841085
} else {
1085-
Greatest(exprs.map(_.expr): _*)
1086+
Greatest(exprs.map(_.expr))
10861087
}
10871088

10881089
/**
1089-
* Returns the greatest value of the list of column names.
1090+
* Returns the greatest value of the list of column names, skipping null values.
1091+
* This function takes at least 2 parameters. It will return null iff all parameters are null.
10901092
*
10911093
* @group normal_funcs
10921094
* @since 1.5.0
@@ -1198,7 +1200,8 @@ object functions {
11981200
def hypot(l: Double, rightName: String): Column = hypot(l, Column(rightName))
11991201

12001202
/**
1201-
* Returns the least value of the list of values.
1203+
* Returns the least value of the list of values, skipping null values.
1204+
* This function takes at least 2 parameters. It will return null iff all parameters are null.
12021205
*
12031206
* @group normal_funcs
12041207
* @since 1.5.0
@@ -1207,11 +1210,12 @@ object functions {
12071210
def least(exprs: Column*): Column = if (exprs.length < 2) {
12081211
sys.error("LEAST takes at least 2 parameters")
12091212
} else {
1210-
Least(exprs.map(_.expr): _*)
1213+
Least(exprs.map(_.expr))
12111214
}
12121215

12131216
/**
1214-
* Returns the least value of the list of column names.
1217+
* Returns the least value of the list of column names, skipping null values.
1218+
* This function takes at least 2 parameters. It will return null iff all parameters are null.
12151219
*
12161220
* @group normal_funcs
12171221
* @since 1.5.0

0 commit comments

Comments (0)