Skip to content

Commit 02f5edf

Browse files
committed
Updated.
1 parent 90af6b8 commit 02f5edf

File tree

8 files changed

+94
-89
lines changed

8 files changed

+94
-89
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,7 @@ case object NullsLast extends NullOrdering{
5454
* An expression that can be used to sort a tuple. This class extends expression primarily so that
5555
* transformations over expression will descend into its child.
5656
*/
57-
case class SortOrder(
58-
child: Expression,
59-
direction: SortDirection,
60-
nullOrdering: NullOrdering)
57+
case class SortOrder(child: Expression, direction: SortDirection, nullOrdering: NullOrdering)
6158
extends UnaryExpression with Unevaluable {
6259

6360
/** Sort order is not foldable because we don't have an eval for it. */
@@ -94,34 +91,23 @@ case class SortPrefix(child: SortOrder) extends UnaryExpression {
9491

9592
val nullValue = child.child.dataType match {
9693
case BooleanType | DateType | TimestampType | _: IntegralType =>
97-
if (nullAsSmallest) {
98-
Long.MinValue
99-
} else {
100-
Long.MaxValue
101-
}
94+
if (nullAsSmallest) Long.MinValue else Long.MaxValue
10295
case dt: DecimalType if dt.precision - dt.scale <= Decimal.MAX_LONG_DIGITS =>
103-
if (nullAsSmallest) {
104-
Long.MinValue
105-
} else {
106-
Long.MaxValue
107-
}
96+
if (nullAsSmallest) Long.MinValue else Long.MaxValue
10897
case _: DecimalType =>
10998
if (nullAsSmallest) {
11099
DoublePrefixComparator.computePrefix(Double.NegativeInfinity)
111100
} else {
112101
DoublePrefixComparator.computePrefix(Double.NaN)
113102
}
114103
case _ =>
115-
if (nullAsSmallest) {
116-
0L
117-
} else {
118-
-1L
119-
}
104+
if (nullAsSmallest) 0L else -1L
120105
}
121106

122-
private def nullAsSmallest: Boolean = (child.isAscending && child.nullOrdering == NullsFirst) ||
107+
private def nullAsSmallest: Boolean = {
108+
(child.isAscending && child.nullOrdering == NullsFirst) ||
123109
(!child.isAscending && child.nullOrdering == NullsLast)
124-
110+
}
125111

126112
override def eval(input: InternalRow): Any = throw new UnsupportedOperationException
127113

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -100,16 +100,16 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
100100
// Nothing
101101
} else if ($isNullA) {
102102
return ${
103-
order.nullOrdering match {
104-
case NullsFirst => "-1"
105-
case NullsLast => "1"
106-
}};
103+
order.nullOrdering match {
104+
case NullsFirst => "-1"
105+
case NullsLast => "1"
106+
}};
107107
} else if ($isNullB) {
108108
return ${
109-
order.nullOrdering match {
110-
case NullsFirst => "1"
111-
case NullsLast => "-1"
112-
}};
109+
order.nullOrdering match {
110+
case NullsFirst => "1"
111+
case NullsLast => "-1"
112+
}};
113113
} else {
114114
int comp = ${ctx.genComp(order.child.dataType, primitiveA, primitiveB)};
115115
if (comp != 0) {

sql/core/src/main/scala/org/apache/spark/sql/Column.scala

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1007,7 +1007,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
10071007
/**
10081008
* Returns an ordering used in sorting.
10091009
* {{{
1010-
* // Scala: sort a DataFrame by age column in descending order.
1010+
* // Scala
10111011
* df.sort(df("age").desc)
10121012
*
10131013
* // Java
@@ -1020,9 +1020,9 @@ class Column(protected[sql] val expr: Expression) extends Logging {
10201020
def desc: Column = withExpr { SortOrder(expr, Descending) }
10211021

10221022
/**
1023-
* Returns an ordering used in sorting.
1023+
* Returns a descending ordering used in sorting, where null values appear before non-null values.
10241024
* {{{
1025-
* // Scala: sort a DataFrame by age column in descending order with NULLS FIRST.
1025+
* // Scala: sort a DataFrame by age column in descending order and null values appearing first.
10261026
* df.sort(df("age").desc_nulls_first)
10271027
*
10281028
* // Java
@@ -1035,7 +1035,22 @@ class Column(protected[sql] val expr: Expression) extends Logging {
10351035
def desc_nulls_first: Column = withExpr { SortOrder(expr, Descending, NullsFirst) }
10361036

10371037
/**
1038-
* Returns an ordering used in sorting.
1038+
* Returns a descending ordering used in sorting, where null values appear after non-null values.
1039+
* {{{
1040+
* // Scala: sort a DataFrame by age column in descending order and null values appearing last.
1041+
* df.sort(df("age").desc_nulls_last)
1042+
*
1043+
* // Java
1044+
* df.sort(df.col("age").desc_nulls_last());
1045+
* }}}
1046+
*
1047+
* @group expr_ops
1048+
* @since 2.1.0
1049+
*/
1050+
def desc_nulls_last: Column = withExpr { SortOrder(expr, Descending, NullsLast) }
1051+
1052+
/**
1053+
* Returns an ascending ordering used in sorting.
10391054
* {{{
10401055
* // Scala: sort a DataFrame by age column in ascending order.
10411056
* df.sort(df("age").asc)
@@ -1050,9 +1065,9 @@ class Column(protected[sql] val expr: Expression) extends Logging {
10501065
def asc: Column = withExpr { SortOrder(expr, Ascending) }
10511066

10521067
/**
1053-
* Returns an ordering used in sorting.
1068+
* Returns an ascending ordering used in sorting, where null values appear before non-null values.
10541069
* {{{
1055-
* // Scala: sort a DataFrame by age column in ascending order with NULLS LAST.
1070+
* // Scala: sort a DataFrame by age column in ascending order and null values appearing first.
10561071
* df.sort(df("age").asc_nulls_first)
10571072
*
10581073
* // Java
@@ -1062,8 +1077,22 @@ class Column(protected[sql] val expr: Expression) extends Logging {
10621077
* @group expr_ops
10631078
* @since 2.1.0
10641079
*/
1065-
def asc_nulls_last: Column = withExpr { SortOrder(expr, Ascending, NullsLast) }
1080+
def asc_nulls_first: Column = withExpr { SortOrder(expr, Ascending, NullsFirst) }
10661081

1082+
/**
1083+
* Returns an ascending ordering used in sorting, where null values appear after non-null values.
1084+
* {{{
1085+
* // Scala: sort a DataFrame by age column in ascending order and null values appearing last.
1086+
* df.sort(df("age").asc_nulls_last)
1087+
*
1088+
* // Java
1089+
* df.sort(df.col("age").asc_nulls_last());
1090+
* }}}
1091+
*
1092+
* @group expr_ops
1093+
* @since 2.1.0
1094+
*/
1095+
def asc_nulls_last: Column = withExpr { SortOrder(expr, Ascending, NullsLast) }
10671096

10681097
/**
10691098
* Prints the expression to the console for debugging purpose.

sql/core/src/main/scala/org/apache/spark/sql/functions.scala

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,6 @@ object functions {
109109
/**
110110
* Returns a sort expression based on ascending order of the column.
111111
* {{{
112-
* // Sort by dept in ascending order, and then age in descending order.
113112
* df.sort(asc("dept"), desc("age"))
114113
* }}}
115114
*
@@ -119,22 +118,32 @@ object functions {
119118
def asc(columnName: String): Column = Column(columnName).asc
120119

121120
/**
122-
* Returns a sort expression based on ascending order of the column with NULLS LAST.
121+
* Returns a sort expression based on ascending order of the column,
122+
* and null values appear before non-null values.
123123
* {{{
124-
* // Sort by dept in ascending order nulls first, and then age in descending order.
125124
* df.sort(asc_nulls_first("dept"), desc("age"))
126125
* }}}
127126
*
128127
* @group sort_funcs
129128
* @since 2.1.0
130129
*/
131-
def asc_nulls_last(columnName: String): Column = Column(columnName).asc_nulls_last
130+
def asc_nulls_first(columnName: String): Column = Column(columnName).asc_nulls_first
132131

132+
/**
133+
* Returns a sort expression based on ascending order of the column,
134+
* and null values appear after non-null values.
135+
* {{{
136+
* df.sort(asc_nulls_last("dept"), desc("age"))
137+
* }}}
138+
*
139+
* @group sort_funcs
140+
* @since 2.1.0
141+
*/
142+
def asc_nulls_last(columnName: String): Column = Column(columnName).asc_nulls_last
133143

134144
/**
135145
* Returns a sort expression based on the descending order of the column.
136146
* {{{
137-
* // Sort by dept in ascending order, and then age in descending order.
138147
* df.sort(asc("dept"), desc("age"))
139148
* }}}
140149
*
@@ -144,9 +153,9 @@ object functions {
144153
def desc(columnName: String): Column = Column(columnName).desc
145154

146155
/**
147-
* Returns a sort expression based on the descending order of the column with NULLS FIRST.
156+
* Returns a sort expression based on the descending order of the column,
157+
* and null values appear before non-null values.
148158
* {{{
149-
* // Sort by dept in ascending order, and then age in descending order NULLS FIRST.
150159
* df.sort(asc("dept"), desc_nulls_first("age"))
151160
* }}}
152161
*
@@ -155,6 +164,18 @@ object functions {
155164
*/
156165
def desc_nulls_first(columnName: String): Column = Column(columnName).desc_nulls_first
157166

167+
/**
168+
* Returns a sort expression based on the descending order of the column,
169+
* and null values appear after non-null values.
170+
* {{{
171+
* df.sort(asc("dept"), desc_nulls_last("age"))
172+
* }}}
173+
*
174+
* @group sort_funcs
175+
* @since 2.1.0
176+
*/
177+
def desc_nulls_last(columnName: String): Column = Column(columnName).desc_nulls_last
178+
158179

159180
//////////////////////////////////////////////////////////////////////////////////////////////
160181
// Aggregate functions

sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala

Lines changed: 15 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -326,37 +326,21 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
326326
}
327327

328328
test("sorting with null ordering") {
329-
checkAnswer(
330-
nullableData.orderBy('a.asc_nulls_last, 'b.desc_nulls_first),
331-
Seq(
332-
Row(2, null), Row(2, "B"), Row(3, null), Row(4, "a"),
333-
Row(5, "A"), Row(null, "c"), Row(null, "b")
334-
)
335-
)
336-
337-
checkAnswer(
338-
nullableData.orderBy(asc_nulls_last("a"), desc_nulls_first("b")),
339-
Seq(
340-
Row(2, null), Row(2, "B"), Row(3, null), Row(4, "a"),
341-
Row(5, "A"), Row(null, "c"), Row(null, "b")
342-
)
343-
)
344-
345-
checkAnswer(
346-
nullableData.orderBy('a.desc_nulls_first, 'b.asc_nulls_last),
347-
Seq(
348-
Row(null, "b"), Row(null, "c"), Row(5, "A"), Row(4, "a"),
349-
Row(3, null), Row(2, "B"), Row(2, null)
350-
)
351-
)
352-
353-
checkAnswer(
354-
nullableData.orderBy(desc_nulls_first("a"), asc_nulls_last("b")),
355-
Seq(
356-
Row(null, "b"), Row(null, "c"), Row(5, "A"), Row(4, "a"),
357-
Row(3, null), Row(2, "B"), Row(2, null)
358-
)
359-
)
329+
val data = Seq[java.lang.Integer](2, 1, null).toDF("key")
330+
331+
checkAnswer(data.orderBy('key.asc), Row(null) :: Row(1) :: Row(2) :: Nil)
332+
checkAnswer(data.orderBy(asc("key")), Row(null) :: Row(1) :: Row(2) :: Nil)
333+
checkAnswer(data.orderBy('key.asc_nulls_first), Row(null) :: Row(1) :: Row(2) :: Nil)
334+
checkAnswer(data.orderBy(asc_nulls_first("key")), Row(null) :: Row(1) :: Row(2) :: Nil)
335+
checkAnswer(data.orderBy('key.asc_nulls_last), Row(1) :: Row(2) :: Row(null) :: Nil)
336+
checkAnswer(data.orderBy(asc_nulls_last("key")), Row(1) :: Row(2) :: Row(null) :: Nil)
337+
338+
checkAnswer(data.orderBy('key.desc), Row(2) :: Row(1) :: Row(null) :: Nil)
339+
checkAnswer(data.orderBy(desc("key")), Row(2) :: Row(1) :: Row(null) :: Nil)
340+
checkAnswer(data.orderBy('key.desc_nulls_first), Row(null) :: Row(2) :: Row(1) :: Nil)
341+
checkAnswer(data.orderBy(desc_nulls_first("key")), Row(null) :: Row(2) :: Row(1) :: Nil)
342+
checkAnswer(data.orderBy('key.desc_nulls_last), Row(2) :: Row(1) :: Row(null) :: Nil)
343+
checkAnswer(data.orderBy(desc_nulls_last("key")), Row(2) :: Row(1) :: Row(null) :: Nil)
360344
}
361345

362346
test("global sorting") {

sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -169,20 +169,6 @@ private[sql] trait SQLTestData { self =>
169169
rdd
170170
}
171171

172-
protected lazy val nullableData: DataFrame = {
173-
val df = spark.sparkContext.parallelize(
174-
NullableRecord(4, "a") ::
175-
NullableRecord(null, "c") ::
176-
NullableRecord(2, null) ::
177-
NullableRecord(null, "b") ::
178-
NullableRecord(3, null) ::
179-
NullableRecord(5, "A") ::
180-
NullableRecord(2, "B") :: Nil, 2
181-
).toDF("a", "b")
182-
df.createOrReplaceTempView("nullableData")
183-
df
184-
}
185-
186172
protected lazy val nullInts: DataFrame = {
187173
val df = spark.sparkContext.parallelize(
188174
NullInts(1) ::
@@ -319,7 +305,6 @@ private[sql] object SQLTestData {
319305
case class IntField(i: Int)
320306
case class NullInts(a: Integer)
321307
case class NullStrings(n: Int, s: String)
322-
case class NullableRecord(n: Integer, s: String)
323308
case class TableName(tableName: String)
324309
case class Person(id: Int, name: String, age: Int)
325310
case class Salary(personId: Int, salary: Double)

0 commit comments

Comments
 (0)