Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,17 @@ private[sql] final case class BucketTransform(
override def toString: String = describe
}

private[sql] object BucketTransform {
def unapply(transform: Transform): Option[(Int, NamedReference)] = transform match {
case NamedTransform("bucket", Seq(
Lit(value: Int, IntegerType),
Ref(seq: Seq[String]))) =>
Some((value, FieldReference(seq)))
case _ =>
None
}
}

private[sql] final case class ApplyTransform(
name: String,
args: Seq[Expression]) extends Transform {
Expand All @@ -111,32 +122,104 @@ private[sql] final case class ApplyTransform(
override def toString: String = describe
}

/**
* Convenience extractor for any Literal.
*/
private object Lit {
def unapply[T](literal: Literal[T]): Some[(T, DataType)] = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some -> Option?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This always returns Some so I thought it was correct to use here. This practice was pointed out by @HyukjinKwon here: #24689 (comment)

Some((literal.value, literal.dataType))
}
}

/**
* Convenience extractor for any NamedReference.
*/
private object Ref {
def unapply(named: NamedReference): Some[Seq[String]] = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some -> Option.

Some(named.fieldNames)
}
}

/**
* Convenience extractor for any Transform.
*/
private object NamedTransform {
def unapply(transform: Transform): Some[(String, Seq[Expression])] = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some -> Option.

Some((transform.name, transform.arguments))
}
}

private[sql] final case class IdentityTransform(
ref: NamedReference) extends SingleColumnTransform(ref) {
override val name: String = "identity"
override def describe: String = ref.describe
}

private[sql] object IdentityTransform {
def unapply(transform: Transform): Option[FieldReference] = transform match {
case NamedTransform("identity", Seq(Ref(parts))) =>
Some(FieldReference(parts))
case _ =>
None
}
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we have NamedTransform.unapply and Ref.unapply, the others are not required much like the following.

-      case IdentityTransform(FieldReference(seq)) =>
+      case NamedTransform("identity", Seq(Ref(seq))) =>
-      case YearsTransform(FieldReference(seq)) =>
+      case NamedTransform("years", Seq(Ref(seq))) =>

Do we need all of them?

Copy link
Contributor Author

@rdblue rdblue Jun 6, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we need them.

The idea is to make any transform that is equivalent to an IdentityTransform(...) instance of Spark's case class work in a match expression as though it were actually an IdentityTransform instance. That way, Spark can internally use these case classes, even though users may pass instances that are unknown classes. This will reduce future bugs caused by matching IdentityTransform instead of remembering to match the more general NamedTransform("identity", ...).


private[sql] final case class YearsTransform(
ref: NamedReference) extends SingleColumnTransform(ref) {
override val name: String = "years"
}

private[sql] object YearsTransform {
def unapply(transform: Transform): Option[FieldReference] = transform match {
case NamedTransform("years", Seq(Ref(parts))) =>
Some(FieldReference(parts))
case _ =>
None
}
}

private[sql] final case class MonthsTransform(
ref: NamedReference) extends SingleColumnTransform(ref) {
override val name: String = "months"
}

private[sql] object MonthsTransform {
def unapply(transform: Transform): Option[FieldReference] = transform match {
case NamedTransform("months", Seq(Ref(parts))) =>
Some(FieldReference(parts))
case _ =>
None
}
}

private[sql] final case class DaysTransform(
ref: NamedReference) extends SingleColumnTransform(ref) {
override val name: String = "days"
}

private[sql] object DaysTransform {
def unapply(transform: Transform): Option[FieldReference] = transform match {
case NamedTransform("days", Seq(Ref(parts))) =>
Some(FieldReference(parts))
case _ =>
None
}
}

private[sql] final case class HoursTransform(
ref: NamedReference) extends SingleColumnTransform(ref) {
override val name: String = "hours"
}

private[sql] object HoursTransform {
def unapply(transform: Transform): Option[FieldReference] = transform match {
case NamedTransform("hours", Seq(Ref(parts))) =>
Some(FieldReference(parts))
case _ =>
None
}
}

private[sql] final case class LiteralValue[T](value: T, dataType: DataType) extends Literal[T] {
override def describe: String = {
if (dataType.isInstanceOf[StringType]) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.catalog.v2.expressions

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst
import org.apache.spark.sql.types.DataType

class TransformExtractorSuite extends SparkFunSuite {
/**
* Creates a Literal using an anonymous class.
*/
private def lit[T](literal: T): Literal[T] = new Literal[T] {
override def value: T = literal
override def dataType: DataType = catalyst.expressions.Literal(literal).dataType
override def describe: String = literal.toString
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that we have this already. Can we reuse LogicalExpressions.literal and remove this?

-    override def arguments: Array[Expression] = Array(lit(numBuckets), ref)
+    override def arguments: Array[Expression] = Array(LogicalExpressions.literal(numBuckets), ref)

Copy link
Contributor Author

@rdblue rdblue Jun 6, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using an anonymous class is part of the test.

The extract functions are intended to correctly match any Transform, NamedReference, or Literal instance that is equivalent. To test that, we need to test with objects that are equivalent according to the Java interface, but that do not actually use Spark's internal case classes.


/**
* Creates a NamedReference using an anonymous class.
*/
private def ref(names: String*): NamedReference = new NamedReference {
override def fieldNames: Array[String] = names.toArray
override def describe: String = names.mkString(".")
}
Copy link
Member

@dongjoon-hyun dongjoon-hyun Jun 6, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto. Please reuse LogicalExpressions.reference.

-    transform("identity", ref("a", "b")) match {
+    transform("identity", LogicalExpressions.reference("a.b")) match {

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Of course, you can omit the prefix LogicalExpressions. with the proper import.


/**
* Creates a Transform using an anonymous class.
*/
private def transform(func: String, ref: NamedReference): Transform = new Transform {
override def name: String = func
override def references: Array[NamedReference] = Array(ref)
override def arguments: Array[Expression] = Array(ref)
override def describe: String = ref.describe
}

test("Identity extractor") {
transform("identity", ref("a", "b")) match {
case IdentityTransform(FieldReference(seq)) =>
assert(seq === Seq("a", "b"))
case _ =>
fail("Did not match IdentityTransform extractor")
}

transform("unknown", ref("a", "b")) match {
case IdentityTransform(FieldReference(_)) =>
fail("Matched unknown transform")
case _ =>
// expected
}
}

test("Years extractor") {
transform("years", ref("a", "b")) match {
case YearsTransform(FieldReference(seq)) =>
assert(seq === Seq("a", "b"))
case _ =>
fail("Did not match YearsTransform extractor")
}

transform("unknown", ref("a", "b")) match {
case YearsTransform(FieldReference(_)) =>
fail("Matched unknown transform")
case _ =>
// expected
}
}

test("Months extractor") {
transform("months", ref("a", "b")) match {
case MonthsTransform(FieldReference(seq)) =>
assert(seq === Seq("a", "b"))
case _ =>
fail("Did not match MonthsTransform extractor")
}

transform("unknown", ref("a", "b")) match {
case MonthsTransform(FieldReference(_)) =>
fail("Matched unknown transform")
case _ =>
// expected
}
}

test("Days extractor") {
transform("days", ref("a", "b")) match {
case DaysTransform(FieldReference(seq)) =>
assert(seq === Seq("a", "b"))
case _ =>
fail("Did not match DaysTransform extractor")
}

transform("unknown", ref("a", "b")) match {
case DaysTransform(FieldReference(_)) =>
fail("Matched unknown transform")
case _ =>
// expected
}
}

test("Hours extractor") {
transform("hours", ref("a", "b")) match {
case HoursTransform(FieldReference(seq)) =>
assert(seq === Seq("a", "b"))
case _ =>
fail("Did not match HoursTransform extractor")
}

transform("unknown", ref("a", "b")) match {
case HoursTransform(FieldReference(_)) =>
fail("Matched unknown transform")
case _ =>
// expected
}
}

test("Bucket extractor") {
val col = ref("a", "b")
val bucketTransform = new Transform {
override def name: String = "bucket"
override def references: Array[NamedReference] = Array(col)
override def arguments: Array[Expression] = Array(lit(16), col)
override def describe: String = s"bucket(16, ${col.describe})"
}

bucketTransform match {
case BucketTransform(numBuckets, FieldReference(seq)) =>
assert(numBuckets === 16)
assert(seq === Seq("a", "b"))
case _ =>
fail("Did not match BucketTransform extractor")
}

transform("unknown", ref("a", "b")) match {
case BucketTransform(_, _) =>
fail("Matched unknown transform")
case _ =>
// expected
}
}
}