@@ -98,9 +98,7 @@ case class CurrentTimestamp() extends LeafExpression with CodegenFallback {

override def dataType: DataType = TimestampType

override def eval(input: InternalRow): Any = {
instantToMicros(Instant.now())
}
override def eval(input: InternalRow): Any = currentTimestamp()

override def prettyName: String = "current_timestamp"
}
@@ -17,12 +17,14 @@

package org.apache.spark.sql.catalyst.util

import java.nio.charset.StandardCharsets
import java.sql.{Date, Timestamp}
import java.time._
import java.time.temporal.{ChronoField, ChronoUnit, IsoFields}
import java.util.{Locale, TimeZone}
import java.util.concurrent.TimeUnit._

import scala.util.Try
import scala.util.control.NonFatal

import org.apache.spark.sql.types.Decimal
@@ -218,6 +220,8 @@ object DateTimeUtils {
var i = 0
var currentSegmentValue = 0
val bytes = s.trim.getBytes
val specialTimestamp = convertSpecialTimestamp(bytes, timeZoneId)
if (specialTimestamp.isDefined) return specialTimestamp
Member:

Can we avoid using return here?

Member Author:

Why?

Member:

I'm not 100% sure about the bytecode for this, though; is there any overhead to using return?
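
For reference, a return at the top level of a method compiles to a plain bytecode return instruction; measurable overhead only arises when return is used inside a closure, where scalac implements it by throwing NonLocalReturnControl. A return-free sketch, assuming the rest of the parsing were factored into a hypothetical helper parseTimestampBytes:

// Sketch only. orElse takes its argument by name, so the fallback parser
// runs only when no special value was recognized.
convertSpecialTimestamp(bytes, timeZoneId)
  .orElse(parseTimestampBytes(bytes, timeZoneId))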

var j = 0
var digitsMilli = 0
var justTime = false
@@ -848,4 +852,67 @@ object DateTimeUtils {
val sinceEpoch = BigDecimal(timestamp) / MICROS_PER_SECOND + offset
new Decimal().set(sinceEpoch, 20, 6)
}

def currentTimestamp(): SQLTimestamp = instantToMicros(Instant.now())

private def today(zoneId: ZoneId): ZonedDateTime = {
Instant.now().atZone(zoneId).`with`(LocalTime.MIDNIGHT)
}

private val specialValueRe = """(\p{Alpha}+)\p{Blank}*(.*)""".r

/**
* Extracts special values from an input string, ignoring case.
* @param input - a trimmed string
* @param zoneId - zone identifier used to get the current date.
* @return some special value in lower case or None.
*/
private def extractSpecialValue(input: String, zoneId: ZoneId): Option[String] = {
def isValid(value: String, timeZoneId: String): Boolean = {
// A special value may appear without any time zone
if (timeZoneId.isEmpty) return true
// "now" must not have the time zone field
if (value.compareToIgnoreCase("now") == 0) return false
// If the time zone field is present in the input, it must be resolvable
try {
getZoneId(timeZoneId)
true
} catch {
case NonFatal(_) => false
}
}

assert(input.trim.length == input.length)
if (input.length < 3 || !input(0).isLetter) return None
input match {
case specialValueRe(v, z) if isValid(v, z) => Some(v.toLowerCase(Locale.US))
case _ => None
}
}
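
To illustrate the splitter: specialValueRe captures a leading alphabetic word and the remainder as a candidate time zone. A sketch with example inputs:

// "tomorrow CET" => ("tomorrow", "CET")  -- zone must resolve via getZoneId
// "epoch"        => ("epoch", "")        -- an empty zone is always valid
// "now UTC"      => ("now", "UTC")       -- rejected by isValid: "now" must be zone-free
"tomorrow CET" match {
  case specialValueRe(value, zone) => Some(value.toLowerCase(Locale.US))
  case _ => None
} // Some("tomorrow")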

/**
* Converts notational shorthands into ordinary timestamp values.
* @param input - a trimmed string
Member:

How about checking that the input is trimmed with an assert?

Member Author:

I will add the assert:

assert(input.trim.length == input.length)

* @param zoneId - zone identifier used to get the current date.
* @return the number of microseconds since the epoch wrapped in Some if the
*         conversion completed successfully, otherwise None.
*/
def convertSpecialTimestamp(input: String, zoneId: ZoneId): Option[SQLTimestamp] = {
Member:

What's different from convertSpecialDate? I know the output dataType is different, but is the way these special values are handled different, too?
https://github.com/apache/spark/pull/25708/files#diff-da60f07e1826788aaeb07f295fae4b8aR866
Can we share some code between them?

extractSpecialValue(input, zoneId).flatMap {
case "epoch" => Some(0)
case "now" => Some(currentTimestamp())
case "today" => Some(instantToMicros(today(zoneId).toInstant))
case "tomorrow" => Some(instantToMicros(today(zoneId).plusDays(1).toInstant))
case "yesterday" => Some(instantToMicros(today(zoneId).minusDays(1).toInstant))
case _ => None
}
}
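
A usage sketch (results depend on the current clock; the zone id is an example). Note that a trailing zone in the input is only validated for resolvability; the result is still computed in the passed zoneId, matching the "time zone should be ignored by this data type" behavior exercised in the SQL tests below:

import java.time.ZoneId
import org.apache.spark.sql.catalyst.util.DateTimeUtils.convertSpecialTimestamp

val zoneId = ZoneId.of("Europe/Berlin")
convertSpecialTimestamp("epoch", zoneId)        // Some(0)
convertSpecialTimestamp("tomorrow CET", zoneId) // Some(micros of the next midnight in zoneId)
convertSpecialTimestamp("now UTC", zoneId)      // None: "now" must not carry a zone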

private def convertSpecialTimestamp(bytes: Array[Byte], zoneId: ZoneId): Option[SQLTimestamp] = {
Member:

Why did you use Array[Byte] instead of UTF8String?

Member Author:

Because I need a String inside extractSpecialValue, and converting between UTF8String and String goes via Array[Byte]. Why should we convert the same string to bytes twice?
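
For context, a sketch of the conversion paths under discussion, using Spark's UTF8String API:

import java.nio.charset.StandardCharsets
import org.apache.spark.unsafe.types.UTF8String

val s = UTF8String.fromString(" now ")              // String -> UTF8String (copies to bytes)
val bytes = s.trim.getBytes                         // UTF8String -> Array[Byte]
val str = new String(bytes, StandardCharsets.UTF_8) // bytes -> String, done exactly once
// Accepting UTF8String in convertSpecialTimestamp would force a toString call
// that rebuilds a String from the same bytes a second time.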

Member:

Ah, I see.

if (bytes.length > 0 && Character.isAlphabetic(bytes(0))) {
convertSpecialTimestamp(new String(bytes, StandardCharsets.UTF_8), zoneId)
} else {
None
}
}
}
@@ -22,9 +22,11 @@ import java.time._
import java.time.format.DateTimeParseException
import java.time.temporal.ChronoField.MICRO_OF_SECOND
import java.time.temporal.TemporalQueries
import java.util.{Locale, TimeZone}
import java.util.Locale
import java.util.concurrent.TimeUnit.SECONDS

import DateTimeUtils.convertSpecialTimestamp

sealed trait TimestampFormatter extends Serializable {
/**
* Parses a timestamp in a string and converts it to microseconds.
@@ -50,14 +52,17 @@ class Iso8601TimestampFormatter(
protected lazy val formatter = getOrCreateFormatter(pattern, locale)

override def parse(s: String): Long = {
val parsed = formatter.parse(s)
val parsedZoneId = parsed.query(TemporalQueries.zone())
val timeZoneId = if (parsedZoneId == null) zoneId else parsedZoneId
val zonedDateTime = toZonedDateTime(parsed, timeZoneId)
val epochSeconds = zonedDateTime.toEpochSecond
val microsOfSecond = zonedDateTime.get(MICRO_OF_SECOND)
val specialDate = convertSpecialTimestamp(s.trim, zoneId)
specialDate.getOrElse {
val parsed = formatter.parse(s)
val parsedZoneId = parsed.query(TemporalQueries.zone())
val timeZoneId = if (parsedZoneId == null) zoneId else parsedZoneId
val zonedDateTime = toZonedDateTime(parsed, timeZoneId)
val epochSeconds = zonedDateTime.toEpochSecond
val microsOfSecond = zonedDateTime.get(MICRO_OF_SECOND)

Math.addExact(SECONDS.toMicros(epochSeconds), microsOfSecond)
Math.addExact(SECONDS.toMicros(epochSeconds), microsOfSecond)
}
}
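
With this change, parse consults the special-value table first and falls back to the pattern-based formatter otherwise. A usage sketch (the timestamp literal and default pattern are illustrative):

import java.time.ZoneOffset

val formatter = TimestampFormatter(ZoneOffset.UTC)
formatter.parse("epoch")               // 0, served by convertSpecialTimestamp
formatter.parse("2019-09-09 01:02:03") // falls through to the underlying formatter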

override def format(us: Long): String = {
@@ -19,16 +19,18 @@ package org.apache.spark.sql.catalyst.util

import java.sql.{Date, Timestamp}
import java.text.SimpleDateFormat
import java.time.ZoneId
import java.time.{LocalDateTime, LocalTime, ZoneId}
import java.util.{Locale, TimeZone}
import java.util.concurrent.TimeUnit

import org.scalatest.Matchers

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
import org.apache.spark.sql.catalyst.util.DateTimeUtils._
import org.apache.spark.unsafe.types.UTF8String

class DateTimeUtilsSuite extends SparkFunSuite {
class DateTimeUtilsSuite extends SparkFunSuite with Matchers {

val TimeZonePST = TimeZone.getTimeZone("PST")
private def defaultZoneId = ZoneId.systemDefault()
@@ -142,10 +144,14 @@ class DateTimeUtilsSuite extends SparkFunSuite {
assert(stringToDate(UTF8String.fromString("1999 08")).isEmpty)
}

private def toTimestamp(str: String, zoneId: ZoneId): Option[SQLTimestamp] = {
stringToTimestamp(UTF8String.fromString(str), zoneId)
}

test("string to timestamp") {
for (tz <- ALL_TIMEZONES) {
def checkStringToTimestamp(str: String, expected: Option[Long]): Unit = {
assert(stringToTimestamp(UTF8String.fromString(str), tz.toZoneId) === expected)
assert(toTimestamp(str, tz.toZoneId) === expected)
}

checkStringToTimestamp("1969-12-31 16:00:00", Option(date(1969, 12, 31, 16, tz = tz)))
@@ -271,8 +277,8 @@ class DateTimeUtilsSuite extends SparkFunSuite {
UTF8String.fromString("2015-02-29 00:00:00"), defaultZoneId).isEmpty)
assert(stringToTimestamp(
UTF8String.fromString("2015-04-31 00:00:00"), defaultZoneId).isEmpty)
assert(stringToTimestamp(UTF8String.fromString("2015-02-29"), defaultZoneId).isEmpty)
assert(stringToTimestamp(UTF8String.fromString("2015-04-31"), defaultZoneId).isEmpty)
assert(toTimestamp("2015-02-29", defaultZoneId).isEmpty)
assert(toTimestamp("2015-04-31", defaultZoneId).isEmpty)
}

test("hours") {
@@ -456,8 +462,7 @@ class DateTimeUtilsSuite extends SparkFunSuite {
timezone: TimeZone = DateTimeUtils.defaultTimeZone()): Unit = {
val truncated =
DateTimeUtils.truncTimestamp(inputTS, level, timezone)
val expectedTS =
DateTimeUtils.stringToTimestamp(UTF8String.fromString(expected), defaultZoneId)
val expectedTS = toTimestamp(expected, defaultZoneId)
assert(truncated === expectedTS.get)
}

@@ -564,4 +569,21 @@ class DateTimeUtilsSuite extends SparkFunSuite {
assert(DateTimeUtils.toMillis(-9223372036844776001L) === -9223372036844777L)
assert(DateTimeUtils.toMillis(-157700927876544L) === -157700927877L)
}

test("special timestamp values") {
DateTimeTestUtils.outstandingZoneIds.foreach { zoneId =>
val tolerance = TimeUnit.SECONDS.toMicros(30)

assert(toTimestamp("Epoch", zoneId).get === 0)
val now = instantToMicros(LocalDateTime.now(zoneId).atZone(zoneId).toInstant)
toTimestamp("NOW", zoneId).get should be (now +- tolerance)
Member:

Can you check illegal cases, e.g., 'now CET'?

Member Author:

assert(toTimestamp("now UTC", zoneId) === None)
val today = instantToMicros(LocalDateTime.now(zoneId)
.`with`(LocalTime.MIDNIGHT)
.atZone(zoneId).toInstant)
toTimestamp(" Yesterday", zoneId).get should be (today - MICROS_PER_DAY +- tolerance)
toTimestamp("Today ", zoneId).get should be (today +- tolerance)
toTimestamp(" tomorrow CET ", zoneId).get should be (today + MICROS_PER_DAY +- tolerance)
}
}
}
@@ -17,14 +17,18 @@

package org.apache.spark.sql.util

import java.time.{LocalDateTime, ZoneId, ZoneOffset}
import java.time.{LocalDateTime, LocalTime, ZoneOffset}
import java.util.concurrent.TimeUnit

import org.scalatest.Matchers

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.plans.SQLHelper
import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils, TimestampFormatter}
import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getZoneId, instantToMicros, MICROS_PER_DAY}
import org.apache.spark.sql.internal.SQLConf

class TimestampFormatterSuite extends SparkFunSuite with SQLHelper {
class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers {

test("parsing timestamps using time zones") {
val localDate = "2018-12-02T10:11:12.001234"
@@ -131,4 +135,24 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper {
val micros = DateTimeUtils.instantToMicros(instant)
assert(TimestampFormatter(ZoneOffset.UTC).format(micros) === "-0099-01-01 00:00:00")
}

test("special timestamp values") {
DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone =>
withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) {
val zoneId = getZoneId(timeZone)
val formatter = TimestampFormatter(zoneId)
val tolerance = TimeUnit.SECONDS.toMicros(30)

assert(formatter.parse("EPOCH") === 0)
val now = instantToMicros(LocalDateTime.now(zoneId).atZone(zoneId).toInstant)
formatter.parse("now") should be (now +- tolerance)
val today = instantToMicros(LocalDateTime.now(zoneId)
.`with`(LocalTime.MIDNIGHT)
.atZone(zoneId).toInstant)
formatter.parse("yesterday CET") should be (today - MICROS_PER_DAY +- tolerance)
formatter.parse(" TODAY ") should be (today +- tolerance)
formatter.parse("Tomorrow ") should be (today + MICROS_PER_DAY +- tolerance)
}
}
}
}
29 changes: 15 additions & 14 deletions sql/core/src/test/resources/sql-tests/inputs/pgSQL/timestamp.sql
@@ -7,7 +7,6 @@

CREATE TABLE TIMESTAMP_TBL (d1 timestamp) USING parquet;

-- [SPARK-28141] Timestamp type can not accept special values
-- Test shorthand input values
-- We can't just "select" the results since they aren't constants; test for
-- equality instead. We can do that by running the test inside a transaction
@@ -17,22 +16,24 @@ CREATE TABLE TIMESTAMP_TBL (d1 timestamp) USING parquet;
-- block is entered exactly at local midnight; then 'now' and 'today' have
-- the same values and the counts will come out different.

-- INSERT INTO TIMESTAMP_TBL VALUES ('now');
INSERT INTO TIMESTAMP_TBL VALUES ('now');
-- SELECT pg_sleep(0.1);

-- BEGIN;

-- INSERT INTO TIMESTAMP_TBL VALUES ('now');
-- INSERT INTO TIMESTAMP_TBL VALUES ('today');
-- INSERT INTO TIMESTAMP_TBL VALUES ('yesterday');
-- INSERT INTO TIMESTAMP_TBL VALUES ('tomorrow');
INSERT INTO TIMESTAMP_TBL VALUES ('now');
INSERT INTO TIMESTAMP_TBL VALUES ('today');
INSERT INTO TIMESTAMP_TBL VALUES ('yesterday');
INSERT INTO TIMESTAMP_TBL VALUES ('tomorrow');
-- time zone should be ignored by this data type
-- INSERT INTO TIMESTAMP_TBL VALUES ('tomorrow EST');
-- INSERT INTO TIMESTAMP_TBL VALUES ('tomorrow zulu');

-- SELECT count(*) AS One FROM TIMESTAMP_TBL WHERE d1 = timestamp 'today';
-- SELECT count(*) AS Three FROM TIMESTAMP_TBL WHERE d1 = timestamp 'tomorrow';
-- SELECT count(*) AS One FROM TIMESTAMP_TBL WHERE d1 = timestamp 'yesterday';
INSERT INTO TIMESTAMP_TBL VALUES ('tomorrow EST');
-- [SPARK-29024] Ignore case while resolving time zones
INSERT INTO TIMESTAMP_TBL VALUES ('tomorrow Zulu');

SELECT count(*) AS One FROM TIMESTAMP_TBL WHERE d1 = timestamp 'today';
SELECT count(*) AS Three FROM TIMESTAMP_TBL WHERE d1 = timestamp 'tomorrow';
SELECT count(*) AS One FROM TIMESTAMP_TBL WHERE d1 = timestamp 'yesterday';
-- [SPARK-29025] Support seconds precision by the timestamp type
-- SELECT count(*) AS One FROM TIMESTAMP_TBL WHERE d1 = timestamp(2) 'now';

-- COMMIT;
Expand All @@ -48,12 +49,12 @@ CREATE TABLE TIMESTAMP_TBL (d1 timestamp) USING parquet;
-- SELECT count(*) AS two FROM TIMESTAMP_TBL WHERE d1 = timestamp(2) 'now';
-- COMMIT;

-- TRUNCATE TIMESTAMP_TBL;
TRUNCATE TABLE TIMESTAMP_TBL;

-- Special values
-- INSERT INTO TIMESTAMP_TBL VALUES ('-infinity');
-- INSERT INTO TIMESTAMP_TBL VALUES ('infinity');
-- INSERT INTO TIMESTAMP_TBL VALUES ('epoch');
INSERT INTO TIMESTAMP_TBL VALUES ('epoch');
-- [SPARK-27923] Spark SQL inserts these obsolete special values as NULL
-- Obsolete special values
-- INSERT INTO TIMESTAMP_TBL VALUES ('invalid');