Commit 4723f8e

Merge remote-tracking branch 'apache/master' into SPARK-35780-full-range-datetime

Parents: cd330a6 + 0674327

60 files changed: +2313, -527 lines


common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java

Lines changed: 5 additions & 5 deletions
@@ -562,24 +562,24 @@ public UTF8String trim() {
   }
 
   /**
-   * Trims whitespaces ({@literal <=} ASCII 32) from both ends of this string.
+   * Trims whitespace ASCII characters from both ends of this string.
    *
-   * Note that, this method is the same as java's {@link String#trim}, and different from
-   * {@link UTF8String#trim()} which remove only spaces(= ASCII 32) from both ends.
+   * Note that, this method is different from {@link UTF8String#trim()} which removes
+   * only spaces(= ASCII 32) from both ends.
    *
    * @return A UTF8String whose value is this UTF8String, with any leading and trailing white
    * space removed, or this UTF8String if it has no leading or trailing whitespace.
    *
    */
   public UTF8String trimAll() {
     int s = 0;
-    // skip all of the whitespaces (<=0x20) in the left side
+    // skip all of the whitespaces in the left side
     while (s < this.numBytes && Character.isWhitespace(getByte(s))) s++;
     if (s == this.numBytes) {
       // Everything trimmed
       return EMPTY_UTF8;
     }
-    // skip all of the whitespaces (<=0x20) in the right side
+    // skip all of the whitespaces in the right side
     int e = this.numBytes - 1;
     while (e > s && Character.isWhitespace(getByte(e))) e--;
     if (s == 0 && e == numBytes - 1) {
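For illustration (not part of the commit), a minimal spark-shell style sketch of the difference between `trim()` and `trimAll()` that the reworded comments describe:

```scala
import org.apache.spark.unsafe.types.UTF8String

// trim() strips only ASCII spaces (0x20) from both ends.
val spaced = UTF8String.fromString("  spark  ")
assert(spaced.trim().toString == "spark")

// trimAll() strips any character Character.isWhitespace accepts,
// e.g. tabs and line feeds, not just ASCII 32.
val tabbed = UTF8String.fromString("\t spark \n")
assert(tabbed.trimAll().toString == "spark")
```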

core/src/main/resources/error/error-classes.json

Lines changed: 8 additions & 0 deletions
@@ -1,4 +1,8 @@
 {
+  "AMBIGUOUS_FIELD_NAME" : {
+    "message" : [ "Field name %s is ambiguous and has %s matching fields in the struct." ],
+    "sqlState" : "42000"
+  },
   "DIVIDE_BY_ZERO" : {
     "message" : [ "divide by zero" ],
     "sqlState" : "22012"
@@ -7,6 +11,10 @@
     "message" : [ "Found duplicate keys '%s'" ],
     "sqlState" : "23000"
   },
+  "INVALID_FIELD_NAME" : {
+    "message" : [ "Field name %s is invalid: %s is not a struct." ],
+    "sqlState" : "42000"
+  },
   "MISSING_COLUMN" : {
     "message" : [ "cannot resolve '%s' given input columns: [%s]" ],
     "sqlState" : "42000"

core/src/test/scala/org/apache/spark/scheduler/HealthTrackerSuite.scala

Lines changed: 3 additions & 3 deletions
@@ -62,11 +62,11 @@ class HealthTrackerSuite extends SparkFunSuite with BeforeAndAfterEach with Mock
   // All executors and hosts used in tests should be in this set, so that [[assertEquivalentToSet]]
   // works. Its OK if its got extraneous entries
   val allExecutorAndHostIds = {
-    (('A' to 'Z')++ (1 to 100).map(_.toString))
-      .flatMap{ suffix =>
+    ('A' to 'Z')
+      .flatMap { suffix =>
         Seq(s"host$suffix", s"host-$suffix")
       }
-  }.toSet
+  }.toSet ++ (1 to 100).map(_.toString)
 
   /**
    * Its easier to write our tests as if we could directly look at the sets of nodes & executors in
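Evaluated on its own, the rewritten expression yields host names for the letter suffixes plus bare numeric executor ids, rather than also expanding the numbers into `host1`-style entries; a small standalone sketch (not part of the commit):

```scala
val ids = ('A' to 'Z').flatMap(s => Seq(s"host$s", s"host-$s")).toSet ++
  (1 to 100).map(_.toString)

assert(ids("hostA") && ids("host-Z"))
assert(ids("42"))        // bare executor id is now included
assert(!ids("host42"))   // numeric suffixes are no longer turned into host names
```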

docs/sql-migration-guide.md

Lines changed: 5 additions & 1 deletion
@@ -83,7 +83,9 @@ license: |
 
 - In Spark 3.2, `TRANSFORM` operator can support `ArrayType/MapType/StructType` without Hive SerDe, in this mode, we use `StructsToJosn` to convert `ArrayType/MapType/StructType` column to `STRING` and use `JsonToStructs` to parse `STRING` to `ArrayType/MapType/StructType`. In Spark 3.1, Spark just support case `ArrayType/MapType/StructType` column as `STRING` but can't support parse `STRING` to `ArrayType/MapType/StructType` output columns.
 
-- In Spark 3.2, the unit-to-unit interval literals like `INTERVAL '1-1' YEAR TO MONTH` are converted to ANSI interval types: `YearMonthIntervalType` or `DayTimeIntervalType`. In Spark 3.1 and earlier, such interval literals are converted to `CalendarIntervalType`. To restore the behavior before Spark 3.2, you can set `spark.sql.legacy.interval.enabled` to `true`.
+- In Spark 3.2, the unit-to-unit interval literals like `INTERVAL '1-1' YEAR TO MONTH` and the unit list interval literals like `INTERVAL '3' DAYS '1' HOUR` are converted to ANSI interval types: `YearMonthIntervalType` or `DayTimeIntervalType`. In Spark 3.1 and earlier, such interval literals are converted to `CalendarIntervalType`. To restore the behavior before Spark 3.2, you can set `spark.sql.legacy.interval.enabled` to `true`.
+
+- In Spark 3.2, the unit list interval literals can not mix year-month fields (YEAR and MONTH) and day-time fields (WEEK, DAY, ..., MICROSECOND). For example, `INTERVAL 1 day 1 hour` is invalid in Spark 3.2. In Spark 3.1 and earlier, there is no such limitation and the literal returns value of `CalendarIntervalType`. To restore the behavior before Spark 3.2, you can set `spark.sql.legacy.interval.enabled` to `true`.
 
 - In Spark 3.2, Spark supports `DayTimeIntervalType` and `YearMonthIntervalType` as inputs and outputs of `TRANSFORM` clause in Hive `SERDE` mode, the behavior is different between Hive `SERDE` mode and `ROW FORMAT DELIMITED` mode when these two types are used as inputs. In Hive `SERDE` mode, `DayTimeIntervalType` column is converted to `HiveIntervalDayTime`, its string format is `[-]?d h:m:s.n`, but in `ROW FORMAT DELIMITED` mode the format is `INTERVAL '[-]?d h:m:s.n' DAY TO TIME`. In Hive `SERDE` mode, `YearMonthIntervalType` column is converted to `HiveIntervalYearMonth`, its string format is `[-]?y-m`, but in `ROW FORMAT DELIMITED` mode the format is `INTERVAL '[-]?y-m' YEAR TO MONTH`.
 
@@ -157,6 +159,8 @@ license: |
 
 - In Spark 3.0, JSON datasource and JSON function `schema_of_json` infer TimestampType from string values if they match to the pattern defined by the JSON option `timestampFormat`. Since version 3.0.1, the timestamp type inference is disabled by default. Set the JSON option `inferTimestamp` to `true` to enable such type inference.
 
+- In Spark 3.0, when casting string to integral types(tinyint, smallint, int and bigint), datetime types(date, timestamp and interval) and boolean type, the leading and trailing characters (<= ASCII 32) will be trimmed. For example, `cast('\b1\b' as int)` results `1`. Since Spark 3.0.1, only the leading and trailing whitespace ASCII characters will be trimmed. For example, `cast('\t1\t' as int)` results `1` but `cast('\b1\b' as int)` results `NULL`.
+
 ## Upgrading from Spark SQL 2.4 to 3.0
 
 ### Dataset/DataFrame APIs
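A rough spark-shell sketch (not part of the commit) of the two behaviors described above, assuming Spark 3.2 and a `SparkSession` available as `spark`:

```scala
// Unit list interval literals now resolve to an ANSI interval type
// (a DayTimeIntervalType here) instead of CalendarIntervalType.
spark.sql("SELECT INTERVAL '3' DAYS '1' HOUR AS i").schema.foreach(f => println(f.dataType))

// Since Spark 3.0.1 only whitespace is trimmed when casting strings:
// the tab-padded value parses to 1, the backspace-padded one becomes NULL.
spark.sql("SELECT CAST('\t1\t' AS INT) AS a, CAST('\b1\b' AS INT) AS b").show()
```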

external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala

Lines changed: 3 additions & 2 deletions
@@ -2429,8 +2429,9 @@ class KafkaSourceStressSuite extends KafkaSourceTest {
       (d, running) => {
         Random.nextInt(5) match {
           case 0 => // Add a new topic
-            topics = topics ++ Seq(newStressTopic)
-            AddKafkaData(topics.toSet, d: _*)(message = s"Add topic $newStressTopic",
+            val newTopic = newStressTopic
+            topics = topics ++ Seq(newTopic)
+            AddKafkaData(topics.toSet, d: _*)(message = s"Add topic $newTopic",
               topicAction = (topic, partition) => {
                 if (partition.isEmpty) {
                   testUtils.createTopic(topic, partitions = nextInt(1, 6))
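The fix captures the generated topic name once. Assuming `newStressTopic` is a def that produces a fresh name on every call (consistent with the change), referencing it twice could register one topic and log a different one; a standalone sketch of the pattern with hypothetical names:

```scala
import java.util.concurrent.atomic.AtomicInteger

val counter = new AtomicInteger(0)
def newStressTopic: String = s"stress${counter.incrementAndGet()}"  // hypothetical generator

// Buggy pattern: two evaluations yield two different topics.
val registered = newStressTopic   // e.g. "stress1"
val logged = newStressTopic       // e.g. "stress2" -- does not match what was registered
assert(registered != logged)

// Fixed pattern: evaluate once, then reuse the captured value everywhere.
val newTopic = newStressTopic
val message = s"Add topic $newTopic"  // message and registered topic now always agree
```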

resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala

Lines changed: 10 additions & 0 deletions
@@ -18,11 +18,14 @@ package org.apache.spark.deploy.k8s
 
 import java.io.File
 
+import com.fasterxml.jackson.databind.ObjectMapper
 import com.google.common.base.Charsets
 import com.google.common.io.Files
 import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient, KubernetesClient}
+import io.fabric8.kubernetes.client.Config.KUBERNETES_REQUEST_RETRY_BACKOFFLIMIT_SYSTEM_PROPERTY
 import io.fabric8.kubernetes.client.Config.autoConfigure
 import io.fabric8.kubernetes.client.utils.HttpClientUtils
+import io.fabric8.kubernetes.client.utils.Utils.getSystemPropertyOrEnvVar
 import okhttp3.Dispatcher
 
 import org.apache.spark.SparkConf
@@ -74,6 +77,11 @@ private[spark] object SparkKubernetesClientFactory extends Logging {
       kubeContext.map("context " + _).getOrElse("current context") +
       " from users K8S config file")
 
+    // if backoff limit is not set then set it to 3
+    if (getSystemPropertyOrEnvVar(KUBERNETES_REQUEST_RETRY_BACKOFFLIMIT_SYSTEM_PROPERTY) == null) {
+      System.setProperty(KUBERNETES_REQUEST_RETRY_BACKOFFLIMIT_SYSTEM_PROPERTY, "3")
+    }
+
     // Start from an auto-configured config with the desired context
     // Fabric 8 uses null to indicate that the users current context should be used so if no
     // explicit setting pass null
@@ -102,6 +110,8 @@ private[spark] object SparkKubernetesClientFactory extends Logging {
     val httpClientWithCustomDispatcher = baseHttpClient.newBuilder()
       .dispatcher(dispatcher)
       .build()
+    logDebug("Kubernetes client config: " +
+      new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(config))
     new DefaultKubernetesClient(httpClientWithCustomDispatcher, config)
   }
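The new retry default is a simple set-if-absent on a JVM system property; a standalone sketch of that pattern (the key below is a hypothetical stand-in for fabric8's `KUBERNETES_REQUEST_RETRY_BACKOFFLIMIT_SYSTEM_PROPERTY` constant):

```scala
// Hypothetical property key, for illustration only.
val backoffLimitKey = "example.kubernetes.request.retry.backoffLimit"
if (System.getProperty(backoffLimitKey) == null) {
  System.setProperty(backoffLimitKey, "3")  // default to 3 retries when the user set nothing
}
```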

sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4

Lines changed: 1 addition & 1 deletion
@@ -877,7 +877,7 @@ interval
     ;
 
 errorCapturingMultiUnitsInterval
-    : multiUnitsInterval unitToUnitInterval?
+    : body=multiUnitsInterval unitToUnitInterval?
    ;
 
 multiUnitsInterval

sql/catalyst/src/main/scala-2.12/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala

Lines changed: 2 additions & 0 deletions
@@ -50,4 +50,6 @@ class AttributeMap[A](val baseMap: Map[ExprId, (Attribute, A)])
   override def iterator: Iterator[(Attribute, A)] = baseMap.valuesIterator
 
   override def -(key: Attribute): Map[Attribute, A] = baseMap.values.toMap - key
+
+  def ++(other: AttributeMap[A]): AttributeMap[A] = new AttributeMap(baseMap ++ other.baseMap)
 }

sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala

Lines changed: 2 additions & 0 deletions
@@ -51,4 +51,6 @@ class AttributeMap[A](val baseMap: Map[ExprId, (Attribute, A)])
   override def iterator: Iterator[(Attribute, A)] = baseMap.valuesIterator
 
   override def removed(key: Attribute): Map[Attribute, A] = baseMap.values.toMap - key
+
+  def ++(other: AttributeMap[A]): AttributeMap[A] = new AttributeMap(baseMap ++ other.baseMap)
 }
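A minimal sketch (not part of the commit) of the new `++` operator, with made-up attributes; entries are keyed by `ExprId` and the right-hand side wins on conflicts, mirroring the underlying `Map ++`:

```scala
import org.apache.spark.sql.catalyst.expressions.{AttributeMap, AttributeReference}
import org.apache.spark.sql.types.IntegerType

// Made-up attributes and values, just to show the merge semantics.
val a = AttributeReference("a", IntegerType)()
val b = AttributeReference("b", IntegerType)()

val left = AttributeMap(Seq(a -> 1, b -> 2))
val right = AttributeMap(Seq(b -> 20))

val merged = left ++ right
assert(merged(a) == 1)
assert(merged(b) == 20)   // right-hand entry replaced the left-hand one
```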

sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala

Lines changed: 4 additions & 4 deletions
@@ -20,6 +20,7 @@ package org.apache.spark.sql
 import org.apache.spark.{SparkThrowable, SparkThrowableHelper}
 import org.apache.spark.annotation.Stable
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.trees.Origin
 
 /**
  * Thrown when a query fails to analyze, usually because the query itself is invalid.
@@ -48,12 +49,11 @@ class AnalysisException protected[sql] (
   def this(
       errorClass: String,
       messageParameters: Array[String],
-      line: Option[Int],
-      startPosition: Option[Int]) =
+      origin: Origin) =
     this(
       SparkThrowableHelper.getMessage(errorClass, messageParameters),
-      line = line,
-      startPosition = startPosition,
+      line = origin.line,
+      startPosition = origin.startPosition,
       errorClass = Some(errorClass),
       messageParameters = messageParameters)
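A rough sketch (not part of the commit) of raising one of the new error classes through the `Origin`-based constructor; the field name, parameters, and source position are made up:

```scala
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.trees.Origin

// Made-up error parameters and position, for illustration only.
val ex = new AnalysisException(
  errorClass = "AMBIGUOUS_FIELD_NAME",
  messageParameters = Array("`a.b`", "2"),
  origin = Origin(line = Some(3), startPosition = Some(12)))

println(ex.getMessage)  // rendered from the AMBIGUOUS_FIELD_NAME template above
```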
