Skip to content

Commit 2aa6480

Browse files
committed
Merge remote-tracking branch 'remotes/origin/master' into interval-is-positive
2 parents 084c8d5 + 0cf4f07 commit 2aa6480

File tree

153 files changed

+6536
-1718
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

153 files changed

+6536
-1718
lines changed

common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java

Lines changed: 30 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -32,94 +32,11 @@ public final class CalendarInterval implements Serializable {
3232
public static final long MICROS_PER_DAY = MICROS_PER_HOUR * 24;
3333
public static final long MICROS_PER_WEEK = MICROS_PER_DAY * 7;
3434

35-
/**
36-
* A function to generate regex which matches interval string's unit part like "3 years".
37-
*
38-
* First, we can leave out some units in interval string, and we only care about the value of
39-
* unit, so here we use non-capturing group to wrap the actual regex.
40-
* At the beginning of the actual regex, we should match spaces before the unit part.
41-
* Next is the number part, starts with an optional "-" to represent negative value. We use
42-
* capturing group to wrap this part as we need the value later.
43-
* Finally is the unit name, ends with an optional "s".
44-
*/
45-
private static String unitRegex(String unit) {
46-
return "(?:\\s+(-?\\d+)\\s+" + unit + "s?)?";
47-
}
48-
49-
private static Pattern p = Pattern.compile("interval" + unitRegex("year") + unitRegex("month") +
50-
unitRegex("week") + unitRegex("day") + unitRegex("hour") + unitRegex("minute") +
51-
unitRegex("second") + unitRegex("millisecond") + unitRegex("microsecond"),
52-
Pattern.CASE_INSENSITIVE);
53-
54-
private static Pattern yearMonthPattern =
55-
Pattern.compile("^(?:['|\"])?([+|-])?(\\d+)-(\\d+)(?:['|\"])?$");
35+
private static Pattern yearMonthPattern = Pattern.compile(
36+
"^([+|-])?(\\d+)-(\\d+)$");
5637

5738
private static Pattern dayTimePattern = Pattern.compile(
58-
"^(?:['|\"])?([+|-])?((\\d+) )?((\\d+):)?(\\d+):(\\d+)(\\.(\\d+))?(?:['|\"])?$");
59-
60-
private static Pattern quoteTrimPattern = Pattern.compile("^(?:['|\"])?(.*?)(?:['|\"])?$");
61-
62-
private static long toLong(String s) {
63-
if (s == null) {
64-
return 0;
65-
} else {
66-
return Long.parseLong(s);
67-
}
68-
}
69-
70-
/**
71-
* Convert a string to CalendarInterval. Return null if the input string is not a valid interval.
72-
* This method is case-insensitive.
73-
*/
74-
public static CalendarInterval fromString(String s) {
75-
try {
76-
return fromCaseInsensitiveString(s);
77-
} catch (IllegalArgumentException e) {
78-
return null;
79-
}
80-
}
81-
82-
/**
83-
* Convert a string to CalendarInterval. This method can handle
84-
* strings without the `interval` prefix and throws IllegalArgumentException
85-
* when the input string is not a valid interval.
86-
*
87-
* @throws IllegalArgumentException if the string is not a valid interval.
88-
*/
89-
public static CalendarInterval fromCaseInsensitiveString(String s) {
90-
if (s == null) {
91-
throw new IllegalArgumentException("Interval cannot be null");
92-
}
93-
String trimmed = s.trim();
94-
if (trimmed.isEmpty()) {
95-
throw new IllegalArgumentException("Interval cannot be blank");
96-
}
97-
String prefix = "interval";
98-
String intervalStr = trimmed;
99-
// Checks the given interval string does not start with the `interval` prefix
100-
if (!intervalStr.regionMatches(true, 0, prefix, 0, prefix.length())) {
101-
// Prepend `interval` if it is not present because
102-
// the regular expression strictly requires it.
103-
intervalStr = prefix + " " + trimmed;
104-
} else if (intervalStr.length() == prefix.length()) {
105-
throw new IllegalArgumentException("Interval string must have time units");
106-
}
107-
108-
Matcher m = p.matcher(intervalStr);
109-
if (!m.matches()) {
110-
throw new IllegalArgumentException("Invalid interval: " + s);
111-
}
112-
113-
long months = toLong(m.group(1)) * 12 + toLong(m.group(2));
114-
long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK;
115-
microseconds += toLong(m.group(4)) * MICROS_PER_DAY;
116-
microseconds += toLong(m.group(5)) * MICROS_PER_HOUR;
117-
microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE;
118-
microseconds += toLong(m.group(7)) * MICROS_PER_SECOND;
119-
microseconds += toLong(m.group(8)) * MICROS_PER_MILLI;
120-
microseconds += toLong(m.group(9));
121-
return new CalendarInterval((int) months, microseconds);
122-
}
39+
"^([+|-])?((\\d+) )?((\\d+):)?(\\d+):(\\d+)(\\.(\\d+))?$");
12340

12441
public static long toLongWithRange(String fieldName,
12542
String s, long minValue, long maxValue) throws IllegalArgumentException {
@@ -242,72 +159,59 @@ public static CalendarInterval fromDayTimeString(String s, String from, String t
242159
return result;
243160
}
244161

245-
public static CalendarInterval fromSingleUnitString(String unit, String s)
162+
public static CalendarInterval fromUnitStrings(String[] units, String[] values)
246163
throws IllegalArgumentException {
164+
assert units.length == values.length;
165+
int months = 0;
166+
long microseconds = 0;
247167

248-
CalendarInterval result = null;
249-
if (s == null) {
250-
throw new IllegalArgumentException(String.format("Interval %s string was null", unit));
251-
}
252-
s = s.trim();
253-
Matcher m = quoteTrimPattern.matcher(s);
254-
if (!m.matches()) {
255-
throw new IllegalArgumentException(
256-
"Interval string does not match day-time format of 'd h:m:s.n': " + s);
257-
} else {
168+
for (int i = 0; i < units.length; i++) {
258169
try {
259-
switch (unit) {
170+
switch (units[i]) {
260171
case "year":
261-
int year = (int) toLongWithRange("year", m.group(1),
262-
Integer.MIN_VALUE / 12, Integer.MAX_VALUE / 12);
263-
result = new CalendarInterval(year * 12, 0L);
172+
months = Math.addExact(months, Math.multiplyExact(Integer.parseInt(values[i]), 12));
264173
break;
265174
case "month":
266-
int month = (int) toLongWithRange("month", m.group(1),
267-
Integer.MIN_VALUE, Integer.MAX_VALUE);
268-
result = new CalendarInterval(month, 0L);
175+
months = Math.addExact(months, Integer.parseInt(values[i]));
269176
break;
270177
case "week":
271-
long week = toLongWithRange("week", m.group(1),
272-
Long.MIN_VALUE / MICROS_PER_WEEK, Long.MAX_VALUE / MICROS_PER_WEEK);
273-
result = new CalendarInterval(0, week * MICROS_PER_WEEK);
178+
microseconds = Math.addExact(
179+
microseconds,
180+
Math.multiplyExact(Long.parseLong(values[i]), MICROS_PER_WEEK));
274181
break;
275182
case "day":
276-
long day = toLongWithRange("day", m.group(1),
277-
Long.MIN_VALUE / MICROS_PER_DAY, Long.MAX_VALUE / MICROS_PER_DAY);
278-
result = new CalendarInterval(0, day * MICROS_PER_DAY);
183+
microseconds = Math.addExact(
184+
microseconds,
185+
Math.multiplyExact(Long.parseLong(values[i]), MICROS_PER_DAY));
279186
break;
280187
case "hour":
281-
long hour = toLongWithRange("hour", m.group(1),
282-
Long.MIN_VALUE / MICROS_PER_HOUR, Long.MAX_VALUE / MICROS_PER_HOUR);
283-
result = new CalendarInterval(0, hour * MICROS_PER_HOUR);
188+
microseconds = Math.addExact(
189+
microseconds,
190+
Math.multiplyExact(Long.parseLong(values[i]), MICROS_PER_HOUR));
284191
break;
285192
case "minute":
286-
long minute = toLongWithRange("minute", m.group(1),
287-
Long.MIN_VALUE / MICROS_PER_MINUTE, Long.MAX_VALUE / MICROS_PER_MINUTE);
288-
result = new CalendarInterval(0, minute * MICROS_PER_MINUTE);
193+
microseconds = Math.addExact(
194+
microseconds,
195+
Math.multiplyExact(Long.parseLong(values[i]), MICROS_PER_MINUTE));
289196
break;
290197
case "second": {
291-
long micros = parseSecondNano(m.group(1));
292-
result = new CalendarInterval(0, micros);
198+
microseconds = Math.addExact(microseconds, parseSecondNano(values[i]));
293199
break;
294200
}
295201
case "millisecond":
296-
long millisecond = toLongWithRange("millisecond", m.group(1),
297-
Long.MIN_VALUE / MICROS_PER_MILLI, Long.MAX_VALUE / MICROS_PER_MILLI);
298-
result = new CalendarInterval(0, millisecond * MICROS_PER_MILLI);
202+
microseconds = Math.addExact(
203+
microseconds,
204+
Math.multiplyExact(Long.parseLong(values[i]), MICROS_PER_MILLI));
299205
break;
300-
case "microsecond": {
301-
long micros = Long.parseLong(m.group(1));
302-
result = new CalendarInterval(0, micros);
206+
case "microsecond":
207+
microseconds = Math.addExact(microseconds, Long.parseLong(values[i]));
303208
break;
304-
}
305209
}
306210
} catch (Exception e) {
307211
throw new IllegalArgumentException("Error parsing interval string: " + e.getMessage(), e);
308212
}
309213
}
310-
return result;
214+
return new CalendarInterval(months, microseconds);
311215
}
312216

313217
/**

common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java

Lines changed: 12 additions & 162 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@
1919

2020
import org.junit.Test;
2121

22-
import java.util.Arrays;
23-
2422
import static org.junit.Assert.*;
2523
import static org.apache.spark.unsafe.types.CalendarInterval.*;
2624

@@ -62,72 +60,6 @@ public void toStringTest() {
6260
assertEquals("interval 2 years 10 months 3 weeks 13 hours 123 microseconds", i.toString());
6361
}
6462

65-
@Test
66-
public void fromStringTest() {
67-
testSingleUnit("year", 3, 36, 0);
68-
testSingleUnit("month", 3, 3, 0);
69-
testSingleUnit("week", 3, 0, 3 * MICROS_PER_WEEK);
70-
testSingleUnit("day", 3, 0, 3 * MICROS_PER_DAY);
71-
testSingleUnit("hour", 3, 0, 3 * MICROS_PER_HOUR);
72-
testSingleUnit("minute", 3, 0, 3 * MICROS_PER_MINUTE);
73-
testSingleUnit("second", 3, 0, 3 * MICROS_PER_SECOND);
74-
testSingleUnit("millisecond", 3, 0, 3 * MICROS_PER_MILLI);
75-
testSingleUnit("microsecond", 3, 0, 3);
76-
77-
CalendarInterval result = new CalendarInterval(-5 * 12 + 23, 0);
78-
Arrays.asList(
79-
"interval -5 years 23 month",
80-
" -5 years 23 month",
81-
"interval -5 years 23 month ",
82-
" -5 years 23 month ",
83-
" interval -5 years 23 month ").forEach(input ->
84-
assertEquals(fromString(input), result)
85-
);
86-
87-
// Error cases
88-
Arrays.asList(
89-
"interval 3month 1 hour",
90-
"3month 1 hour",
91-
"interval 3 moth 1 hour",
92-
"3 moth 1 hour",
93-
"interval",
94-
"int",
95-
"",
96-
null).forEach(input -> assertNull(fromString(input)));
97-
}
98-
99-
@Test
100-
public void fromCaseInsensitiveStringTest() {
101-
for (String input : new String[]{"5 MINUTES", "5 minutes", "5 Minutes"}) {
102-
assertEquals(fromCaseInsensitiveString(input), new CalendarInterval(0, 5L * 60 * 1_000_000));
103-
}
104-
105-
for (String input : new String[]{null, "", " "}) {
106-
try {
107-
fromCaseInsensitiveString(input);
108-
fail("Expected to throw an exception for the invalid input");
109-
} catch (IllegalArgumentException e) {
110-
String msg = e.getMessage();
111-
if (input == null) assertTrue(msg.contains("cannot be null"));
112-
else assertTrue(msg.contains("cannot be blank"));
113-
}
114-
}
115-
116-
for (String input : new String[]{"interval", "interval1 day", "foo", "foo 1 day"}) {
117-
try {
118-
fromCaseInsensitiveString(input);
119-
fail("Expected to throw an exception for the invalid input");
120-
} catch (IllegalArgumentException e) {
121-
String msg = e.getMessage();
122-
if (input.trim().equalsIgnoreCase("interval")) {
123-
assertTrue(msg.contains("Interval string must have time units"));
124-
} else {
125-
assertTrue(msg.contains("Invalid interval:"));
126-
}
127-
}
128-
}
129-
}
130-
13163
@Test
13264
public void fromYearMonthStringTest() {
13365
String input;
@@ -194,107 +126,25 @@ public void fromDayTimeStringTest() {
194126
}
195127
}
196128

197-
@Test
198-
public void fromSingleUnitStringTest() {
199-
String input;
200-
CalendarInterval i;
201-
202-
input = "12";
203-
i = new CalendarInterval(12 * 12, 0L);
204-
assertEquals(fromSingleUnitString("year", input), i);
205-
206-
input = "100";
207-
i = new CalendarInterval(0, 100 * MICROS_PER_DAY);
208-
assertEquals(fromSingleUnitString("day", input), i);
209-
210-
input = "1999.38888";
211-
i = new CalendarInterval(0, 1999 * MICROS_PER_SECOND + 38);
212-
assertEquals(fromSingleUnitString("second", input), i);
213-
214-
try {
215-
input = String.valueOf(Integer.MAX_VALUE);
216-
fromSingleUnitString("year", input);
217-
fail("Expected to throw an exception for the invalid input");
218-
} catch (IllegalArgumentException e) {
219-
assertTrue(e.getMessage().contains("outside range"));
220-
}
221-
222-
try {
223-
input = String.valueOf(Long.MAX_VALUE / MICROS_PER_HOUR + 1);
224-
fromSingleUnitString("hour", input);
225-
fail("Expected to throw an exception for the invalid input");
226-
} catch (IllegalArgumentException e) {
227-
assertTrue(e.getMessage().contains("outside range"));
228-
}
229-
}
230-
231129
@Test
232130
public void addTest() {
233-
String input = "interval 3 month 1 hour";
234-
String input2 = "interval 2 month 100 hour";
235-
236-
CalendarInterval interval = fromString(input);
237-
CalendarInterval interval2 = fromString(input2);
238-
239-
assertEquals(interval.add(interval2), new CalendarInterval(5, 101 * MICROS_PER_HOUR));
131+
CalendarInterval input1 = new CalendarInterval(3, 1 * MICROS_PER_HOUR);
132+
CalendarInterval input2 = new CalendarInterval(2, 100 * MICROS_PER_HOUR);
133+
assertEquals(input1.add(input2), new CalendarInterval(5, 101 * MICROS_PER_HOUR));
240134

241-
input = "interval -10 month -81 hour";
242-
input2 = "interval 75 month 200 hour";
243-
244-
interval = fromString(input);
245-
interval2 = fromString(input2);
246-
247-
assertEquals(interval.add(interval2), new CalendarInterval(65, 119 * MICROS_PER_HOUR));
135+
input1 = new CalendarInterval(-10, -81 * MICROS_PER_HOUR);
136+
input2 = new CalendarInterval(75, 200 * MICROS_PER_HOUR);
137+
assertEquals(input1.add(input2), new CalendarInterval(65, 119 * MICROS_PER_HOUR));
248138
}
249139

250140
@Test
251141
public void subtractTest() {
252-
String input = "interval 3 month 1 hour";
253-
String input2 = "interval 2 month 100 hour";
254-
255-
CalendarInterval interval = fromString(input);
256-
CalendarInterval interval2 = fromString(input2);
257-
258-
assertEquals(interval.subtract(interval2), new CalendarInterval(1, -99 * MICROS_PER_HOUR));
259-
260-
input = "interval -10 month -81 hour";
261-
input2 = "interval 75 month 200 hour";
262-
263-
interval = fromString(input);
264-
interval2 = fromString(input2);
265-
266-
assertEquals(interval.subtract(interval2), new CalendarInterval(-85, -281 * MICROS_PER_HOUR));
267-
}
268-
269-
private static void testSingleUnit(String unit, int number, int months, long microseconds) {
270-
Arrays.asList("interval ", "").forEach(prefix -> {
271-
String input1 = prefix + number + " " + unit;
272-
String input2 = prefix + number + " " + unit + "s";
273-
CalendarInterval result = new CalendarInterval(months, microseconds);
274-
assertEquals(fromString(input1), result);
275-
assertEquals(fromString(input2), result);
276-
});
277-
}
278-
279-
@Test
280-
public void fromStringCaseSensitivityTest() {
281-
testSingleUnit("YEAR", 3, 36, 0);
282-
testSingleUnit("Month", 3, 3, 0);
283-
testSingleUnit("Week", 3, 0, 3 * MICROS_PER_WEEK);
284-
testSingleUnit("DAY", 3, 0, 3 * MICROS_PER_DAY);
285-
testSingleUnit("HouR", 3, 0, 3 * MICROS_PER_HOUR);
286-
testSingleUnit("MiNuTe", 3, 0, 3 * MICROS_PER_MINUTE);
287-
testSingleUnit("Second", 3, 0, 3 * MICROS_PER_SECOND);
288-
testSingleUnit("MilliSecond", 3, 0, 3 * MICROS_PER_MILLI);
289-
testSingleUnit("MicroSecond", 3, 0, 3);
290-
291-
String input;
292-
293-
input = "INTERVAL -5 YEARS 23 MONTHS";
294-
CalendarInterval result = new CalendarInterval(-5 * 12 + 23, 0);
295-
assertEquals(fromString(input), result);
142+
CalendarInterval input1 = new CalendarInterval(3, 1 * MICROS_PER_HOUR);
143+
CalendarInterval input2 = new CalendarInterval(2, 100 * MICROS_PER_HOUR);
144+
assertEquals(input1.subtract(input2), new CalendarInterval(1, -99 * MICROS_PER_HOUR));
296145

297-
assertNull(fromString("INTERVAL"));
298-
assertNull(fromString(" Interval "));
146+
input1 = new CalendarInterval(-10, -81 * MICROS_PER_HOUR);
147+
input2 = new CalendarInterval(75, 200 * MICROS_PER_HOUR);
148+
assertEquals(input1.subtract(input2), new CalendarInterval(-85, -281 * MICROS_PER_HOUR));
299149
}
300150
}

0 commit comments

Comments
 (0)