Skip to content

Commit 92112b5

Browse files
committed
simplify interval string parsing
1 parent e99a9f7 commit 92112b5

File tree

22 files changed

+249
-410
lines changed

22 files changed

+249
-410
lines changed

common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java

Lines changed: 50 additions & 140 deletions
Original file line number | Diff line number | Diff line change
@@ -32,94 +32,11 @@ public final class CalendarInterval implements Serializable {
3232
public static final long MICROS_PER_DAY = MICROS_PER_HOUR * 24;
3333
public static final long MICROS_PER_WEEK = MICROS_PER_DAY * 7;
3434

35-
/**
36-
* A function to generate regex which matches interval string's unit part like "3 years".
37-
*
38-
* First, we can leave out some units in interval string, and we only care about the value of
39-
* unit, so here we use non-capturing group to wrap the actual regex.
40-
* At the beginning of the actual regex, we should match spaces before the unit part.
41-
* Next is the number part, starts with an optional "-" to represent negative value. We use
42-
* capturing group to wrap this part as we need the value later.
43-
* Finally is the unit name, ends with an optional "s".
44-
*/
45-
private static String unitRegex(String unit) {
46-
return "(?:\\s+(-?\\d+)\\s+" + unit + "s?)?";
47-
}
48-
49-
private static Pattern p = Pattern.compile("interval" + unitRegex("year") + unitRegex("month") +
50-
unitRegex("week") + unitRegex("day") + unitRegex("hour") + unitRegex("minute") +
51-
unitRegex("second") + unitRegex("millisecond") + unitRegex("microsecond"),
52-
Pattern.CASE_INSENSITIVE);
53-
54-
private static Pattern yearMonthPattern =
55-
Pattern.compile("^(?:['|\"])?([+|-])?(\\d+)-(\\d+)(?:['|\"])?$");
35+
private static Pattern yearMonthPattern = Pattern.compile(
36+
"^([+|-])?(\\d+)-(\\d+)$");
5637

5738
private static Pattern dayTimePattern = Pattern.compile(
58-
"^(?:['|\"])?([+|-])?((\\d+) )?((\\d+):)?(\\d+):(\\d+)(\\.(\\d+))?(?:['|\"])?$");
59-
60-
private static Pattern quoteTrimPattern = Pattern.compile("^(?:['|\"])?(.*?)(?:['|\"])?$");
61-
62-
private static long toLong(String s) {
63-
if (s == null) {
64-
return 0;
65-
} else {
66-
return Long.parseLong(s);
67-
}
68-
}
69-
70-
/**
71-
* Convert a string to CalendarInterval. Return null if the input string is not a valid interval.
72-
* This method is case-insensitive.
73-
*/
74-
public static CalendarInterval fromString(String s) {
75-
try {
76-
return fromCaseInsensitiveString(s);
77-
} catch (IllegalArgumentException e) {
78-
return null;
79-
}
80-
}
81-
82-
/**
83-
* Convert a string to CalendarInterval. This method can handle
84-
* strings without the `interval` prefix and throws IllegalArgumentException
85-
* when the input string is not a valid interval.
86-
*
87-
* @throws IllegalArgumentException if the string is not a valid interval.
88-
*/
89-
public static CalendarInterval fromCaseInsensitiveString(String s) {
90-
if (s == null) {
91-
throw new IllegalArgumentException("Interval cannot be null");
92-
}
93-
String trimmed = s.trim();
94-
if (trimmed.isEmpty()) {
95-
throw new IllegalArgumentException("Interval cannot be blank");
96-
}
97-
String prefix = "interval";
98-
String intervalStr = trimmed;
99-
// Checks the given interval string does not start with the `interval` prefix
100-
if (!intervalStr.regionMatches(true, 0, prefix, 0, prefix.length())) {
101-
// Prepend `interval` if it is not present because
102-
// the regular expression strictly requires it.
103-
intervalStr = prefix + " " + trimmed;
104-
} else if (intervalStr.length() == prefix.length()) {
105-
throw new IllegalArgumentException("Interval string must have time units");
106-
}
107-
108-
Matcher m = p.matcher(intervalStr);
109-
if (!m.matches()) {
110-
throw new IllegalArgumentException("Invalid interval: " + s);
111-
}
112-
113-
long months = toLong(m.group(1)) * 12 + toLong(m.group(2));
114-
long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK;
115-
microseconds += toLong(m.group(4)) * MICROS_PER_DAY;
116-
microseconds += toLong(m.group(5)) * MICROS_PER_HOUR;
117-
microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE;
118-
microseconds += toLong(m.group(7)) * MICROS_PER_SECOND;
119-
microseconds += toLong(m.group(8)) * MICROS_PER_MILLI;
120-
microseconds += toLong(m.group(9));
121-
return new CalendarInterval((int) months, microseconds);
122-
}
39+
"^([+|-])?((\\d+) )?((\\d+):)?(\\d+):(\\d+)(\\.(\\d+))?$");
12340

12441
public static long toLongWithRange(String fieldName,
12542
String s, long minValue, long maxValue) throws IllegalArgumentException {
@@ -250,62 +167,55 @@ public static CalendarInterval fromSingleUnitString(String unit, String s)
250167
throw new IllegalArgumentException(String.format("Interval %s string was null", unit));
251168
}
252169
s = s.trim();
253-
Matcher m = quoteTrimPattern.matcher(s);
254-
if (!m.matches()) {
255-
throw new IllegalArgumentException(
256-
"Interval string does not match day-time format of 'd h:m:s.n': " + s);
257-
} else {
258-
try {
259-
switch (unit) {
260-
case "year":
261-
int year = (int) toLongWithRange("year", m.group(1),
262-
Integer.MIN_VALUE / 12, Integer.MAX_VALUE / 12);
263-
result = new CalendarInterval(year * 12, 0L);
264-
break;
265-
case "month":
266-
int month = (int) toLongWithRange("month", m.group(1),
267-
Integer.MIN_VALUE, Integer.MAX_VALUE);
268-
result = new CalendarInterval(month, 0L);
269-
break;
270-
case "week":
271-
long week = toLongWithRange("week", m.group(1),
272-
Long.MIN_VALUE / MICROS_PER_WEEK, Long.MAX_VALUE / MICROS_PER_WEEK);
273-
result = new CalendarInterval(0, week * MICROS_PER_WEEK);
274-
break;
275-
case "day":
276-
long day = toLongWithRange("day", m.group(1),
277-
Long.MIN_VALUE / MICROS_PER_DAY, Long.MAX_VALUE / MICROS_PER_DAY);
278-
result = new CalendarInterval(0, day * MICROS_PER_DAY);
279-
break;
280-
case "hour":
281-
long hour = toLongWithRange("hour", m.group(1),
282-
Long.MIN_VALUE / MICROS_PER_HOUR, Long.MAX_VALUE / MICROS_PER_HOUR);
283-
result = new CalendarInterval(0, hour * MICROS_PER_HOUR);
284-
break;
285-
case "minute":
286-
long minute = toLongWithRange("minute", m.group(1),
287-
Long.MIN_VALUE / MICROS_PER_MINUTE, Long.MAX_VALUE / MICROS_PER_MINUTE);
288-
result = new CalendarInterval(0, minute * MICROS_PER_MINUTE);
289-
break;
290-
case "second": {
291-
long micros = parseSecondNano(m.group(1));
292-
result = new CalendarInterval(0, micros);
293-
break;
294-
}
295-
case "millisecond":
296-
long millisecond = toLongWithRange("millisecond", m.group(1),
297-
Long.MIN_VALUE / MICROS_PER_MILLI, Long.MAX_VALUE / MICROS_PER_MILLI);
298-
result = new CalendarInterval(0, millisecond * MICROS_PER_MILLI);
299-
break;
300-
case "microsecond": {
301-
long micros = Long.parseLong(m.group(1));
302-
result = new CalendarInterval(0, micros);
303-
break;
304-
}
170+
try {
171+
switch (unit) {
172+
case "year":
173+
int year = (int) toLongWithRange("year", s,
174+
Integer.MIN_VALUE / 12, Integer.MAX_VALUE / 12);
175+
result = new CalendarInterval(year * 12, 0L);
176+
break;
177+
case "month":
178+
int month = (int) toLongWithRange("month", s,
179+
Integer.MIN_VALUE, Integer.MAX_VALUE);
180+
result = new CalendarInterval(month, 0L);
181+
break;
182+
case "week":
183+
long week = toLongWithRange("week", s,
184+
Long.MIN_VALUE / MICROS_PER_WEEK, Long.MAX_VALUE / MICROS_PER_WEEK);
185+
result = new CalendarInterval(0, week * MICROS_PER_WEEK);
186+
break;
187+
case "day":
188+
long day = toLongWithRange("day", s,
189+
Long.MIN_VALUE / MICROS_PER_DAY, Long.MAX_VALUE / MICROS_PER_DAY);
190+
result = new CalendarInterval(0, day * MICROS_PER_DAY);
191+
break;
192+
case "hour":
193+
long hour = toLongWithRange("hour", s,
194+
Long.MIN_VALUE / MICROS_PER_HOUR, Long.MAX_VALUE / MICROS_PER_HOUR);
195+
result = new CalendarInterval(0, hour * MICROS_PER_HOUR);
196+
break;
197+
case "minute":
198+
long minute = toLongWithRange("minute", s,
199+
Long.MIN_VALUE / MICROS_PER_MINUTE, Long.MAX_VALUE / MICROS_PER_MINUTE);
200+
result = new CalendarInterval(0, minute * MICROS_PER_MINUTE);
201+
break;
202+
case "second": {
203+
long micros = parseSecondNano(s);
204+
result = new CalendarInterval(0, micros);
205+
break;
305206
}
306-
} catch (Exception e) {
307-
throw new IllegalArgumentException("Error parsing interval string: " + e.getMessage(), e);
207+
case "millisecond":
208+
long millisecond = toLongWithRange("millisecond", s,
209+
Long.MIN_VALUE / MICROS_PER_MILLI, Long.MAX_VALUE / MICROS_PER_MILLI);
210+
result = new CalendarInterval(0, millisecond * MICROS_PER_MILLI);
211+
break;
212+
case "microsecond":
213+
long micros = Long.parseLong(s);
214+
result = new CalendarInterval(0, micros);
215+
break;
308216
}
217+
} catch (Exception e) {
218+
throw new IllegalArgumentException("Error parsing interval string: " + e.getMessage(), e);
309219
}
310220
return result;
311221
}

0 commit comments

Comments
 (0)