Skip to content

Commit c14fb6e

Browse files
DanCodedThiseadgbear
authored andcommitted
Added support for day and other datetime keywords in DF (#294)
* start here * no macro, just simple vistor mut * remvoe redundant trait creation * integration test add, fixed logic, next unit tests * activated impl and cargo fmt + clippy * added unit tests + impl extension * cargo fmt + clippy * cargo clippy * cargo clippy * removed redundant check * fix * merged with Artem PR * cargo fmt + clippy * small fixes * cargo fmt + clippy
1 parent c485aa8 commit c14fb6e

File tree

7 files changed

+362
-82
lines changed

7 files changed

+362
-82
lines changed

crates/runtime/src/execution/datafusion/functions/convert_timezone.rs

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -341,11 +341,10 @@ mod tests {
341341
);
342342
assert_eq!(
343343
result, expected,
344-
"convert_timezone created wrong value for {}",
345-
source_timestamp_tz_value
346-
)
344+
"convert_timezone created wrong value for {source_timestamp_tz_value}"
345+
);
347346
}
348-
_ => panic!("Conversion of {} failed", source_timestamp_tz),
347+
_ => panic!("Conversion of {source_timestamp_tz} failed"),
349348
}
350349
}
351350
#[test]
@@ -371,11 +370,10 @@ mod tests {
371370
);
372371
assert_eq!(
373372
result, expected,
374-
"convert_timezone created wrong value for {}",
375-
source_timestamp_tz_value
376-
)
373+
"convert_timezone created wrong value for {source_timestamp_tz_value}"
374+
);
377375
}
378-
_ => panic!("Conversion of {} failed", source_timestamp_tz_value),
376+
_ => panic!("Conversion of {source_timestamp_tz_value} failed"),
379377
}
380378
}
381379
#[test]
@@ -403,11 +401,10 @@ mod tests {
403401
);
404402
assert_ne!(
405403
result, expected,
406-
"convert_timezone created wrong value for {}",
407-
source_timestamp_tz_value
408-
)
404+
"convert_timezone created wrong value for {source_timestamp_tz_value}"
405+
);
409406
}
410-
_ => panic!("Conversion of {} failed", source_timestamp_tz_value),
407+
_ => panic!("Conversion of {source_timestamp_tz_value} failed"),
411408
}
412409
}
413410
}

crates/runtime/src/execution/datafusion/functions/date_add.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ mod tests {
248248
Some(1736600400000000i64),
249249
Some(Arc::from(String::from("+00").into_boxed_str())),
250250
);
251-
assert_eq!(&result, &expected, "date_add created a wrong value")
251+
assert_eq!(&result, &expected, "date_add created a wrong value");
252252
}
253253
_ => panic!("Conversion failed"),
254254
}
@@ -283,7 +283,7 @@ mod tests {
283283
)
284284
.to_array()
285285
.unwrap();
286-
assert_eq!(&result, &expected, "date_add created a wrong value")
286+
assert_eq!(&result, &expected, "date_add created a wrong value");
287287
}
288288
_ => panic!("Conversion failed"),
289289
}
@@ -318,7 +318,7 @@ mod tests {
318318
))
319319
.to_array(2)
320320
.unwrap();
321-
assert_eq!(&result, &expected, "date_add created a wrong value")
321+
assert_eq!(&result, &expected, "date_add created a wrong value");
322322
}
323323
_ => panic!("Conversion failed"),
324324
}

crates/runtime/src/execution/datafusion/functions/mod.rs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@
1818
use std::sync::Arc;
1919

2020
use datafusion::{common::Result, execution::FunctionRegistry, logical_expr::ScalarUDF};
21-
use sqlparser::ast::Value::SingleQuotedString;
22-
use sqlparser::ast::{Expr, Function, FunctionArg, FunctionArgExpr, FunctionArguments};
21+
use sqlparser::ast::Value::{self, SingleQuotedString};
22+
use sqlparser::ast::{
23+
Expr, Function, FunctionArg, FunctionArgExpr, FunctionArgumentList, FunctionArguments, Ident,
24+
};
2325

2426
mod convert_timezone;
2527
mod date_add;
@@ -99,4 +101,18 @@ pub fn visit_functions_expressions(func: &mut Function) {
99101
_ => func_name,
100102
};
101103
func.name = sqlparser::ast::ObjectName(vec![sqlparser::ast::Ident::new(name)]);
104+
if let FunctionArguments::List(FunctionArgumentList { args, .. }) = &mut func.args {
105+
match func_name {
106+
"dateadd" | "date_add" | "datediff" | "date_diff" => {
107+
if let Some(FunctionArg::Unnamed(FunctionArgExpr::Expr(ident))) =
108+
args.iter_mut().next()
109+
{
110+
if let Expr::Identifier(Ident { value, .. }) = ident {
111+
*ident = Expr::Value(Value::SingleQuotedString(value.clone()));
112+
}
113+
}
114+
}
115+
_ => {}
116+
}
117+
}
102118
}

crates/runtime/src/execution/query.rs

Lines changed: 141 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,6 @@ impl IceBucketQuery {
128128
.parse_query(query.as_str())
129129
.context(super::error::DataFusionSnafu)?;
130130
Self::postprocess_query_statement(&mut statement);
131-
132131
// statement = self.update_statement_references(statement, warehouse_name);
133132
// query = statement.to_string();
134133

@@ -264,18 +263,15 @@ impl IceBucketQuery {
264263
/// Panics if .
265264
#[must_use]
266265
#[allow(clippy::unwrap_used)]
267-
#[tracing::instrument(level = "trace", skip(self), ret)]
268-
pub fn preprocess_query(&self, query: &str) -> String {
266+
#[tracing::instrument(level = "trace", ret)]
267+
pub fn preprocess_query(query: &str) -> String {
269268
// Replace field[0].subfield -> json_get(json_get(field, 0), 'subfield')
270269
// TODO: This regex should be a static allocation
271270
let re = regex::Regex::new(r"(\w+.\w+)\[(\d+)][:\.](\w+)").unwrap();
272-
let date_add =
273-
regex::Regex::new(r"(date|time|timestamp)(_?add|_?diff)\(\s*([a-zA-Z]+),").unwrap();
274-
271+
//date_add processing moved to `postprocess_query_statement`
275272
let mut query = re
276273
.replace_all(query, "json_get(json_get($1, $2), '$3')")
277274
.to_string();
278-
query = date_add.replace_all(&query, "$1$2('$3',").to_string();
279275
let alter_iceberg_table = regex::Regex::new(r"alter\s+iceberg\s+table").unwrap();
280276
query = alter_iceberg_table
281277
.replace_all(&query, "alter table")
@@ -1406,8 +1402,144 @@ mod tests {
14061402
}
14071403

14081404
#[cfg(test)]
1409-
mod test {
1410-
use crate::datafusion::execution::SqlExecutor;
1405+
mod tests {
1406+
use super::SqlExecutor;
1407+
use crate::datafusion::{session::SessionParams, type_planner::CustomTypePlanner};
1408+
use datafusion::sql::parser::Statement as DFStatement;
1409+
use datafusion::sql::sqlparser::ast::visit_expressions;
1410+
use datafusion::sql::sqlparser::ast::{Expr, ObjectName};
1411+
use datafusion::{
1412+
execution::SessionStateBuilder,
1413+
prelude::{SessionConfig, SessionContext},
1414+
};
1415+
use datafusion_iceberg::planner::IcebergQueryPlanner;
1416+
use sqlparser::ast::Value;
1417+
use sqlparser::ast::{
1418+
Function, FunctionArg, FunctionArgExpr, FunctionArgumentList, FunctionArguments,
1419+
};
1420+
use std::ops::ControlFlow;
1421+
use std::sync::Arc;
1422+
1423+
struct Test<'a, T> {
1424+
input: &'a str,
1425+
expected: T,
1426+
should_work: bool,
1427+
}
1428+
impl<'a, T> Test<'a, T> {
1429+
pub const fn new(input: &'a str, expected: T, should_work: bool) -> Self {
1430+
Self {
1431+
input,
1432+
expected,
1433+
should_work,
1434+
}
1435+
}
1436+
}
1437+
#[test]
1438+
#[allow(
1439+
clippy::unwrap_used,
1440+
clippy::explicit_iter_loop,
1441+
clippy::collapsible_match
1442+
)]
1443+
fn test_timestamp_keywords_postprocess() {
1444+
let state = SessionStateBuilder::new()
1445+
.with_config(
1446+
SessionConfig::new()
1447+
.with_information_schema(true)
1448+
.with_option_extension(SessionParams::default())
1449+
.set_str("datafusion.sql_parser.dialect", "SNOWFLAKE"),
1450+
)
1451+
.with_default_features()
1452+
.with_query_planner(Arc::new(IcebergQueryPlanner {}))
1453+
.with_type_planner(Arc::new(CustomTypePlanner {}))
1454+
.build();
1455+
let ctx = SessionContext::new_with_state(state);
1456+
let executor = SqlExecutor::new(ctx).unwrap();
1457+
let test = vec![
1458+
Test::new(
1459+
"SELECT dateadd(year, 5, '2025-06-01')",
1460+
Value::SingleQuotedString("year".to_owned()),
1461+
true,
1462+
),
1463+
Test::new(
1464+
"SELECT dateadd(\"year\", 5, '2025-06-01')",
1465+
Value::SingleQuotedString("year".to_owned()),
1466+
true,
1467+
),
1468+
Test::new(
1469+
"SELECT dateadd('year', 5, '2025-06-01')",
1470+
Value::SingleQuotedString("year".to_owned()),
1471+
true,
1472+
),
1473+
Test::new(
1474+
"SELECT dateadd(\"'year'\", 5, '2025-06-01')",
1475+
Value::SingleQuotedString("year".to_owned()),
1476+
false,
1477+
),
1478+
Test::new(
1479+
"SELECT dateadd(\'year\', 5, '2025-06-01')",
1480+
Value::SingleQuotedString("year".to_owned()),
1481+
true,
1482+
),
1483+
Test::new(
1484+
"SELECT datediff(day, 5, '2025-06-01')",
1485+
Value::SingleQuotedString("day".to_owned()),
1486+
true,
1487+
),
1488+
Test::new(
1489+
"SELECT datediff('week', 5, '2025-06-01')",
1490+
Value::SingleQuotedString("week".to_owned()),
1491+
true,
1492+
),
1493+
Test::new(
1494+
"SELECT datediff(nsecond, 10000000, '2025-06-01')",
1495+
Value::SingleQuotedString("nsecond".to_owned()),
1496+
true,
1497+
),
1498+
Test::new(
1499+
"SELECT date_diff(hour, 5, '2025-06-01')",
1500+
Value::SingleQuotedString("hour".to_owned()),
1501+
true,
1502+
),
1503+
Test::new(
1504+
"SELECT date_add(us, 100000, '2025-06-01')",
1505+
Value::SingleQuotedString("us".to_owned()),
1506+
true,
1507+
),
1508+
];
1509+
for test in test.iter() {
1510+
let mut statement = executor.parse_query(test.input).unwrap();
1511+
SqlExecutor::postprocess_query_statement(&mut statement);
1512+
if let DFStatement::Statement(statement) = statement {
1513+
visit_expressions(&statement, |expr| {
1514+
if let Expr::Function(Function {
1515+
name: ObjectName(idents),
1516+
args: FunctionArguments::List(FunctionArgumentList { args, .. }),
1517+
..
1518+
}) = expr
1519+
{
1520+
match idents.first().unwrap().value.as_str() {
1521+
"dateadd" | "date_add" | "datediff" | "date_diff" => {
1522+
if let FunctionArg::Unnamed(FunctionArgExpr::Expr(ident)) =
1523+
args.iter().next().unwrap()
1524+
{
1525+
if let Expr::Value(found) = ident {
1526+
if test.should_work {
1527+
assert_eq!(*found, test.expected);
1528+
} else {
1529+
assert_ne!(*found, test.expected);
1530+
}
1531+
}
1532+
}
1533+
}
1534+
_ => {}
1535+
}
1536+
}
1537+
ControlFlow::<()>::Continue(())
1538+
});
1539+
}
1540+
}
1541+
}
1542+
14111543
use datafusion::sql::parser::DFParser;
14121544

14131545
#[allow(clippy::unwrap_used)]

crates/runtime/src/tests/queries.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,9 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
// use crate::tests::utils::macros::test_query;
18+
use crate::tests::utils::macros::test_query;
1919

20+
test_query!(select_date_add_diff, "SELECT dateadd(day, 5, '2025-06-01')");
2021
// // SELECT
2122
// test_query!(select_star, "SELECT * FROM employee_table");
2223
// test_query!(select_ilike, "SELECT * ILIKE '%id%' FROM employee_table;");

0 commit comments

Comments
 (0)