Skip to content

Commit d59876f

Browse files
authored
Added min/max UDFs (#325)
* Add ST_MakePolygon, st_polygon * Add docs * Add ST_dimention * Add license * Add ST_Endpoint, ST_PointN * Reimplement ST_Point to work with snowflake linestring * Add St_x, st_y, st_srid * St_min max for x,y * Add macro for end and start points
1 parent 1f062ce commit d59876f

File tree

7 files changed

+449
-264
lines changed

7 files changed

+449
-264
lines changed

crates/runtime/src/datafusion/functions/geospatial/accessors/dim.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ use arrow_schema::DataType;
2626
use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER;
2727
use datafusion::logical_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature};
2828
use datafusion_common::{DataFusionError, Result};
29+
use datafusion_expr::ScalarFunctionArgs;
2930
use geoarrow::array::AsNativeArray;
3031
use geoarrow::datatypes::NativeType;
3132
use geoarrow::scalar::Geometry;
@@ -63,8 +64,8 @@ impl ScalarUDFImpl for GeomDimension {
6364
Ok(DataType::UInt8)
6465
}
6566

66-
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
67-
dim_impl(args)
67+
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
68+
dim_impl(&args.args)
6869
}
6970

7071
fn documentation(&self) -> Option<&Documentation> {
@@ -198,9 +199,13 @@ mod tests {
198199
];
199200

200201
for (array, exp) in args {
201-
let args = vec![ColumnarValue::Array(array.clone())];
202+
let args = ScalarFunctionArgs {
203+
args: vec![ColumnarValue::Array(array)],
204+
number_rows: 2,
205+
return_type: &DataType::Null,
206+
};
202207
let dim_fn = GeomDimension::new();
203-
let result = dim_fn.invoke_batch(&args, 2).unwrap().to_array(2).unwrap();
208+
let result = dim_fn.invoke_with_args(args).unwrap().to_array(2).unwrap();
204209
let result = result.as_primitive::<UInt8Type>();
205210
assert_eq!(result.value(0), exp);
206211
}
Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use crate::datafusion::functions::geospatial::data_types::{
19+
any_single_geometry_type_input, parse_to_native_array,
20+
};
21+
use arrow_array::builder::Float64Builder;
22+
use arrow_schema::DataType;
23+
use arrow_schema::DataType::Float64;
24+
use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER;
25+
use datafusion::logical_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature};
26+
use datafusion_common::{DataFusionError, Result};
27+
use datafusion_expr::ScalarFunctionArgs;
28+
use geo_traits::CoordTrait;
29+
use geo_traits::RectTrait;
30+
use geoarrow::algorithm::geo::BoundingRect;
31+
use geoarrow::trait_::ArrayAccessor;
32+
use std::any::Any;
33+
use std::sync::{Arc, OnceLock};
34+
35+
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
36+
37+
macro_rules! create_extremum_udf {
38+
($name:ident, $func_name:expr, $index:expr, $is_max:expr, $doc:expr, $syntax:expr) => {
39+
#[derive(Debug)]
40+
pub struct $name {
41+
signature: Signature,
42+
}
43+
44+
impl $name {
45+
pub fn new() -> Self {
46+
Self {
47+
signature: any_single_geometry_type_input(),
48+
}
49+
}
50+
}
51+
52+
impl ScalarUDFImpl for $name {
53+
fn as_any(&self) -> &dyn Any {
54+
self
55+
}
56+
57+
fn name(&self) -> &'static str {
58+
$func_name
59+
}
60+
61+
fn signature(&self) -> &Signature {
62+
&self.signature
63+
}
64+
65+
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
66+
Ok(Float64)
67+
}
68+
69+
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
70+
get_extremum(&args.args, $index, $is_max)
71+
}
72+
73+
fn documentation(&self) -> Option<&Documentation> {
74+
Some(DOCUMENTATION.get_or_init(|| {
75+
Documentation::builder(DOC_SECTION_OTHER, $doc, $syntax)
76+
.with_argument("g1", "geometry")
77+
.with_related_udf("st_xmin")
78+
.with_related_udf("st_ymin")
79+
.with_related_udf("st_zmin")
80+
.with_related_udf("st_xmax")
81+
.build()
82+
}))
83+
}
84+
}
85+
};
86+
}
87+
88+
create_extremum_udf!(
89+
MinX,
90+
"st_xmin",
91+
0,
92+
false,
93+
"Returns the minimum longitude (X coordinate) of all points contained in the specified geometry.",
94+
"ST_XMin(geom)"
95+
);
96+
97+
create_extremum_udf!(
98+
MinY,
99+
"st_ymin",
100+
1,
101+
false,
102+
"Returns the minimum latitude (Y coordinate) of all points contained in the specified geometry.",
103+
"ST_YMin(geom)"
104+
);
105+
106+
create_extremum_udf!(
107+
MaxX,
108+
"st_xmax",
109+
0,
110+
true,
111+
"Returns the maximum longitude (X coordinate) of all points contained in the specified geometry.",
112+
"ST_XMax(geom)"
113+
);
114+
115+
create_extremum_udf!(
116+
MaxY,
117+
"st_ymax",
118+
1,
119+
true,
120+
"Returns the maximum latitude (Y coordinate) of all points contained in the specified geometry.",
121+
"ST_YMax(geom)"
122+
);
123+
124+
fn get_extremum(args: &[ColumnarValue], index: i64, is_max: bool) -> Result<ColumnarValue> {
125+
let arg = ColumnarValue::values_to_arrays(args)?
126+
.into_iter()
127+
.next()
128+
.ok_or_else(|| DataFusionError::Execution("Expected only one argument".to_string()))?;
129+
130+
let array = ColumnarValue::values_to_arrays(args)?
131+
.into_iter()
132+
.next()
133+
.ok_or_else(|| DataFusionError::Execution("Expected at least one argument".to_string()))?;
134+
135+
let native_array = parse_to_native_array(&array)?;
136+
let native_array_ref = native_array
137+
.as_ref()
138+
.bounding_rect()
139+
.map_err(|e| DataFusionError::Execution(format!("Error getting bounding rect: {e}")))?;
140+
141+
let mut output_array = Float64Builder::with_capacity(arg.len());
142+
for rect in native_array_ref.iter() {
143+
match (index, is_max) {
144+
(0, false) => output_array.append_option(rect.map(|r| r.min().x())),
145+
(1, false) => output_array.append_option(rect.map(|r| r.min().y())),
146+
(0, true) => output_array.append_option(rect.map(|r| r.max().x())),
147+
(1, true) => output_array.append_option(rect.map(|r| r.max().y())),
148+
_ => {
149+
return Err(DataFusionError::Execution(
150+
"Index out of bounds".to_string(),
151+
))
152+
}
153+
}
154+
}
155+
Ok(ColumnarValue::Array(Arc::new(output_array.finish())))
156+
}
157+
158+
#[cfg(test)]
159+
mod tests {
160+
use super::*;
161+
use super::{MaxX, MaxY, MinX, MinY};
162+
use arrow_array::cast::AsArray;
163+
use arrow_array::types::Float64Type;
164+
use arrow_array::ArrayRef;
165+
use datafusion::logical_expr::ColumnarValue;
166+
use geo_types::{line_string, point, polygon};
167+
use geoarrow::array::{CoordType, LineStringBuilder, PointBuilder, PolygonBuilder};
168+
use geoarrow::datatypes::Dimension;
169+
use geoarrow::ArrayBase;
170+
171+
#[test]
172+
#[allow(clippy::unwrap_used, clippy::float_cmp)]
173+
fn test_extrema() {
174+
let dim = Dimension::XY;
175+
let ct = CoordType::Separated;
176+
177+
let args: [(ArrayRef, [[f64; 2]; 4]); 3] = [
178+
(
179+
{
180+
let data = vec![
181+
line_string![(x: 0., y: 0.), (x: 1., y: 0.), (x: 1., y: 1.), (x: 0., y: 1.), (x: 0., y: 0.)],
182+
line_string![(x: -60., y: -30.), (x: 60., y: -30.)],
183+
];
184+
let array =
185+
LineStringBuilder::from_line_strings(&data, dim, ct, Arc::default())
186+
.finish();
187+
array.to_array_ref()
188+
},
189+
[[0., -60.], [1., 60.], [0., -30.], [1., -30.]],
190+
),
191+
(
192+
{
193+
let data = [point! {x: 0., y: 0.}, point! {x: 1., y: 1.}];
194+
let array =
195+
PointBuilder::from_points(data.iter(), dim, ct, Arc::default()).finish();
196+
array.to_array_ref()
197+
},
198+
[[0., 1.], [0., 1.], [0., 1.], [0., 1.]],
199+
),
200+
(
201+
{
202+
let data = vec![
203+
polygon![(x: 3.3, y: 30.2), (x: 4.7, y: 24.6), (x: 13.4, y: 25.1), (x: 24.4, y: 30.0),(x:3.3,y:30.4)],
204+
polygon![(x: 3.2, y: 11.1), (x: 4.7, y: 24.6), (x: 13.4, y: 25.1), (x: 19.4, y: 31.0),(x:3.3,y:36.4)],
205+
];
206+
let array =
207+
PolygonBuilder::from_polygons(&data, dim, ct, Arc::default()).finish();
208+
array.to_array_ref()
209+
},
210+
[[3.3, 3.2], [24.4, 19.4], [24.6, 11.1], [30.4, 36.4]],
211+
),
212+
];
213+
214+
let udfs: Vec<Box<dyn ScalarUDFImpl>> = vec![
215+
Box::new(MinX::new()),
216+
Box::new(MaxX::new()),
217+
Box::new(MinY::new()),
218+
Box::new(MaxY::new()),
219+
];
220+
221+
for (array, exp) in args {
222+
for (i, udf) in udfs.iter().enumerate() {
223+
let res = udf
224+
.invoke_with_args(ScalarFunctionArgs {
225+
args: vec![ColumnarValue::Array(array.clone())],
226+
number_rows: 2,
227+
return_type: &DataType::Null,
228+
})
229+
.unwrap()
230+
.to_array(2)
231+
.unwrap();
232+
let res = res.as_primitive::<Float64Type>();
233+
assert_eq!(res.value(0), exp[i][0]);
234+
assert_eq!(res.value(1), exp[i][1]);
235+
}
236+
}
237+
}
238+
}

0 commit comments

Comments
 (0)