1616// under the License.
1717
1818use fnv:: FnvHashSet ;
19+ use serde_bytes:: ByteBuf ;
1920
2021use crate :: expr:: visitors:: bound_predicate_visitor:: { BoundPredicateVisitor , visit} ;
2122use crate :: expr:: { BoundPredicate , BoundReference } ;
@@ -42,13 +43,13 @@ impl ManifestEvaluator {
4243 /// see if this `ManifestFile` could possibly contain data that matches
4344 /// the scan's filter.
4445 pub ( crate ) fn eval ( & self , manifest_file : & ManifestFile ) -> Result < bool > {
45- if manifest_file. partitions . is_empty ( ) {
46- return Ok ( true ) ;
46+ match & manifest_file. partitions {
47+ Some ( p) if !p. is_empty ( ) => {
48+ let mut evaluator = ManifestFilterVisitor :: new ( p) ;
49+ visit ( & mut evaluator, & self . partition_filter )
50+ }
51+ _ => Ok ( true ) ,
4752 }
48-
49- let mut evaluator = ManifestFilterVisitor :: new ( & manifest_file. partitions ) ;
50-
51- visit ( & mut evaluator, & self . partition_filter )
5253 }
5354}
5455
@@ -154,9 +155,19 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
154155 _predicate : & BoundPredicate ,
155156 ) -> crate :: Result < bool > {
156157 let field = self . field_summary_for_reference ( reference) ;
158+
157159 match & field. lower_bound {
158- Some ( bound) if datum <= bound => ROWS_CANNOT_MATCH ,
159- Some ( _) => ROWS_MIGHT_MATCH ,
160+ Some ( bound_bytes) => {
161+ let bound = ManifestFilterVisitor :: bytes_to_datum (
162+ bound_bytes,
163+ * reference. field ( ) . field_type . clone ( ) ,
164+ ) ;
165+ if datum <= & bound {
166+ ROWS_CANNOT_MATCH
167+ } else {
168+ ROWS_MIGHT_MATCH
169+ }
170+ }
160171 None => ROWS_CANNOT_MATCH ,
161172 }
162173 }
@@ -169,8 +180,17 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
169180 ) -> crate :: Result < bool > {
170181 let field = self . field_summary_for_reference ( reference) ;
171182 match & field. lower_bound {
172- Some ( bound) if datum < bound => ROWS_CANNOT_MATCH ,
173- Some ( _) => ROWS_MIGHT_MATCH ,
183+ Some ( bound_bytes) => {
184+ let bound = ManifestFilterVisitor :: bytes_to_datum (
185+ bound_bytes,
186+ * reference. field ( ) . field_type . clone ( ) ,
187+ ) ;
188+ if datum < & bound {
189+ ROWS_CANNOT_MATCH
190+ } else {
191+ ROWS_MIGHT_MATCH
192+ }
193+ }
174194 None => ROWS_CANNOT_MATCH ,
175195 }
176196 }
@@ -183,8 +203,17 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
183203 ) -> crate :: Result < bool > {
184204 let field = self . field_summary_for_reference ( reference) ;
185205 match & field. upper_bound {
186- Some ( bound) if datum >= bound => ROWS_CANNOT_MATCH ,
187- Some ( _) => ROWS_MIGHT_MATCH ,
206+ Some ( bound_bytes) => {
207+ let bound = ManifestFilterVisitor :: bytes_to_datum (
208+ bound_bytes,
209+ * reference. field ( ) . field_type . clone ( ) ,
210+ ) ;
211+ if datum >= & bound {
212+ ROWS_CANNOT_MATCH
213+ } else {
214+ ROWS_MIGHT_MATCH
215+ }
216+ }
188217 None => ROWS_CANNOT_MATCH ,
189218 }
190219 }
@@ -197,8 +226,17 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
197226 ) -> crate :: Result < bool > {
198227 let field = self . field_summary_for_reference ( reference) ;
199228 match & field. upper_bound {
200- Some ( bound) if datum > bound => ROWS_CANNOT_MATCH ,
201- Some ( _) => ROWS_MIGHT_MATCH ,
229+ Some ( bound_bytes) => {
230+ let bound = ManifestFilterVisitor :: bytes_to_datum (
231+ bound_bytes,
232+ * reference. field ( ) . field_type . clone ( ) ,
233+ ) ;
234+ if datum > & bound {
235+ ROWS_CANNOT_MATCH
236+ } else {
237+ ROWS_MIGHT_MATCH
238+ }
239+ }
202240 None => ROWS_CANNOT_MATCH ,
203241 }
204242 }
@@ -215,14 +253,22 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
215253 return ROWS_CANNOT_MATCH ;
216254 }
217255
218- if let Some ( lower_bound) = & field. lower_bound {
219- if lower_bound > datum {
256+ if let Some ( lower_bound_bytes) = & field. lower_bound {
257+ let lower_bound = ManifestFilterVisitor :: bytes_to_datum (
258+ lower_bound_bytes,
259+ * reference. field ( ) . field_type . clone ( ) ,
260+ ) ;
261+ if & lower_bound > datum {
220262 return ROWS_CANNOT_MATCH ;
221263 }
222264 }
223265
224- if let Some ( upper_bound) = & field. upper_bound {
225- if upper_bound < datum {
266+ if let Some ( upper_bound_bytes) = & field. upper_bound {
267+ let upper_bound = ManifestFilterVisitor :: bytes_to_datum (
268+ upper_bound_bytes,
269+ * reference. field ( ) . field_type . clone ( ) ,
270+ ) ;
271+ if & upper_bound < datum {
226272 return ROWS_CANNOT_MATCH ;
227273 }
228274 }
@@ -260,23 +306,15 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
260306 let prefix_len = prefix. len ( ) ;
261307
262308 if let Some ( lower_bound) = & field. lower_bound {
263- let lower_bound_str = ManifestFilterVisitor :: datum_as_str (
264- lower_bound,
265- "Cannot perform starts_with on non-string lower bound" ,
266- ) ?;
267- let min_len = lower_bound_str. len ( ) . min ( prefix_len) ;
268- if prefix. as_bytes ( ) . lt ( & lower_bound_str. as_bytes ( ) [ ..min_len] ) {
309+ let min_len = lower_bound. len ( ) . min ( prefix_len) ;
310+ if prefix. as_bytes ( ) . lt ( & lower_bound[ ..min_len] ) {
269311 return ROWS_CANNOT_MATCH ;
270312 }
271313 }
272314
273315 if let Some ( upper_bound) = & field. upper_bound {
274- let upper_bound_str = ManifestFilterVisitor :: datum_as_str (
275- upper_bound,
276- "Cannot perform starts_with on non-string upper bound" ,
277- ) ?;
278- let min_len = upper_bound_str. len ( ) . min ( prefix_len) ;
279- if prefix. as_bytes ( ) . gt ( & upper_bound_str. as_bytes ( ) [ ..min_len] ) {
316+ let min_len = upper_bound. len ( ) . min ( prefix_len) ;
317+ if prefix. as_bytes ( ) . gt ( & upper_bound[ ..min_len] ) {
280318 return ROWS_CANNOT_MATCH ;
281319 }
282320 }
@@ -305,35 +343,19 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
305343 // not_starts_with will match unless all values must start with the prefix. This happens when
306344 // the lower and upper bounds both start with the prefix.
307345 if let Some ( lower_bound) = & field. lower_bound {
308- let lower_bound_str = ManifestFilterVisitor :: datum_as_str (
309- lower_bound,
310- "Cannot perform not_starts_with on non-string lower bound" ,
311- ) ?;
312-
313346 // if lower is shorter than the prefix then lower doesn't start with the prefix
314- if prefix_len > lower_bound_str . len ( ) {
347+ if prefix_len > lower_bound . len ( ) {
315348 return ROWS_MIGHT_MATCH ;
316349 }
317350
318- if prefix
319- . as_bytes ( )
320- . eq ( & lower_bound_str. as_bytes ( ) [ ..prefix_len] )
321- {
351+ if prefix. as_bytes ( ) . eq ( & lower_bound[ ..prefix_len] ) {
322352 if let Some ( upper_bound) = & field. upper_bound {
323- let upper_bound_str = ManifestFilterVisitor :: datum_as_str (
324- upper_bound,
325- "Cannot perform not_starts_with on non-string upper bound" ,
326- ) ?;
327-
328353 // if upper is shorter than the prefix then upper can't start with the prefix
329- if prefix_len > upper_bound_str . len ( ) {
354+ if prefix_len > upper_bound . len ( ) {
330355 return ROWS_MIGHT_MATCH ;
331356 }
332357
333- if prefix
334- . as_bytes ( )
335- . eq ( & upper_bound_str. as_bytes ( ) [ ..prefix_len] )
336- {
358+ if prefix. as_bytes ( ) . eq ( & upper_bound[ ..prefix_len] ) {
337359 return ROWS_CANNOT_MATCH ;
338360 }
339361 }
@@ -359,13 +381,21 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
359381 }
360382
361383 if let Some ( lower_bound) = & field. lower_bound {
362- if literals. iter ( ) . all ( |datum| lower_bound > datum) {
384+ let lower_bound = ManifestFilterVisitor :: bytes_to_datum (
385+ lower_bound,
386+ * reference. field ( ) . clone ( ) . field_type ,
387+ ) ;
388+ if literals. iter ( ) . all ( |datum| & lower_bound > datum) {
363389 return ROWS_CANNOT_MATCH ;
364390 }
365391 }
366392
367393 if let Some ( upper_bound) = & field. upper_bound {
368- if literals. iter ( ) . all ( |datum| upper_bound < datum) {
394+ let upper_bound = ManifestFilterVisitor :: bytes_to_datum (
395+ upper_bound,
396+ * reference. field ( ) . clone ( ) . field_type ,
397+ ) ;
398+ if literals. iter ( ) . all ( |datum| & upper_bound < datum) {
369399 return ROWS_CANNOT_MATCH ;
370400 }
371401 }
@@ -414,6 +444,11 @@ impl ManifestFilterVisitor<'_> {
414444 } ;
415445 Ok ( bound)
416446 }
447+
448+ fn bytes_to_datum ( bytes : & ByteBuf , t : Type ) -> Datum {
449+ let p = t. as_primitive_type ( ) . unwrap ( ) ;
450+ Datum :: try_from_bytes ( bytes, p. clone ( ) ) . unwrap ( )
451+ }
417452}
418453
419454#[ cfg( test) ]
@@ -520,8 +555,8 @@ mod test {
520555 FieldSummary {
521556 contains_null: false ,
522557 contains_nan: None ,
523- lower_bound: Some ( Datum :: int( INT_MIN_VALUE ) ) ,
524- upper_bound: Some ( Datum :: int( INT_MAX_VALUE ) ) ,
558+ lower_bound: Some ( Datum :: int( INT_MIN_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
559+ upper_bound: Some ( Datum :: int( INT_MAX_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
525560 } ,
526561 // all_nulls_missing_nan
527562 FieldSummary {
@@ -534,22 +569,22 @@ mod test {
534569 FieldSummary {
535570 contains_null: true ,
536571 contains_nan: None ,
537- lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) ) ,
538- upper_bound: Some ( Datum :: string( STRING_MAX_VALUE ) ) ,
572+ lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
573+ upper_bound: Some ( Datum :: string( STRING_MAX_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
539574 } ,
540575 // no_nulls
541576 FieldSummary {
542577 contains_null: false ,
543578 contains_nan: None ,
544- lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) ) ,
545- upper_bound: Some ( Datum :: string( STRING_MAX_VALUE ) ) ,
579+ lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
580+ upper_bound: Some ( Datum :: string( STRING_MAX_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
546581 } ,
547582 // float
548583 FieldSummary {
549584 contains_null: true ,
550585 contains_nan: None ,
551- lower_bound: Some ( Datum :: float( 0.0 ) ) ,
552- upper_bound: Some ( Datum :: float( 20.0 ) ) ,
586+ lower_bound: Some ( Datum :: float( 0.0 ) . to_bytes ( ) . unwrap ( ) ) ,
587+ upper_bound: Some ( Datum :: float( 20.0 ) . to_bytes ( ) . unwrap ( ) ) ,
553588 } ,
554589 // all_nulls_double
555590 FieldSummary {
@@ -583,8 +618,8 @@ mod test {
583618 FieldSummary {
584619 contains_null: false ,
585620 contains_nan: Some ( false ) ,
586- lower_bound: Some ( Datum :: float( 0.0 ) ) ,
587- upper_bound: Some ( Datum :: float( 20.0 ) ) ,
621+ lower_bound: Some ( Datum :: float( 0.0 ) . to_bytes ( ) . unwrap ( ) ) ,
622+ upper_bound: Some ( Datum :: float( 20.0 ) . to_bytes ( ) . unwrap ( ) ) ,
588623 } ,
589624 // all_nulls_missing_nan_float
590625 FieldSummary {
@@ -597,15 +632,15 @@ mod test {
597632 FieldSummary {
598633 contains_null: true ,
599634 contains_nan: None ,
600- lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) ) ,
601- upper_bound: Some ( Datum :: string( STRING_MIN_VALUE ) ) ,
635+ lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
636+ upper_bound: Some ( Datum :: string( STRING_MIN_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
602637 } ,
603638 // no_nulls_same_value_a
604639 FieldSummary {
605640 contains_null: false ,
606641 contains_nan: None ,
607- lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) ) ,
608- upper_bound: Some ( Datum :: string( STRING_MIN_VALUE ) ) ,
642+ lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
643+ upper_bound: Some ( Datum :: string( STRING_MIN_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
609644 } ,
610645 ]
611646 }
@@ -625,7 +660,7 @@ mod test {
625660 added_rows_count : None ,
626661 existing_rows_count : None ,
627662 deleted_rows_count : None ,
628- partitions,
663+ partitions : Some ( partitions ) ,
629664 key_metadata : vec ! [ ] ,
630665 }
631666 }
0 commit comments