@@ -523,7 +523,7 @@ mod tests {
523523 } ;
524524 use crate :: table:: Table ;
525525 use crate :: TableIdent ;
526- use arrow_array:: { ArrayRef , Int64Array , RecordBatch } ;
526+ use arrow_array:: { ArrayRef , Int64Array , RecordBatch , StringArray } ;
527527 use futures:: TryStreamExt ;
528528 use parquet:: arrow:: { ArrowWriter , PARQUET_FIELD_ID_META_KEY } ;
529529 use parquet:: basic:: Compression ;
@@ -705,10 +705,15 @@ mod tests {
705705 PARQUET_FIELD_ID_META_KEY . to_string( ) ,
706706 "3" . to_string( ) ,
707707 ) ] ) ) ,
708+ arrow_schema:: Field :: new( "a" , arrow_schema:: DataType :: Utf8 , false )
709+ . with_metadata( HashMap :: from( [ (
710+ PARQUET_FIELD_ID_META_KEY . to_string( ) ,
711+ "4" . to_string( ) ,
712+ ) ] ) ) ,
708713 ] ;
709714 Arc :: new ( arrow_schema:: Schema :: new ( fields) )
710715 } ;
711- // 3 columns:
716+ // 4 columns:
712717 // x: [1, 1, 1, 1, ...]
713718 let col1 = Arc :: new ( Int64Array :: from_iter_values ( vec ! [ 1 ; 1024 ] ) ) as ArrayRef ;
714719
@@ -725,7 +730,14 @@ mod tests {
725730
726731 // z: [3, 3, 3, 3, ..., 4, 4, 4, 4]
727732 let col3 = Arc :: new ( Int64Array :: from_iter_values ( values) ) as ArrayRef ;
728- let to_write = RecordBatch :: try_new ( schema. clone ( ) , vec ! [ col1, col2, col3] ) . unwrap ( ) ;
733+
734+ // a: ["Apache", "Apache", "Apache", ..., "Iceberg", "Iceberg", "Iceberg"]
735+ let mut values = vec ! [ "Apache" ; 512 ] ;
736+ values. append ( vec ! [ "Iceberg" ; 512 ] . as_mut ( ) ) ;
737+ let col4 = Arc :: new ( StringArray :: from_iter_values ( values) ) as ArrayRef ;
738+
739+ let to_write =
740+ RecordBatch :: try_new ( schema. clone ( ) , vec ! [ col1, col2, col3, col4] ) . unwrap ( ) ;
729741
730742 // Write the Parquet files
731743 let props = WriterProperties :: builder ( )
@@ -773,7 +785,7 @@ mod tests {
773785 fn test_select_no_exist_column ( ) {
774786 let table = TableTestFixture :: new ( ) . table ;
775787
776- let table_scan = table. scan ( ) . select ( [ "x" , "y" , "z" , "a" ] ) . build ( ) ;
788+ let table_scan = table. scan ( ) . select ( [ "x" , "y" , "z" , "a" , "b" ] ) . build ( ) ;
777789 assert ! ( table_scan. is_err( ) ) ;
778790 }
779791
@@ -1040,4 +1052,94 @@ mod tests {
10401052 let expected_z = Arc :: new ( Int64Array :: from_iter_values ( values) ) as ArrayRef ;
10411053 assert_eq ! ( col, & expected_z) ;
10421054 }
1055+
1056+ #[ tokio:: test]
1057+ async fn test_filter_on_arrow_startswith ( ) {
1058+ let mut fixture = TableTestFixture :: new ( ) ;
1059+ fixture. setup_manifest_files ( ) . await ;
1060+
1061+ // Filter: a STARTSWITH "Ice"
1062+ let mut builder = fixture. table . scan ( ) ;
1063+ let predicate = Reference :: new ( "a" ) . starts_with ( Datum :: string ( "Ice" ) ) ;
1064+ builder = builder. filter ( predicate) ;
1065+ let table_scan = builder. build ( ) . unwrap ( ) ;
1066+
1067+ let batch_stream = table_scan. to_arrow ( ) . await . unwrap ( ) ;
1068+
1069+ let batches: Vec < _ > = batch_stream. try_collect ( ) . await . unwrap ( ) ;
1070+
1071+ assert_eq ! ( batches[ 0 ] . num_rows( ) , 512 ) ;
1072+
1073+ let col = batches[ 0 ] . column_by_name ( "a" ) . unwrap ( ) ;
1074+ let string_arr = col. as_any ( ) . downcast_ref :: < StringArray > ( ) . unwrap ( ) ;
1075+ assert_eq ! ( string_arr. value( 0 ) , "Iceberg" ) ;
1076+ }
1077+
1078+ #[ tokio:: test]
1079+ async fn test_filter_on_arrow_not_startswith ( ) {
1080+ let mut fixture = TableTestFixture :: new ( ) ;
1081+ fixture. setup_manifest_files ( ) . await ;
1082+
1083+ // Filter: a NOT STARTSWITH "Ice"
1084+ let mut builder = fixture. table . scan ( ) ;
1085+ let predicate = Reference :: new ( "a" ) . not_starts_with ( Datum :: string ( "Ice" ) ) ;
1086+ builder = builder. filter ( predicate) ;
1087+ let table_scan = builder. build ( ) . unwrap ( ) ;
1088+
1089+ let batch_stream = table_scan. to_arrow ( ) . await . unwrap ( ) ;
1090+
1091+ let batches: Vec < _ > = batch_stream. try_collect ( ) . await . unwrap ( ) ;
1092+
1093+ assert_eq ! ( batches[ 0 ] . num_rows( ) , 512 ) ;
1094+
1095+ let col = batches[ 0 ] . column_by_name ( "a" ) . unwrap ( ) ;
1096+ let string_arr = col. as_any ( ) . downcast_ref :: < StringArray > ( ) . unwrap ( ) ;
1097+ assert_eq ! ( string_arr. value( 0 ) , "Apache" ) ;
1098+ }
1099+
1100+ #[ tokio:: test]
1101+ async fn test_filter_on_arrow_in ( ) {
1102+ let mut fixture = TableTestFixture :: new ( ) ;
1103+ fixture. setup_manifest_files ( ) . await ;
1104+
1105+ // Filter: a IN ("Sioux", "Iceberg")
1106+ let mut builder = fixture. table . scan ( ) ;
1107+ let predicate =
1108+ Reference :: new ( "a" ) . is_in ( [ Datum :: string ( "Sioux" ) , Datum :: string ( "Iceberg" ) ] ) ;
1109+ builder = builder. filter ( predicate) ;
1110+ let table_scan = builder. build ( ) . unwrap ( ) ;
1111+
1112+ let batch_stream = table_scan. to_arrow ( ) . await . unwrap ( ) ;
1113+
1114+ let batches: Vec < _ > = batch_stream. try_collect ( ) . await . unwrap ( ) ;
1115+
1116+ assert_eq ! ( batches[ 0 ] . num_rows( ) , 512 ) ;
1117+
1118+ let col = batches[ 0 ] . column_by_name ( "a" ) . unwrap ( ) ;
1119+ let string_arr = col. as_any ( ) . downcast_ref :: < StringArray > ( ) . unwrap ( ) ;
1120+ assert_eq ! ( string_arr. value( 0 ) , "Iceberg" ) ;
1121+ }
1122+
1123+ #[ tokio:: test]
1124+ async fn test_filter_on_arrow_not_in ( ) {
1125+ let mut fixture = TableTestFixture :: new ( ) ;
1126+ fixture. setup_manifest_files ( ) . await ;
1127+
1128+ // Filter: a NOT IN ("Sioux", "Iceberg")
1129+ let mut builder = fixture. table . scan ( ) ;
1130+ let predicate =
1131+ Reference :: new ( "a" ) . is_not_in ( [ Datum :: string ( "Sioux" ) , Datum :: string ( "Iceberg" ) ] ) ;
1132+ builder = builder. filter ( predicate) ;
1133+ let table_scan = builder. build ( ) . unwrap ( ) ;
1134+
1135+ let batch_stream = table_scan. to_arrow ( ) . await . unwrap ( ) ;
1136+
1137+ let batches: Vec < _ > = batch_stream. try_collect ( ) . await . unwrap ( ) ;
1138+
1139+ assert_eq ! ( batches[ 0 ] . num_rows( ) , 512 ) ;
1140+
1141+ let col = batches[ 0 ] . column_by_name ( "a" ) . unwrap ( ) ;
1142+ let string_arr = col. as_any ( ) . downcast_ref :: < StringArray > ( ) . unwrap ( ) ;
1143+ assert_eq ! ( string_arr. value( 0 ) , "Apache" ) ;
1144+ }
10431145}
0 commit comments