@@ -67,13 +67,12 @@ impl EqualityDeleteWriterConfig {
6767 original_arrow_schema,
6868 & equality_ids,
6969 // The following rule comes from https://iceberg.apache.org/spec/#identifier-field-ids
70+ // and https://iceberg.apache.org/spec/#equality-delete-files
7071 // - Only primitive types may be used as equality/identifier field ids.
7172 // - Floating point types must not be used; nullable fields are accepted for equality deletes.
72- // - The identifier field ids can be nested field of struct but not nested field of nullable struct.
7373 |field| {
7474 // Only primitive, non-floating-point types are allowed to be used for identifier field ids
75- if field. is_nullable ( )
76- || field. data_type ( ) . is_nested ( )
75+ if field. data_type ( ) . is_nested ( )
7776 || matches ! (
7877 field. data_type( ) ,
7978 DataType :: Float16 | DataType :: Float32 | DataType :: Float64
@@ -92,7 +91,7 @@ impl EqualityDeleteWriterConfig {
9291 . map_err ( |e| Error :: new ( ErrorKind :: Unexpected , e. to_string ( ) ) ) ?,
9392 ) )
9493 } ,
95- |field : & Field | !field . is_nullable ( ) ,
94+ |_field : & Field | true ,
9695 ) ?;
9796 Ok ( Self {
9897 equality_ids,
@@ -172,6 +171,7 @@ mod test {
172171
173172 use arrow_array:: types:: Int32Type ;
174173 use arrow_array:: { ArrayRef , BooleanArray , Int32Array , Int64Array , RecordBatch , StructArray } ;
174+ use arrow_buffer:: NullBuffer ;
175175 use arrow_schema:: DataType ;
176176 use arrow_select:: concat:: concat_batches;
177177 use itertools:: Itertools ;
@@ -484,14 +484,10 @@ mod test {
484484 // Float and Double are not allowed to be used for equality delete
485485 assert ! ( EqualityDeleteWriterConfig :: new( vec![ 0 ] , schema. clone( ) , None ) . is_err( ) ) ;
486486 assert ! ( EqualityDeleteWriterConfig :: new( vec![ 1 ] , schema. clone( ) , None ) . is_err( ) ) ;
487- // Int is nullable, not allowed to be used for equality delete
488- assert ! ( EqualityDeleteWriterConfig :: new( vec![ 2 ] , schema. clone( ) , None ) . is_err( ) ) ;
489487 // Struct is not allowed to be used for equality delete
490488 assert ! ( EqualityDeleteWriterConfig :: new( vec![ 3 ] , schema. clone( ) , None ) . is_err( ) ) ;
491489 // Nested field of struct is allowed to be used for equality delete
492490 assert ! ( EqualityDeleteWriterConfig :: new( vec![ 4 ] , schema. clone( ) , None ) . is_ok( ) ) ;
493- // Nested field of optional struct is not allowed to be used for equality delete
494- assert ! ( EqualityDeleteWriterConfig :: new( vec![ 6 ] , schema. clone( ) , None ) . is_err( ) ) ;
495491 // Nested field of map is not allowed to be used for equality delete
496492 assert ! ( EqualityDeleteWriterConfig :: new( vec![ 7 ] , schema. clone( ) , None ) . is_err( ) ) ;
497493 assert ! ( EqualityDeleteWriterConfig :: new( vec![ 8 ] , schema. clone( ) , None ) . is_err( ) ) ;
@@ -657,4 +653,61 @@ mod test {
657653
658654 Ok ( ( ) )
659655 }
656+
657+ #[ tokio:: test]
658+ async fn test_equality_delete_with_nullable_field ( ) -> Result < ( ) , anyhow:: Error > {
659+ // Prepare data: a schema in which every field, including the nested one, is optional (nullable).
660+ // Schema: col0: optional Int (id 0), col1: optional Struct (id 1) { sub_col: optional Int (id 2) }
661+ let schema = Schema :: builder ( )
662+ . with_schema_id ( 1 )
663+ . with_fields ( vec ! [
664+ NestedField :: optional( 0 , "col0" , Type :: Primitive ( PrimitiveType :: Int ) ) . into( ) ,
665+ NestedField :: optional(
666+ 1 ,
667+ "col1" ,
668+ Type :: Struct ( StructType :: new( vec![ NestedField :: optional(
669+ 2 ,
670+ "sub_col" ,
671+ Type :: Primitive ( PrimitiveType :: Int ) ,
672+ )
673+ . into( ) ] ) ) ,
674+ )
675+ . into( ) ,
676+ ] )
677+ . build ( )
678+ . unwrap ( ) ;
679+ let arrow_schema = Arc :: new ( schema_to_arrow_schema ( & schema) . unwrap ( ) ) ;
680+ // Row 0: col0 = null, col1.sub_col = 1
681+ // Row 1: col0 = 2, col1 = null (the whole struct is null)
682+ // Row 2: col0 = 3, col1.sub_col = null (struct is present, the field is null)
683+ let col0 = Arc :: new ( Int32Array :: from ( vec ! [ None , Some ( 2 ) , Some ( 3 ) ] ) ) as ArrayRef ;
684+ let nulls = NullBuffer :: from ( vec ! [ true , false , true ] ) ;
685+ let col1 = Arc :: new ( StructArray :: new (
686+ if let DataType :: Struct ( fields) = arrow_schema. fields . get ( 1 ) . unwrap ( ) . data_type ( ) {
687+ fields. clone ( )
688+ } else {
689+ unreachable ! ( )
690+ } ,
691+ vec ! [ Arc :: new( Int32Array :: from( vec![ Some ( 1 ) , Some ( 2 ) , None ] ) ) ] ,
692+ Some ( nulls) ,
693+ ) ) ;
694+ let columns = vec ! [ col0, col1] ;
695+
696+ let to_write = RecordBatch :: try_new ( arrow_schema. clone ( ) , columns) . unwrap ( ) ;
697+ let equality_ids = vec ! [ 0_i32 , 2 ] ;
698+ let equality_config =
699+ EqualityDeleteWriterConfig :: new ( equality_ids, Arc :: new ( schema) , None ) . unwrap ( ) ;
700+ let projector = equality_config. projector . clone ( ) ;
701+
702+ // Check: the projection yields two flat columns (col0, sub_col); row 1's null struct becomes a null sub_col.
703+ let to_write_projected = projector. project_bacth ( to_write) ?;
704+ let expect_batch =
705+ RecordBatch :: try_new ( equality_config. projected_arrow_schema_ref ( ) . clone ( ) , vec ! [
706+ Arc :: new( Int32Array :: from( vec![ None , Some ( 2 ) , Some ( 3 ) ] ) ) as ArrayRef ,
707+ Arc :: new( Int32Array :: from( vec![ Some ( 1 ) , None , None ] ) ) as ArrayRef ,
708+ ] )
709+ . unwrap ( ) ;
710+ assert_eq ! ( to_write_projected, expect_batch) ;
711+ Ok ( ( ) )
712+ }
660713}
0 commit comments