@@ -437,6 +437,7 @@ def test_summaries_with_null(spark: SparkSession, session_catalog: Catalog, arro

     tbl.append(arrow_table_with_null)
     tbl.append(arrow_table_with_null)
+    tbl.overwrite(arrow_table_with_null)

     rows = spark.sql(
         f"""
@@ -447,7 +448,7 @@ def test_summaries_with_null(spark: SparkSession, session_catalog: Catalog, arro
     ).collect()

     operations = [row.operation for row in rows]
-    assert operations == ['append', 'append']
+    assert operations == ['append', 'append', 'overwrite']

     summaries = [row.summary for row in rows]

@@ -474,6 +475,20 @@ def test_summaries_with_null(spark: SparkSession, session_catalog: Catalog, arro
         'total-position-deletes': '0',
         'total-records': '6',
     }
+    assert summaries[2] == {
+        'removed-files-size': '28942',
+        'added-data-files': '3',
+        'total-equality-deletes': '0',
+        'added-records': '3',
+        'total-position-deletes': '0',
+        'deleted-data-files': '6',
+        'added-files-size': '14471',
+        'total-delete-files': '0',
+        'total-files-size': '14471',
+        'deleted-records': '6',
+        'total-records': '3',
+        'total-data-files': '3',
+    }


 @pytest.mark.integration
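
The new `summaries[2]` assertion pins the snapshot summary written by the overwrite: the six previously appended records and data files are recorded as deleted alongside the three newly written ones, so the running totals drop back to 3. A minimal client-side sketch of the same check through PyIceberg (assuming `tbl` is the loaded table and that `Summary` supports mapping-style key access):

    from pyiceberg.table.snapshots import Operation

    # After the two appends and the overwrite, the current snapshot is the
    # overwrite; its summary carries the same counters the test reads back
    # through Spark's snapshots metadata table.
    snapshot = tbl.current_snapshot()
    assert snapshot is not None and snapshot.summary is not None
    assert snapshot.summary.operation == Operation.OVERWRITE
    assert snapshot.summary['deleted-records'] == '6'
    assert snapshot.summary['total-records'] == '3'
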
@@ -495,6 +510,7 @@ def test_data_files_with_table_partitioned_with_null(

     tbl.append(arrow_table_with_null)
     tbl.append(arrow_table_with_null)
+    tbl.overwrite(arrow_table_with_null)

     # added_data_files_count, existing_data_files_count, deleted_data_files_count
     rows = spark.sql(
@@ -504,13 +520,9 @@ def test_data_files_with_table_partitioned_with_null(
         """
     ).collect()

-    assert [row.added_data_files_count for row in rows] == [3, 3, 3]
-    assert [row.existing_data_files_count for row in rows] == [
-        0,
-        0,
-        0,
-    ]
-    assert [row.deleted_data_files_count for row in rows] == [0, 0, 0]
+    assert [row.added_data_files_count for row in rows] == [3, 3, 3, 3, 0]
+    assert [row.existing_data_files_count for row in rows] == [0, 0, 0, 0, 0]
+    assert [row.deleted_data_files_count for row in rows] == [0, 0, 0, 0, 6]


 @pytest.mark.integration
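
The expected counts grow from 3 to 5 rows because the query (truncated above) walks the manifests of every snapshot. Each fast append writes one new manifest while re-listing the earlier ones, and the overwrite commits two more: one holding the 3 added files and one marking the 6 existing files as deleted, giving 1 + 2 + 2 = 5 rows. A hedged reconstruction of the query, assuming it targets the `all_manifests` metadata table under the test's `identifier`:

    rows = spark.sql(
        f"""
        SELECT added_data_files_count, existing_data_files_count, deleted_data_files_count
        FROM {identifier}.all_manifests
        """
    ).collect()
    # Expected: 3 rows from the two appends (1 + 2 manifests), then 2 more
    # from the overwrite: [3, 3, 3, 3, 0] added and [0, 0, 0, 0, 6] deleted.
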
@@ -529,5 +541,8 @@ def test_invalid_arguments(spark: SparkSession, session_catalog: Catalog, arrow_
         properties={'format-version': '1'},
     )

+    with pytest.raises(ValueError, match="Expected PyArrow table, got: not a df"):
+        tbl.overwrite("not a df")
+
     with pytest.raises(ValueError, match="Expected PyArrow table, got: not a df"):
         tbl.append("not a df")
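
Taken together, the tests fix the public contract of `Table.overwrite`: it atomically replaces the table's current data with the given PyArrow table in one new snapshot, and it rejects non-PyArrow input with the same `ValueError` as `append`. A minimal end-to-end sketch (the `idx` column and the pre-created `tbl` are illustrative assumptions):

    import pyarrow as pa

    # Illustrative data; any PyArrow table matching the Iceberg schema works.
    df = pa.Table.from_pylist([{"idx": 1}, {"idx": 2}, {"idx": 3}])

    tbl.append(df)     # snapshot 1: 3 records
    tbl.append(df)     # snapshot 2: 6 records in total
    tbl.overwrite(df)  # snapshot 3: drops the 6 records, writes these 3

    assert len(tbl.scan().to_arrow()) == 3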