@@ -380,27 +380,48 @@ def _main(
380
380
381
381
if stats :
382
382
diff = list (diff_iter )
383
- unique_diff_count = len ({i [0 ] for _ , i in diff })
384
- max_table_count = max (differ .stats ["table1_count" ], differ .stats ["table2_count" ])
385
- percent = 100 * unique_diff_count / (max_table_count or 1 )
386
- plus = len ([1 for op , _ in diff if op == "+" ])
387
- minus = len ([1 for op , _ in diff if op == "-" ])
383
+ key_columns_len = len (key_columns )
384
+
385
+ diff_by_key = {}
386
+ for sign , values in diff :
387
+ k = values [:key_columns_len ]
388
+ if k in diff_by_key :
389
+ assert sign != diff_by_key [k ]
390
+ diff_by_key [k ] = "!"
391
+ else :
392
+ diff_by_key [k ] = sign
393
+
394
+ diff_by_sign = {k : 0 for k in "+-!" }
395
+ for sign in diff_by_key .values ():
396
+ diff_by_sign [sign ] += 1
397
+
398
+ table1_count = differ .stats .pop ("table1_count" )
399
+ table2_count = differ .stats .pop ("table2_count" )
400
+ del differ .stats ['diff_count' ]
401
+ unchanged = table1_count - diff_by_sign ["-" ] - diff_by_sign ["!" ]
402
+ diff_percent = 1 - unchanged / max (table1_count , table2_count )
388
403
389
404
if json_output :
390
405
json_output = {
391
- "different_rows" : len (diff ),
392
- "different_percent" : percent ,
393
- "different_+" : plus ,
394
- "different_-" : minus ,
395
- "different_unique" : unique_diff_count ,
396
- "total" : max_table_count ,
406
+ "rows_A" : table1_count ,
407
+ "rows_B" : table2_count ,
408
+ "exclusive_A" : diff_by_sign ["-" ],
409
+ "exclusive_B" : diff_by_sign ["+" ],
410
+ "updated" : diff_by_sign ["!" ],
411
+ "unchanged" : unchanged ,
412
+ "total" : sum (diff_by_sign .values ()),
397
413
"stats" : differ .stats ,
398
414
}
399
- rich .print (json .dumps (json_output ))
415
+ rich .print_json (json .dumps (json_output ))
400
416
else :
401
- print (f"Diff-Total: { unique_diff_count } changed rows out of { max_table_count } " )
402
- print (f"Diff-Percent: { percent :.14f} %" )
403
- print (f"Diff-Split: +{ plus } -{ minus } " )
417
+ rich .print (f"{ table1_count } rows in table A" )
418
+ rich .print (f"{ table2_count } rows in table B" )
419
+ rich .print (f"{ diff_by_sign ['-' ]} rows exclusive to table A (not present in B)" )
420
+ rich .print (f"{ diff_by_sign ['+' ]} rows exclusive to table B (not present in A)" )
421
+ rich .print (f"{ diff_by_sign ['!' ]} rows updated" )
422
+ rich .print (f"{ unchanged } rows unchanged" )
423
+ rich .print (f"{ 100 * diff_percent :.2f} % difference score" )
424
+
404
425
if differ .stats :
405
426
print ("Extra-Info:" )
406
427
for k , v in differ .stats .items ():
0 commit comments