Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 3918343

Browse files
committed
Temporarily remove some extra stats (were sometimes wrong due to threading)
1 parent 88d2f48 commit 3918343

File tree

2 files changed

+17
-10
lines changed

2 files changed

+17
-10
lines changed

data_diff/joindiff_tables.py

Lines changed: 15 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -236,25 +236,32 @@ def _collect_stats(self, i, table_seg: TableSegment):
236236
# Metrics
237237
col_exprs = merge_dicts(
238238
{
239-
f"min_{c}": min_(this[c]),
240-
f"max_{c}": max_(this[c]),
239+
# f"min_{c}": min_(this[c]),
240+
# f"max_{c}": max_(this[c]),
241241
}
242242
if c in table_seg.key_columns else
243243
{
244244
f"sum_{c}": sum_(this[c]),
245-
f"avg_{c}": avg(this[c]),
246-
f"min_{c}": min_(this[c]),
247-
f"max_{c}": max_(this[c]),
245+
# f"avg_{c}": avg(this[c]),
246+
# f"min_{c}": min_(this[c]),
247+
# f"max_{c}": max_(this[c]),
248248
}
249249
for c in table_seg.relevant_columns
250250
if isinstance(table_seg._schema[c], NumericType)
251251
)
252252
col_exprs["count"] = Count()
253253

254254
res = db.query(table_seg.make_select().select(**col_exprs), tuple)
255-
res = dict(zip([f"table{i}_{n}" for n in col_exprs], map(json_friendly_value, res)))
256-
for k, v in res.items():
257-
self.stats[k] = self.stats.get(k, 0) + (v or 0)
255+
256+
for col_name, value in safezip(col_exprs, res):
257+
if value is not None:
258+
value = json_friendly_value(value)
259+
stat_name = f"table{i}_{col_name}"
260+
261+
if stat_name in self.stats:
262+
self.stats[stat_name] += value
263+
else:
264+
self.stats[stat_name] = value
258265

259266
logger.debug("Done collecting stats for table #%s", i)
260267

tests/test_joindiff.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -122,8 +122,8 @@ def test_diff_small_tables(self):
122122
self.assertEqual(expected, diff)
123123
self.assertEqual(2, self.differ.stats["table1_count"])
124124
self.assertEqual(1, self.differ.stats["table2_count"])
125-
self.assertEqual(3, self.differ.stats["table1_max_id"])
126-
self.assertEqual(1, self.differ.stats["table2_min_id"])
125+
# self.assertEqual(2, self.differ.stats["table1_max_id"])
126+
# self.assertEqual(1, self.differ.stats["table2_min_id"])
127127

128128
# Test materialize
129129
materialize_path = self.connection.parse_table_name(f"test_mat_{random_table_suffix()}")

0 commit comments

Comments
 (0)