|
6 | 6 | from functools import partial
|
7 | 7 | import logging
|
8 | 8 | from typing import List
|
| 9 | +from itertools import chain |
9 | 10 |
|
10 | 11 | from runtype import dataclass
|
11 | 12 |
|
@@ -183,13 +184,17 @@ def _diff_segments(
|
183 | 184 | else None,
|
184 | 185 | ):
|
185 | 186 |
|
| 187 | + assert len(a_cols) == len(b_cols) |
186 | 188 | logger.debug("Querying for different rows")
|
187 | 189 | for is_xa, is_xb, *x in db.query(diff_rows, list):
|
188 | 190 | if is_xa and is_xb:
|
189 | 191 | # Can't both be exclusive, meaning a pk is NULL
|
190 | 192 | # This can happen if the explicit null test didn't finish running yet
|
191 | 193 | raise ValueError("NULL values in one or more primary keys")
|
192 |
| - _is_diff, a_row, b_row = _slice_tuple(x, len(is_diff_cols), len(a_cols), len(b_cols)) |
| 194 | + # _is_diff, a_row, b_row = _slice_tuple(x, len(is_diff_cols), len(a_cols), len(b_cols)) |
| 195 | + _is_diff, ab_row = _slice_tuple(x, len(is_diff_cols), len(a_cols) + len(b_cols)) |
| 196 | + a_row, b_row = ab_row[::2], ab_row[1::2] |
| 197 | + assert len(a_row) == len(b_row) |
193 | 198 | if not is_xb:
|
194 | 199 | yield "-", tuple(a_row)
|
195 | 200 | if not is_xa:
|
@@ -273,10 +278,12 @@ def _create_outer_join(self, table1, table2):
|
273 | 278 |
|
274 | 279 | is_diff_cols = {f"is_diff_{c1}": bool_to_int(a[c1].is_distinct_from(b[c2])) for c1, c2 in safezip(cols1, cols2)}
|
275 | 280 |
|
276 |
| - a_cols = {f"table1_{c}": NormalizeAsString(a[c]) for c in cols1} |
277 |
| - b_cols = {f"table2_{c}": NormalizeAsString(b[c]) for c in cols2} |
| 281 | + a_cols = {f"{c}_a": NormalizeAsString(a[c]) for c in cols1} |
| 282 | + b_cols = {f"{c}_b": NormalizeAsString(b[c]) for c in cols2} |
| 283 | + # Order columns as col1_a, col1_b, col2_a, col2_b, etc. |
| 284 | + cols = {k: v for k, v in chain(*zip(a_cols.items(), b_cols.items()))} |
278 | 285 |
|
279 |
| - all_rows = _outerjoin(db, a, b, keys1, keys2, {**is_diff_cols, **a_cols, **b_cols}) |
| 286 | + all_rows = _outerjoin(db, a, b, keys1, keys2, {**is_diff_cols, **cols}) |
280 | 287 | diff_rows = all_rows.where(or_(this[c] == 1 for c in is_diff_cols))
|
281 | 288 | return diff_rows, a_cols, b_cols, is_diff_cols, all_rows
|
282 | 289 |
|
|
0 commit comments