Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 69e1154

Browse files
authored
Merge pull request #287 from datafold/materialize_rename_cols
Materialize: rename and reorder columns
2 parents 9193f76 + dc24012 commit 69e1154

File tree

2 files changed: 13 additions and 5 deletions.

2 files changed: 13 additions and 5 deletions.

data_diff/joindiff_tables.py

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
from functools import partial
77
import logging
88
from typing import List
9+
from itertools import chain
910

1011
from runtype import dataclass
1112

@@ -183,13 +184,17 @@ def _diff_segments(
183184
else None,
184185
):
185186

187+
assert len(a_cols) == len(b_cols)
186188
logger.debug("Querying for different rows")
187189
for is_xa, is_xb, *x in db.query(diff_rows, list):
188190
if is_xa and is_xb:
189191
# Can't both be exclusive, meaning a pk is NULL
190192
# This can happen if the explicit null test didn't finish running yet
191193
raise ValueError("NULL values in one or more primary keys")
192-
_is_diff, a_row, b_row = _slice_tuple(x, len(is_diff_cols), len(a_cols), len(b_cols))
194+
# _is_diff, a_row, b_row = _slice_tuple(x, len(is_diff_cols), len(a_cols), len(b_cols))
195+
_is_diff, ab_row = _slice_tuple(x, len(is_diff_cols), len(a_cols) + len(b_cols))
196+
a_row, b_row = ab_row[::2], ab_row[1::2]
197+
assert len(a_row) == len(b_row)
193198
if not is_xb:
194199
yield "-", tuple(a_row)
195200
if not is_xa:
@@ -273,10 +278,12 @@ def _create_outer_join(self, table1, table2):
273278

274279
is_diff_cols = {f"is_diff_{c1}": bool_to_int(a[c1].is_distinct_from(b[c2])) for c1, c2 in safezip(cols1, cols2)}
275280

276-
a_cols = {f"table1_{c}": NormalizeAsString(a[c]) for c in cols1}
277-
b_cols = {f"table2_{c}": NormalizeAsString(b[c]) for c in cols2}
281+
a_cols = {f"{c}_a": NormalizeAsString(a[c]) for c in cols1}
282+
b_cols = {f"{c}_b": NormalizeAsString(b[c]) for c in cols2}
283+
# Order columns as col1_a, col1_b, col2_a, col2_b, etc.
284+
cols = {k: v for k, v in chain(*zip(a_cols.items(), b_cols.items()))}
278285

279-
all_rows = _outerjoin(db, a, b, keys1, keys2, {**is_diff_cols, **a_cols, **b_cols})
286+
all_rows = _outerjoin(db, a, b, keys1, keys2, {**is_diff_cols, **cols})
280287
diff_rows = all_rows.where(or_(this[c] == 1 for c in is_diff_cols))
281288
return diff_rows, a_cols, b_cols, is_diff_cols, all_rows
282289

tests/test_joindiff.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -134,7 +134,8 @@ def test_diff_small_tables(self):
134134
t = TablePath(materialize_path)
135135
rows = self.connection.query(t.select(), List[tuple])
136136
# is_xa, is_xb, is_diff1, is_diff2, row1, row2
137-
assert rows == [(1, 0, 1, 1) + expected_row + (None, None)], rows
137+
# assert rows == [(1, 0, 1, 1) + expected_row + (None, None)], rows
138+
assert rows == [(1, 0, 1, 1) + (expected_row[0], None, expected_row[1], None)], rows
138139
self.connection.query(t.drop())
139140

140141
# Test materialize all rows

0 commit comments

Comments
 (0)