From 784aba8be5180c121c8273dbf431392d5c902f08 Mon Sep 17 00:00:00 2001 From: Sebastian Garrido Date: Sat, 14 Jan 2023 10:50:17 -0500 Subject: [PATCH] Enforce ordering by replication key during full refresh For the incremental strategy, if we stop an initial data load midway, our state will be that of the last record that was written to the target. Right now the generated SQL for this case does not order records by the replication_key. If we don't enforce the ordering of the records from the source, we have no guarantee that all records before the last one were written to the target (as Postgres might return them in any order). To overcome this issue, we should enforce ordering. --- tap_postgres/sync_strategies/incremental.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tap_postgres/sync_strategies/incremental.py b/tap_postgres/sync_strategies/incremental.py index c84df36..358a56d 100644 --- a/tap_postgres/sync_strategies/incremental.py +++ b/tap_postgres/sync_strategies/incremental.py @@ -141,5 +141,6 @@ def _get_select_sql(params): select_sql = f""" SELECT {','.join(escaped_columns)} FROM {post_db.fully_qualified_table_name(schema_name, stream['table_name'])} + ORDER BY {post_db.prepare_columns_sql(replication_key)} ASC """ return select_sql